datayoga-io/datayoga

Build job's `full_schema.json` with all the supported blocks

Closed this issue · 1 comment

This should be done as part of the `publish.yaml` action. The resulting artifact should be uploaded to GitHub so that the playground can use it.

>>> from datayoga_core.job import Job
>>> Job.get_json_schema()
{'title': 'Job', 'description': 'Job descriptor', 'type': 'object', 'properties': {'input': {'$ref': '#/definitions/block'}, 'steps': {'type': 'array', 'items': {'$ref': '#/definitions/block'}}, 'error_handling': {'description': 'Error handling strategy: abort - terminate job, ignore - skip', 'type': 'string', 'enum': ['abort', 'ignore'], 'default': 'ignore'}}, 'additionalProperties': False, 'definitions': {'block': {'type': 'object', 'properties': {'uses': {'enum': ['http.write', 'http.receiver', 'relational.write', 'relational.read', 'sequence', 'rename_field', 'jinja_template', 'add_field', 'std.write', 'std.read', 'redis.write', 'redis.read_stream', 'redis.lookup', 'azure.read_event_hub', 'map', 'parquet.write', 'parquet.read', 'remove_field', 'filter', 'files.read_csv', 'cassandra.write']}}, 'allOf': [{'if': {'properties': {'uses': {'description': 'Block type', 'type': 'string', 'const': 'http.write'}}, 'required': ['uses']}, 'then': {'properties': {'with': {'title': 'http.write', 'description': 'Write data using an HTTP request', 'type': 'object', 'properties': {'connection': {'type': 'string', 'title': 'The connection to use for the HTTP request', 'description': 'Logical connection name as defined in the connections.yaml', 'examples': ['api_connection', 'external_service']}, 'endpoint': {'oneOf': [{'type': 'string', 'title': 'API Endpoint', 'description': 'The endpoint URL for the HTTP request', 'examples': ['/users']}, {'type': 'object', 'properties': {'expression': {'description': 'Expression', 'type': 'string'}, 'language': {'description': 'Language', 'type': 'string', 'enum': ['jmespath', 'sql']}}, 'required': ['expression', 'language'], 'examples': [{'expression': "concat(['/users/', id]')", 'language': 'jmespath'}]}]}, 'method': {'type': 'string', 'title': 'HTTP Method', 'description': 'HTTP method to be used for the request', 'enum': ['GET', 'PUT', 'POST', 'DELETE'], 'examples': ['GET', 'POST']}, 'payload': {'type': 'object', 'title': 'Request 
Payload', 'description': 'Data to be sent in the request body', 'additionalProperties': {'oneOf': [{'type': 'string'}, {'type': 'object', 'properties': {'expression': {'description': 'Expression', 'type': 'string'}, 'language': {'description': 'Language', 'type': 'string', 'enum': ['jmespath', 'sql']}}, 'required': ['expression', 'language']}]}}, 'extra_headers': {'type': 'object', 'title': 'Additional HTTP Headers', 'description': 'Extra headers to be included in the HTTP request', 'additionalProperties': {'oneOf': [{'type': 'string'}, {'type': 'object', 'properties': {'expression': {'description': 'Expression', 'type': 'string'}, 'language': {'description': 'Language', 'type': 'string', 'enum': ['jmespath', 'sql']}}, 'required': ['expression', 'language']}]}}, 'extra_query_parameters': {'description': 'Extra query parameters to be included in the HTTP request', 'type': 'object', 'additionalProperties': {'oneOf': [{'type': 'string'}, {'type': 'object', 'properties': {'expression': {'description': 'Expression', 'type': 'string'}, 'language': {'description': 'Language', 'type': 'string', 'enum': ['jmespath', 'sql']}}, 'required': ['expression', 'language']}]}}, 'timeout': {'type': 'integer', 'title': 'Timeout in Seconds', 'description': 'Timeout duration for this specific HTTP request in seconds'}, 'output': {'type': 'object', 'properties': {'status_code': {'type': 'string', 'title': 'Status Code Field Name', 'description': 'Name of the field where the HTTP response status code will be stored after the request'}, 'headers': {'type': 'string', 'title': 'Headers Field Name', 'description': 'Name of the field where the HTTP response headers will be stored after the request'}, 'body': {'type': 'string', 'title': 'Body Field Name', 'description': 'Name of the field where the HTTP response content will be stored after the request'}}}}, 'required': ['connection', 'endpoint', 'method'], 'examples': [{'connection': 'http_example', 'endpoint': {'expression': 
"concat(['users/', id])", 'language': 'jmespath'}, 'method': 'PUT', 'payload': {'full_name': {'expression': 'full_name', 'language': 'jmespath'}, 'greeting': {'expression': 'greeting', 'language': 'jmespath'}}, 'extra_headers': {'my_header': {'expression': "lname || '-' || fname", 'language': 'sql'}}, 'extra_query_parameters': {'fname': {'expression': 'UPPER(fname)', 'language': 'sql'}}, 'output': {'status_code': 'response.status_code', 'headers': 'response.headers', 'body': 'response.content'}, 'timeout': 3}]}}}}, {'if': {'properties': {'uses': {'description': 'Block type', 'type': 'string', 'const': 'http.receiver'}}, 'required': ['uses']}, 'then': {'properties': {'with': {'title': 'http.receiver', 'description': 'Receives HTTP requests and process the data.', 'type': 'object', 'properties': {'host': {'description': 'Host to listen', 'type': 'string', 'default': '0.0.0.0'}, 'port': {'description': 'Port to listen', 'type': 'integer', 'default': 8080}}, 'additionalProperties': False, 'examples': [{'host': 'localhost', 'port': 8080}]}}}}, {'if': {'properties': {'uses': {'description': 'Block type', 'type': 'string', 'const': 'relational.write'}}, 'required': ['uses']}, 'then': {'properties': {'with': {'title': 'relational.write', 'description': 'Write into a SQL-compatible data store', 'type': 'object', 'additionalProperties': False, 'examples': [{'id': 'load_snowflake', 'type': 'relational.write', 'properties': {'connection': 'eu_datalake', 'table': 'employees', 'schema': 'dbo', 'load_strategy': 'APPEND'}}], 'properties': {'connection': {'type': 'string', 'title': 'The connection to use for loading', 'description': 'Logical connection name as defined in the connections.yaml', 'examples': ['europe_db', 'target', 'eu_dwh']}, 'schema': {'type': 'string', 'title': 'The table schema of the target table', 'description': 'If not specified, no specific schema will be used when connecting to the database.', 'examples': ['dbo']}, 'table': {'type': 'string', 'title': 'The 
target table name', 'description': 'Target table name', 'examples': ['employees']}, 'keys': {'type': 'array', 'title': 'Business keys to use in case of `load_strategy` is UPSERT or working with `opcode_field`', 'items': {'type': ['string', 'object'], 'title': 'name of column'}, 'examples': [['fname', {'lname': 'last_name'}]]}, 'mapping': {'type': 'array', 'title': 'Fields to write', 'items': {'type': ['string', 'object'], 'title': 'name of column'}, 'examples': [['fname', {'lname': 'last_name'}, 'address', 'gender']]}, 'foreach': {'type': 'string', 'title': 'Split a column into multiple records with a JMESPath expression', 'description': 'Use a JMESPath expression to split a column into multiple records. The expression should be in the format column: expression.', 'pattern': '^(?!:).*:.*(?<!:)$', 'examples': ['order_line: lines[]']}, 'opcode_field': {'type': 'string', 'description': 'Name of the field in the payload that holds the operation (c - create, d - delete, u - update) for this record in the DB'}, 'load_strategy': {'type': 'string', 'enum': ['APPEND', 'REPLACE', 'UPSERT', 'TYPE2'], 'description': 'type of target', 'default': 'APPEND'}, 'active_record_indicator': {'type': 'string', 'description': 'Used for `TYPE2` load_strategy. An SQL expression used to identify which rows are active', 'examples': ["is_active='Y'", 'deletedAt is null']}, 'inactive_record_mapping': {'type': 'array', 'title': 'Used for `TYPE2` load_strategy. The columns mapping to use to close out an active record', 'description': "A list of columns to use. Use any valid SQL expression for the source. 
If 'target' is omitted, will default to the name of the source column", 'default': [], 'examples': [[{'source': 'CURRENT_DATE', 'target': 'deletedAt'}, {'source': "'Y'", 'target': 'is_active'}]]}}, 'required': ['connection', 'table'], 'allOf': [{'not': {'required': ['opcode_field', 'load_strategy']}}]}}}}, {'if': {'properties': {'uses': {'description': 'Block type', 'type': 'string', 'const': 'relational.read'}}, 'required': ['uses']}, 'then': {'properties': {'with': {'title': 'relational.read', 'description': 'Read a table from an SQL-compatible data store', 'type': 'object', 'additionalProperties': False, 'examples': [{'id': 'read_snowflake', 'type': 'relational.read', 'properties': {'connection': 'eu_datalake', 'table': 'employees', 'schema': 'dbo'}}], 'properties': {'connection': {'type': 'string', 'title': 'The connection to use for loading', 'description': 'Logical connection name as defined in the connections.yaml', 'examples': ['europe_db', 'target', 'eu_dwh']}, 'schema': {'type': 'string', 'title': 'The table schema of the table', 'description': 'If left blank, the default schema of this connection will be used as defined in the connections.yaml', 'examples': ['dbo']}, 'table': {'type': 'string', 'title': 'The table name', 'description': 'Table name', 'examples': ['employees']}, 'columns': {'type': 'array', 'title': 'Optional subset of columns to load', 'items': {'type': ['string', 'object'], 'title': 'name of column'}, 'examples': [['fname', {'lname': 'last_name'}]]}}, 'required': ['connection', 'table']}}}}, {'if': {'properties': {'uses': {'description': 'Block type', 'type': 'string', 'const': 'sequence'}}, 'required': ['uses']}, 'then': {'properties': {'with': {'title': 'sequence', 'description': 'Add a sequence number field to data', 'type': 'object', 'additionalProperties': False, 'examples': [], 'required': [], 'properties': {'field': {'type': 'string', 'title': 'Name of new sequence field'}, 'start': {'type': 'number', 'title': 'Start entry', 
'default': 1, 'examples': []}, 'increment': {'type': 'number', 'title': 'Increment between sequences', 'examples': []}}}}}}, {'if': {'properties': {'uses': {'description': 'Block type', 'type': 'string', 'const': 'rename_field'}}, 'required': ['uses']}, 'then': {'properties': {'with': {'title': 'rename_field', 'description': 'Renames fields. All other fields remain unchanged', 'type': 'object', 'oneOf': [{'description': 'Rename multiple fields', 'properties': {'fields': {'type': 'array', 'description': 'Fields', 'items': {'type': 'object', 'properties': {'from_field': {'description': 'From field', 'type': 'string'}, 'to_field': {'description': 'To field', 'type': 'string'}}, 'additionalProperties': False, 'required': ['from_field', 'to_field'], 'examples': [{'fields': [{'from_field': 'name.lname', 'to_field': 'name.last_name'}, {'from_field': 'name.fname', 'to_field': 'name.first_name'}]}]}}}, 'required': ['fields'], 'additionalProperties': False}, {'description': 'Rename one field', 'properties': {'from_field': {'description': 'From field', 'type': 'string'}, 'to_field': {'description': 'To field', 'type': 'string'}}, 'additionalProperties': False, 'required': ['from_field', 'to_field'], 'examples': [{'from_field': 'name.lname', 'to_field': 'name.last_name'}]}]}}}}, {'if': {'properties': {'uses': {'description': 'Block type', 'type': 'string', 'const': 'jinja_template'}}, 'required': ['uses']}, 'then': {'properties': {'with': {'title': 'jinja_template', 'description': 'Apply Jinja template to a field', 'type': 'object', 'properties': {'field': {'description': 'Field', 'type': 'string'}, 'template': {'description': 'Jinja Template', 'type': 'string'}}, 'additionalProperties': False, 'required': ['field', 'template'], 'examples': [{'field': 'name.full_name', 'template': '{{ name.fname }} {{ name.lname }}'}, {'field': 'name.fname_upper', 'template': '{{ name.fname | upper }}'}]}}}}, {'if': {'properties': {'uses': {'description': 'Block type', 'type': 'string', 
'const': 'add_field'}}, 'required': ['uses']}, 'then': {'properties': {'with': {'title': 'add_field', 'description': 'Add fields to a record', 'type': 'object', 'oneOf': [{'description': 'Add multiple fields', 'properties': {'fields': {'type': 'array', 'description': 'Fields', 'items': {'type': 'object', 'properties': {'field': {'description': 'Field', 'type': 'string'}, 'expression': {'description': 'Expression', 'type': 'string'}, 'language': {'description': 'Language', 'type': 'string', 'enum': ['jmespath', 'sql']}}, 'additionalProperties': False, 'required': ['field', 'expression', 'language']}}}, 'required': ['fields'], 'additionalProperties': False, 'examples': [{'fields': [{'field': 'name.full_name', 'language': 'jmespath', 'expression': "concat([name.fname, ' ', name.lname])"}, {'field': 'name.fname_upper', 'language': 'jmespath', 'expression': 'upper(name.fname)'}]}]}, {'description': 'Add one field', 'properties': {'field': {'description': 'Field', 'type': 'string'}, 'expression': {'description': 'Expression', 'type': 'string'}, 'language': {'description': 'Language', 'type': 'string', 'enum': ['jmespath', 'sql']}}, 'additionalProperties': False, 'required': ['field', 'expression', 'language'], 'examples': [{'field': 'country', 'language': 'sql', 'expression': "country_code || ' - ' || UPPER(country_name)"}]}]}}}}, {'if': {'properties': {'uses': {'description': 'Block type', 'type': 'string', 'const': 'std.write'}}, 'required': ['uses']}, 'then': {'properties': {'with': {'title': 'std.write', 'description': 'Write to the standard output'}}}}, {'if': {'properties': {'uses': {'description': 'Block type', 'type': 'string', 'const': 'std.read'}}, 'required': ['uses']}, 'then': {'properties': {'with': {'title': 'std.read', 'description': 'Read from the standard input'}}}}, {'if': {'properties': {'uses': {'description': 'Block type', 'type': 'string', 'const': 'redis.write'}}, 'required': ['uses']}, 'then': {'properties': {'with': {'title': 'redis.write', 
'description': 'Write to a Redis data structure', 'type': 'object', 'properties': {'connection': {'title': 'Connection name', 'type': 'string'}, 'command': {'enum': ['HSET', 'SADD', 'XADD', 'RPUSH', 'LPUSH', 'SET', 'ZADD'], 'default': 'HSET', 'type': 'string', 'title': 'Redis command', 'description': 'Redis command'}, 'key': {'description': 'Field to use as the Redis key', 'type': 'object', 'properties': {'expression': {'description': 'Expression', 'type': 'string'}, 'language': {'description': 'Language', 'type': 'string', 'enum': ['jmespath', 'sql']}}, 'required': ['expression', 'language']}}, 'additionalProperties': False, 'required': ['connection', 'key']}}}}, {'if': {'properties': {'uses': {'description': 'Block type', 'type': 'string', 'const': 'redis.read_stream'}}, 'required': ['uses']}, 'then': {'properties': {'with': {'title': 'redis.read_stream', 'description': 'Read from Redis stream', 'type': 'object', 'properties': {'connection': {'description': 'Connection name', 'type': 'string'}, 'stream_name': {'type': 'string', 'title': 'Source stream name', 'description': 'Source stream name'}, 'snapshot': {'type': 'boolean', 'title': 'Snapshot current entries and quit', 'description': 'Snapshot current entries and quit', 'default': False}}, 'additionalProperties': False, 'required': ['connection', 'stream_name']}}}}, {'if': {'properties': {'uses': {'description': 'Block type', 'type': 'string', 'const': 'redis.lookup'}}, 'required': ['uses']}, 'then': {'properties': {'with': {'title': 'redis.lookup', 'description': 'Lookup data from Redis using the given command and key', 'type': 'object', 'properties': {'connection': {'title': 'Connection name', 'type': 'string'}, 'cmd': {'title': 'Redis command', 'description': 'The command to execute', 'type': 'string'}, 'args': {'title': 'Redis command arguments', 'description': 'The list of expressions produces arguments', 'type': 'array', 'items': {'type': 'string'}}, 'language': {'description': 'Language', 'type': 
'string', 'enum': ['jmespath', 'sql']}, 'field': {'type': 'string', 'title': 'Target field', 'description': 'The field to write the result to'}}, 'additionalProperties': False, 'required': ['connection', 'cmd', 'args', 'language', 'field']}}}}, {'if': {'properties': {'uses': {'description': 'Block type', 'type': 'string', 'const': 'azure.read_event_hub'}}, 'required': ['uses']}, 'then': {'properties': {'with': {'title': 'azure.read_event_hub', 'description': 'Read from Azure Event Hub', 'type': 'object', 'properties': {'event_hub_connection_string': {'type': 'string', 'description': 'The connection string for the Azure Event Hub namespace.'}, 'event_hub_consumer_group_name': {'type': 'string', 'description': 'The name of the consumer group to read events from.'}, 'event_hub_name': {'type': 'string', 'description': 'The name of the Azure Event Hub.'}, 'checkpoint_store_connection_string': {'type': 'string', 'description': 'The connection string for the Azure Storage account used as the checkpoint store.'}, 'checkpoint_store_container_name': {'type': 'string', 'description': 'The name of the container within the checkpoint store to store the checkpoints.'}, 'batch_size': {'type': 'integer', 'description': 'The maximum number of events to receive in each batch.', 'default': 300}}, 'required': ['event_hub_connection_string', 'event_hub_consumer_group_name', 'event_hub_name', 'checkpoint_store_connection_string', 'checkpoint_store_container_name']}}}}, {'if': {'properties': {'uses': {'description': 'Block type', 'type': 'string', 'const': 'map'}}, 'required': ['uses']}, 'then': {'properties': {'with': {'title': 'map', 'description': 'Maps a record into a new output based on expressions', 'type': 'object', 'properties': {'expression': {'description': 'Expression', 'type': ['object', 'string']}, 'language': {'description': 'Language', 'type': 'string', 'enum': ['jmespath', 'sql']}}, 'additionalProperties': False, 'required': ['expression', 'language'], 'examples': 
[{'expression': {'first_name': 'first_name', 'last_name': 'last_name', 'greeting': "'Hello ' || CASE WHEN gender = 'F' THEN 'Ms.' WHEN gender = 'M' THEN 'Mr.' ELSE 'N/A' END || ' ' || full_name", 'country': 'country', 'full_name': 'full_name'}, 'language': 'sql'}, {'expression': '{"CustomerId": "customer_id", "FirstName": "first_name", "LastName": "last_name", "Company": "company", "Location": {"Street": "address", "City": "city", "State": "state", "Country": "country", "PostalCode": "postal_code"}, "Phone": "phone", "Fax": "fax", "Email": "email"}', 'language': 'jmespath'}]}}}}, {'if': {'properties': {'uses': {'description': 'Block type', 'type': 'string', 'const': 'parquet.write'}}, 'required': ['uses']}, 'then': {'properties': {'with': {'title': 'parquet.write', 'description': 'Write data to parquet', 'type': 'object', 'properties': {'file': {'description': 'Filename. Can contain a regexp or glob expression', 'type': 'string'}}, 'additionalProperties': False, 'required': ['file'], 'examples': [{'file': 'data.parquet'}]}}}}, {'if': {'properties': {'uses': {'description': 'Block type', 'type': 'string', 'const': 'parquet.read'}}, 'required': ['uses']}, 'then': {'properties': {'with': {'title': 'parquet.read', 'description': 'Read data from parquet', 'type': 'object', 'properties': {'file': {'description': 'Filename. 
Can contain a regexp or glob expression', 'type': 'string'}}, 'additionalProperties': False, 'required': ['file'], 'examples': [{'file': 'data.parquet'}]}}}}, {'if': {'properties': {'uses': {'description': 'Block type', 'type': 'string', 'const': 'remove_field'}}, 'required': ['uses']}, 'then': {'properties': {'with': {'title': 'remove_field', 'description': 'Remove fields', 'type': 'object', 'oneOf': [{'description': 'Remove multiple fields', 'properties': {'fields': {'type': 'array', 'description': 'Fields', 'items': {'type': 'object', 'properties': {'field': {'description': 'Field', 'type': 'string'}}, 'additionalProperties': False, 'required': ['field']}}}, 'required': ['fields'], 'additionalProperties': False, 'examples': [{'fields': [{'field': 'credit_card'}, {'field': 'name.mname'}]}]}, {'description': 'Remove one field', 'properties': {'field': {'description': 'Field', 'type': 'string'}}, 'additionalProperties': False, 'required': ['field'], 'examples': [{'field': 'credit_card'}]}]}}}}, {'if': {'properties': {'uses': {'description': 'Block type', 'type': 'string', 'const': 'filter'}}, 'required': ['uses']}, 'then': {'properties': {'with': {'title': 'filter', 'description': 'Filter records', 'type': 'object', 'properties': {'expression': {'description': 'Expression', 'type': 'string'}, 'language': {'description': 'Language', 'type': 'string', 'enum': ['jmespath', 'sql']}}, 'additionalProperties': False, 'required': ['expression', 'language'], 'examples': [{'language': 'sql', 'expression': 'age>20'}]}}}}, {'if': {'properties': {'uses': {'description': 'Block type', 'type': 'string', 'const': 'files.read_csv'}}, 'required': ['uses']}, 'then': {'properties': {'with': {'title': 'files.read_csv', 'description': 'Read data from CSV', 'type': 'object', 'properties': {'file': {'description': 'Filename. 
Can contain a regexp or glob expression', 'type': 'string'}, 'encoding': {'description': 'Encoding to use for reading the file', 'type': 'string', 'default': 'utf-8'}, 'fields': {'type': 'array', 'title': 'List of columns to use', 'description': 'List of columns to use for extract', 'default': None, 'examples': [['fname', 'lname']], 'minLength': 1, 'additionalItems': True, 'items': {'type': 'string', 'description': 'field name', 'examples': ['fname']}}, 'skip': {'description': 'Number of lines to skip', 'type': 'number', 'minimum': 0, 'default': 0}, 'delimiter': {'description': 'Delimiter to use for splitting the csv records', 'type': 'string', 'minLength': 1, 'maxLength': 1, 'default': ','}, 'batch_size': {'description': 'Number of records to read per batch', 'type': 'number', 'minimum': 1, 'default': 1000}, 'quotechar': {'description': "A one-character string used to quote fields containing special characters, such as the delimiter or quotechar, or which contain new-line characters. It defaults to '", 'type': 'string', 'minLength': 1, 'maxLength': 1, 'default': '"'}}, 'additionalProperties': False, 'required': ['file'], 'examples': [{'file': 'archive.csv', 'delimiter': ';'}]}}}}, {'if': {'properties': {'uses': {'description': 'Block type', 'type': 'string', 'const': 'cassandra.write'}}, 'required': ['uses']}, 'then': {'properties': {'with': {'title': 'cassandra.write', 'description': 'Write into a Cassandra data store', 'type': 'object', 'additionalProperties': False, 'examples': [{'id': 'load_snowflake', 'type': 'relational.write', 'properties': {'connection': 'eu_datalake', 'table': 'employees', 'schema': 'dbo', 'load_strategy': 'APPEND'}}], 'properties': {'connection': {'type': 'string', 'title': 'The connection to use for loading', 'description': 'Logical connection name as defined in the connections.yaml', 'examples': ['europe_db', 'target', 'eu_dwh']}, 'keyspace': {'type': 'string', 'title': 'Keyspace', 'description': 'Keyspace', 'examples': ['employees']}, 
'table': {'type': 'string', 'title': 'The target table name', 'description': 'Target table name', 'examples': ['employees']}, 'keys': {'type': 'array', 'title': 'Business keys', 'items': {'type': ['string', 'object'], 'title': 'name of column'}, 'examples': [['fname', {'lname': 'last_name'}]]}, 'mapping': {'type': 'array', 'title': 'Fields to write', 'items': {'type': ['string', 'object'], 'title': 'name of column'}, 'examples': [['fname', {'lname': 'last_name'}, 'address', 'gender']]}, 'opcode_field': {'type': 'string', 'description': 'Name of the field in the payload that holds the operation (c - create, d - delete, u - update) for this record in the DB'}}, 'required': ['connection', 'keyspace', 'table', 'keys', 'mapping', 'opcode_field']}}}}]}}}