Serializing with JSON
Setup
The following describes through examples how a DeltaStream query will convert JSON payloads to DeltaStream’s Data Types when reading from a #_stream or #_changelog.
For the following examples, we will be using the Stream defined below:
CREATE STREAM jsonExample (
"booleanValue" BOOLEAN,
"stringValue" VARCHAR,
"tinyIntValue" TINYINT,
"smallIntValue" SMALLINT,
"intValue" INTEGER,
"bigIntValue" BIGINT,
"floatValue" FLOAT,
"doubleValue" DOUBLE,
"decimalValue" DECIMAL(4, 3),
"dateValue" DATE,
"timeValue" TIME,
"timestampValue" TIMESTAMP(3),
"timestampLtzValue" TIMESTAMP_LTZ,
"bytesValue" VARBINARY,
"arrayValue" ARRAY<VARCHAR>,
"mapValue" MAP<VARCHAR, BIGINT>,
"structValue" STRUCT<col1 BIGINT>
) WITH (
'topic' = 'jsonExample', 'value.format' = 'JSON'
);
Simple Example
With the query:
SELECT * FROM jsonExample;
// input record
{
"booleanValue": true,
"stringValue": "howdy",
"tinyIntValue": 1,
"smallIntValue": 12,
"intValue": 1234,
"bigIntValue": 123456789,
"floatValue": 12.34,
"doubleValue": 1234.5678,
"decimalValue": 1.123,
"dateValue": "2019-12-26",
"timeValue": "16:15:14",
"timestampValue": "2011-12-03 10:15:30",
"timestampLtzValue": "2021-05-31 16:15:14.528Z",
"bytesValue": "aG93ZHk=",
"arrayValue": [
"News",
"Travel"
],
"mapValue": {
"count": 17
},
"structValue": {
"col1": 1234
}
}
// output record
{
"booleanValue": true,
"stringValue": "howdy",
"tinyIntValue": 1,
"smallIntValue": 12,
"intValue": 1234,
"bigIntValue": 123456789,
"floatValue": 12.34,
"doubleValue": 1234.5678,
"decimalValue": 1.123,
"dateValue": "2019-12-26",
"timeValue": "16:15:14",
"timestampValue": "2011-12-03 10:15:30",
"timestampLtzValue": "2021-05-31 16:15:14.528Z",
"bytesValue": "aG93ZHk=",
"arrayValue": [
"News",
"Travel"
],
"mapValue": {
"count": 17
},
"structValue": {
"col1": 1234
}
}
Partial Record Example
When JSON records are missing fields that are specified by the CREATE STREAM DDL statement, those fields will be given the value NULL
in the output record. In the opposite case when JSON records have fields that aren’t specified by the CREATE STREAM DDL statement, then those fields will be ignored.
With the query:
SELECT "booleanValue", "stringValue", "intValue" FROM jsonExample;
// input record
{
"booleanValue": true,
"stringValue": "howdy",
"someOtherValue": 123
}
// output record
{
"booleanValue": true,
"stringValue": "howdy",
"intValue": null
}
Mismatched Types Example
Boolean
With the query:
SELECT "booleanValue" FROM jsonExample;
// input record
{ "booleanValue": true }
//output record
{ "booleanValue": true }
// input record
{ "booleanValue": false }
//output record
{ "booleanValue": false }
// input record
{ "booleanValue": "true" }
//output record
{ "booleanValue": true }
// input record
{ "booleanValue": "false" }
//output record
{ "booleanValue": false }
// input record
{ "booleanValue": "abc" }
//output record
{ "booleanValue": false }
// input record
{ "booleanValue": 123 }
//output record
{ "booleanValue": false }
Character String
With the query:
SELECT "stringValue" FROM jsonExample;
// input record
{ "stringValue": "abc" }
//output record
{ "stringValue": "abc" }
// input record
{ "stringValue": true }
//output record
{ "stringValue": "true" }
// input record
{ "stringValue": 123 }
//output record
{ "stringValue": "123" }
// input record
{ "stringValue": 123.456 }
//output record
{ "stringValue": "123.456" }
Numeric
With the query:
SELECT "tinyIntValue", "doubleValue", "decimalValue" FROM jsonExample;
// input record
{ "tinyIntValue": 1, "doubleValue": 123.1, "decimalValue": 1.123 }
// output record
{ "tinyIntValue": 1, "doubleValue": 123.1, "decimalValue": 1.123 }
// input record
{ "tinyIntValue": "1", "doubleValue": "123.1", "decimalValue": "1.123" }
// output record
{ "tinyIntValue": 1, "doubleValue": 123.1, "decimalValue": 1.123 }
// input record ("decimalValue" value is larger than defined precision)
{ "tinyIntValue": 1, "doubleValue": 123.1, "decimalValue": 12.123 }
// output record
{ "tinyIntValue": 1, "doubleValue": 123.1, "decimalValue": null }
// input record
{ "tinyIntValue": "abc", "doubleValue": 123.1, "decimalValue": 1.123 }
// deserialization error because String can't be cast to numeric value
// input record
{ "tinyIntValue": 130, "doubleValue": 123.1, "decimalValue": 1.123 }
// deserialization error because 130 is out of range for TINYINT values
// input record
{ "tinyIntValue": 1.1, "doubleValue": 123.1, "decimalValue": 1.123 }
// deserialization error because TINYINT cannot be a floating point value
Date and Time
SELECT
"dateValue",
"timeValue",
"timestampValue",
"timestampLtzValue"
FROM
jsonExample;
// input record
{
"dateValue": "2019-12-26",
"timeValue": "16:15:14",
"timestampValue": "2011-12-03 10:15:30",
"timestampLtzValue": "2021-05-31 16:15:14.528Z"
}
// output record
{
"dateValue": "2019-12-26",
"timeValue": "16:15:14",
"timestampValue": "2011-12-03 10:15:30",
"timestampLtzValue": "2021-05-31 16:15:14.528Z"
}
// input record
{ "dateValue": 1234 }
// deserialization error because date and time values must be parsed from Strings
// input record
{ "dateValue": "2019-04-31" }
// deserialization error due to invalid date (April only has 30 days)
Binary String
With the query:
SELECT "bytesValue" FROM jsonExample;
// input record
{ "bytesValue": "aG93ZHk=" }
// output record
{ "bytesValue": "aG93ZHk=" }
// input record
{ "bytesValue": 1 }
// deserialization error because we expect a String value
Constructed Data Types
With the query:
SELECT "arrayValue", "mapValue", "structValue" FROM jsonExample;
// input record
{
"arrayValue": [
"News",
"Travel"
],
"mapValue": {
"count": 17
},
"structValue": {
"col1": 1234
}
}
// output record
{
"arrayValue": [
"News",
"Travel"
],
"mapValue": {
"count": 17
},
"structValue": {
"col1": 1234
}
}
Array
// input record
{ "arrayValue": [] }
// output record
{
"arrayValue": [],
"mapValue": null,
"structValue": null
}
// input record
{
"arrayValue": [ 17 ]
}
// output record
{
"arrayValue": [ "17" ],
"mapValue": null,
"structValue": null
}
// input record
{
"arrayValue": []
}
// output record
{
"arrayValue": [],
"mapValue": null,
"structValue": null
}
Map
// input record
{
"mapValue": {
"count": 17,
"index": 102
}
}
// output record
{
"arrayValue": null,
"mapValue": {
"count": 17,
"index": 102
},
"structValue": null
}
// input record
{
"mapValue": {
"count": "howdy"
}
}
// deserialization error because String can't be cast to numeric value
// input record
{
"mapValue": {}
}
// output record
{
"arrayValue": null,
"mapValue": {},
"structValue": null
}
Struct
// input record
{
"structValue": {
"col1": 1234,
"col2": 5678
}
}
// output record
{
"arrayValue": null,
"mapValue": null,
"structValue": {
"col1": 1234
}
}
// input record
{
"structValue": {
"col2": 5678
}
}
// output record
{
"arrayValue": null,
"mapValue": null,
"structValue": {
"col1": null
}
}
// input record
{
"structValue": {}
}
// output record
{
"arrayValue": null,
"mapValue": null,
"structValue": {
"col1": null
}
}
Last updated