Serializing with JSON

Setup

The following describes through examples how a DeltaStream query will convert JSON payloads to DeltaStream’s Data Types when reading from a #_stream or #_changelog.

For the following examples, we will be using the Stream defined below:

CREATE STREAM jsonExample (
  "booleanValue" BOOLEAN,
  "stringValue" VARCHAR,
  "tinyIntValue" TINYINT,
  "smallIntValue" SMALLINT,
  "intValue" INTEGER,
  "bigIntValue" BIGINT,
  "floatValue" FLOAT,
  "doubleValue" DOUBLE,
  "decimalValue" DECIMAL(4, 3),
  "dateValue" DATE,
  "timeValue" TIME,
  "timestampValue" TIMESTAMP(3),
  "timestampLtzValue" TIMESTAMP_LTZ,
  "bytesValue" VARBINARY,
  "arrayValue" ARRAY<VARCHAR>,
  "mapValue" MAP<VARCHAR, BIGINT>,
  "structValue" STRUCT<col1 BIGINT>
) WITH (
  'topic' = 'jsonExample', 'value.format' = 'JSON'
);

Simple Example

With the query:

SELECT * FROM jsonExample;
// input record
{
  "booleanValue": true,
  "stringValue": "howdy",
  "tinyIntValue": 1,
  "smallIntValue": 12,
  "intValue": 1234,
  "bigIntValue": 123456789,
  "floatValue": 12.34,
  "doubleValue": 1234.5678,
  "decimalValue": 1.123,
  "dateValue": "2019-12-26",
  "timeValue": "16:15:14",
  "timestampValue": "2011-12-03 10:15:30",
  "timestampLtzValue": "2021-05-31 16:15:14.528Z",
  "bytesValue": "aG93ZHk=",
  "arrayValue": [
    "News",
    "Travel"
  ],
  "mapValue": {
    "count": 17
  },
  "structValue": {
    "col1": 1234
  }
}
// output record
{
  "booleanValue": true,
  "stringValue": "howdy",
  "tinyIntValue": 1,
  "smallIntValue": 12,
  "intValue": 1234,
  "bigIntValue": 123456789,
  "floatValue": 12.34,
  "doubleValue": 1234.5678,
  "decimalValue": 1.123,
  "dateValue": "2019-12-26",
  "timeValue": "16:15:14",
  "timestampValue": "2011-12-03 10:15:30",
  "timestampLtzValue": "2021-05-31 16:15:14.528Z",
  "bytesValue": "aG93ZHk=",
  "arrayValue": [
    "News",
    "Travel"
  ],
  "mapValue": {
    "count": 17
  },
  "structValue": {
    "col1": 1234
  }
}

Partial Record Example

When JSON records are missing fields that are specified by the CREATE STREAM DDL statement, those fields will be given the value NULL in the output record. In the opposite case when JSON records have fields that aren’t specified by the CREATE STREAM DDL statement, then those fields will be ignored.

With the query:

SELECT "booleanValue", "stringValue", "intValue" FROM jsonExample;
// input record
{
  "booleanValue": true,
  "stringValue": "howdy",
  "someOtherValue": 123
}

// output record
{
  "booleanValue": true,
  "stringValue": "howdy",
  "intValue": null
}

Mismatched Types Example

Boolean

With the query:

SELECT "booleanValue" FROM jsonExample;
// input record
{ "booleanValue": true }

//output record
{ "booleanValue": true }
// input record
{ "booleanValue": false }

//output record
{ "booleanValue": false }
// input record
{ "booleanValue": "true" }

//output record
{ "booleanValue": true }
// input record
{ "booleanValue": "false" }

//output record
{ "booleanValue": false }
// input record
{ "booleanValue": "abc" }

//output record
{ "booleanValue": false }
// input record
{ "booleanValue": 123 }

//output record
{ "booleanValue": false }

Character String

With the query:

SELECT "stringValue" FROM jsonExample;
// input record
{ "stringValue": "abc" }

//output record
{ "stringValue": "abc" }
// input record
{ "stringValue": true }

//output record
{ "stringValue": "true" }
// input record
{ "stringValue": 123 }

//output record
{ "stringValue": "123" }
// input record
{ "stringValue": 123.456 }

//output record
{ "stringValue": "123.456" }

Numeric

With the query:

SELECT "tinyIntValue", "doubleValue", "decimalValue" FROM jsonExample;
// input record
{ "tinyIntValue": 1, "doubleValue": 123.1, "decimalValue": 1.123 }

// output record
{ "tinyIntValue": 1, "doubleValue": 123.1, "decimalValue": 1.123 }
// input record
{ "tinyIntValue": "1", "doubleValue": "123.1", "decimalValue": "1.123" }

// output record
{ "tinyIntValue": 1, "doubleValue": 123.1, "decimalValue": 1.123 }
// input record ("decimalValue" value is larger than defined precision)
{ "tinyIntValue": 1, "doubleValue": 123.1, "decimalValue": 12.123 }

// output record
{ "tinyIntValue": 1, "doubleValue": 123.1, "decimalValue": null }
// input record
{ "tinyIntValue": "abc", "doubleValue": 123.1, "decimalValue": 1.123 }

// deserialization error because String can't be cast to numeric value
// input record
{ "tinyIntValue": 130, "doubleValue": 123.1, "decimalValue": 1.123 }

// deserialization error because 130 is out of range for TINYINT values
// input record
{ "tinyIntValue": 1.1, "doubleValue": 123.1, "decimalValue": 1.123 }

// deserialization error because TINYINT cannot be a floating point value

Date and Time

SELECT 
  "dateValue", 
  "timeValue", 
  "timestampValue", 
  "timestampLtzValue" 
FROM 
  jsonExample;
// input record
{
  "dateValue": "2019-12-26",
  "timeValue": "16:15:14",
  "timestampValue": "2011-12-03 10:15:30",
  "timestampLtzValue": "2021-05-31 16:15:14.528Z"
}

// output record
{
  "dateValue": "2019-12-26",
  "timeValue": "16:15:14",
  "timestampValue": "2011-12-03 10:15:30",
  "timestampLtzValue": "2021-05-31 16:15:14.528Z"
}
// input record
{ "dateValue": 1234 }

// deserialization error because date and time values must be parsed from Strings
// input record
{ "dateValue": "2019-04-31" }

// deserialization error due to invalid date (April only has 30 days)

Binary String

With the query:

SELECT "bytesValue" FROM jsonExample;
// input record
{ "bytesValue": "aG93ZHk=" }

// output record
{ "bytesValue": "aG93ZHk=" }
// input record
{ "bytesValue": 1 }

// deserialization error because we expect a String value

Constructed Data Types

With the query:

SELECT "arrayValue", "mapValue", "structValue" FROM jsonExample;
// input record
{
  "arrayValue": [
    "News",
    "Travel"
  ],
  "mapValue": {
    "count": 17
  },
  "structValue": {
    "col1": 1234
  }
}

// output record
{
  "arrayValue": [
    "News",
    "Travel"
  ],
  "mapValue": {
    "count": 17
  },
  "structValue": {
    "col1": 1234
  }
}

Array

// input record
{ "arrayValue": [] }

// output record
{
  "arrayValue": [],
  "mapValue": null,
  "structValue": null
}
// input record
{
  "arrayValue": [ 17 ]
}

// output record
{
  "arrayValue": [ "17" ],
  "mapValue": null,
  "structValue": null
}
// input record
{
  "arrayValue": []
}

// output record
{
  "arrayValue": [],
  "mapValue": null,
  "structValue": null
}

Map

// input record
{
  "mapValue": {
    "count": 17,
    "index": 102
  }
}

// output record
{
  "arrayValue": null,
  "mapValue": {
    "count": 17,
    "index": 102
  },
  "structValue": null
}
// input record
{
  "mapValue": {
    "count": "howdy"
  }
}

// deserialization error because String can't be cast to numeric value
// input record
{
  "mapValue": {}
}

// output record
{
  "arrayValue": null,
  "mapValue": {},
  "structValue": null
}

Struct

// input record
{
  "structValue": {
    "col1": 1234,
    "col2": 5678
  }
}

// output record
{
  "arrayValue": null,
  "mapValue": null,
  "structValue": {
    "col1": 1234
  }
}
// input record
{
  "structValue": {
    "col2": 5678
  }
}

// output record
{
  "arrayValue": null,
  "mapValue": null,
  "structValue": {
    "col1": null
  }
}
// input record
{
  "structValue": {}
}

// output record
{
  "arrayValue": null,
  "mapValue": null,
  "structValue": {
    "col1": null
  }
}

Last updated