Hi,
I'm trying to parse JSON data from Eurostats where they use the JSON-Stat format which means a lot of key-value pairs.
Example:
http://ec.europa.eu/eurostat/wdds/rest/data/v2.1/json/en/tran_sf_roadse?precision=1&sex=F&time=2018&time=2017 I tried but couldn't get to query the values AND the key of the subarrays (e.g. the "value" array:
{
"version": "2.0",
"label": "Persons killed in road accidents by sex (CARE data)",
"href": "http://ec.europa.eu/eurostat/wdds/rest/data/v2.1/json/en/tran_sf_roadse?precision=1&sex=F&time=2018&time=2017",
"source": "Eurostat",
"updated": "2020-04-14",
"status": {
"30": ":",
"31": ":",
"36": ":",
"37": ":",
"47": ":",
"49": ":",
"51": ":",
"58": ":",
"59": ":"
},
"extension": {
"datasetId": "tran_sf_roadse",
"lang": "EN",
"description": null,
"subTitle": null,
"status": {
"label": {
":": "not available"
}
}
},
"class": "dataset",
"value": {
"0": 104,
"1": 98,
"2": 148,
"3": 153,
"4": 186,
"5": 158,
"6": 67,
"7": 61,
"8": 8,
"9": 10,
"10": 143,
"11": 151,
"12": 830,
"13": 795,
"14": 43,
"15": 49,
"16": 17,
"17": 20,
"18": 116,
"19": 137,
"20": 424,
"21": 407,
"22": 56,
"23": 63,
"24": 777,
"25": 755,
"26": 63,
"27": 66,
"28": 168,
"29": 163,
"32": 7,
"33": 6,
"34": 669,
"35": 661,
"38": 4,
"39": 5,
"40": 37,
"41": 37,
"42": 9,
"43": 5,
"44": 137,
"45": 176,
"46": 32,
"48": 747,
"50": 127,
"52": 502,
"53": 482,
"54": 57,
"55": 75,
"56": 26,
"57": 21,
"60": 472,
"61": 472
},
"dimension": {
"unit": {
"label": "unit",
"category": {
"index": {
"NR": 0
},
"label": {
"NR": "Number"
}
}
},
"sex": {
"label": "sex",
"category": {
"index": {
"F": 0
},
"label": {
"F": "Females"
}
}
},
"geo": {
"label": "geo",
"category": {
"index": {
"AT": 0,
"BE": 1,
"BG": 2,
"CH": 3,
"CY": 4,
"CZ": 5,
"DE": 6,
"DK": 7,
"EE": 8,
"EL": 9,
"ES": 10,
"FI": 11,
"FR": 12,
"HR": 13,
"HU": 14,
"IE": 15,
"IS": 16,
"IT": 17,
"LT": 18,
"LU": 19,
"LV": 20,
"MT": 21,
"NL": 22,
"NO": 23,
"PL": 24,
"PT": 25,
"RO": 26,
"SE": 27,
"SI": 28,
"SK": 29,
"UK": 30
},
"label": {
"AT": "Austria",
"BE": "Belgium",
"BG": "Bulgaria",
"CH": "Switzerland",
"CY": "Cyprus",
"CZ": "Czechia",
"DE": "Germany (until 1990 former territory of the FRG)",
"DK": "Denmark",
"EE": "Estonia",
"EL": "Greece",
"ES": "Spain",
"FI": "Finland",
"FR": "France",
"HR": "Croatia",
"HU": "Hungary",
"IE": "Ireland",
"IS": "Iceland",
"IT": "Italy",
"LT": "Lithuania",
"LU": "Luxembourg",
"LV": "Latvia",
"MT": "Malta",
"NL": "Netherlands",
"NO": "Norway",
"PL": "Poland",
"PT": "Portugal",
"RO": "Romania",
"SE": "Sweden",
"SI": "Slovenia",
"SK": "Slovakia",
"UK": "United Kingdom"
}
}
},
"time": {
"label": "time",
"category": {
"index": {
"2017": 0,
"2018": 1
},
"label": {
"2017": "2017",
"2018": "2018"
}
}
}
},
"id": [
"unit",
"sex",
"geo",
"time"
],
"size": [
1,
1,
31,
2
]
}
Problem I'm facing, the keys are running number and can change according to the data. I loaded the JSON into a table (with the JSON check constraint) and tried it with the JSON_TABLE syntax:
--create table
CREATE TABLE TBL_EUROSTAT_RAW_JSON
(
ID NUMBER GENERATED ALWAYS AS IDENTITY INCREMENT BY 1 MAXVALUE 9999999999999999999999999999 MINVALUE 1 CACHE 20 NOT NULL
, IMPORTED_WHEN TIMESTAMP(6) DEFAULT SYSDATE
, IMPORTED_JSON CLOB);
--add index
ALTER TABLE TBL_EUROSTAT_RAW_JSON
ADD CONSTRAINT TBL_EUROSTAT_RAW_JSON_CHK1 CHECK
(imported_json is JSON)
ENABLE;
--load data
declare
l_clob clob;
l_buffer varchar2(32767);
l_amount number;
l_offset number;
begin
l_clob := apex_web_service.make_rest_request(
p_url => 'https://ec.europa.eu/eurostat/wdds/rest/data/v2.1/json/en/tran_sf_roadse?precision=1&sex=F&time=2018&time=2017',
p_http_method => 'GET');
l_amount := 32000;
l_offset := 1;
begin
loop
dbms_lob.read( l_clob, l_amount, l_offset, l_buffer );
dbms_output.put_line(l_buffer);
l_offset := l_offset + l_amount;
l_amount := 32000;
end loop;
exception
when no_data_found then
null;
end;
INSERT INTO tbl_eurostat_raw_json (
imported_json
) VALUES (
l_clob
);
end;
/
--QUERY
SELECT estat.*
FROM tbl_eurostat_raw_json e,
json_table(e.imported_json
COLUMNS (version PATH '$.version'
--,value VARCHAR2(1000 CHAR) FORMAT JSON
, NESTED PATH '$.value.*' COLUMNS (
status_index PATH '$.value' ,
status_value PATH '$[*]'
)
)) AS estat;
Thing is there are no arrays in that JSON. Just a big JSON objects.
I suggest creating a JSON search index with the Data Guide enabled. This gives you a couple of things:
- You can get the schema for a row or set of by calling JSON_Dataguide
- The ability to auto-create views with the JSON table expression
For example:
set define off
insert into tbl_eurostat_raw_json (
imported_json
) values (
'{
"version": "2.0",
"label": "Persons killed in road accidents by sex (CARE data)",
"href": "http://ec.europa.eu/eurostat/wdds/rest/data/v2.1/json/en/tran_sf_roadse?precision=1&sex=F&time=2018&time=2017",
"source": "Eurostat",
"updated": "2020-04-14",
"status": {
"30": ":",
"31": ":",
"36": ":",
"37": ":",
"47": ":",
"49": ":",
"51": ":",
"58": ":",
"59": ":"
},
"extension": {
"datasetId": "tran_sf_roadse",
"lang": "EN",
"description": null,
"subTitle": null,
"status": {
"label": {
":": "not available"
}
}
},
"class": "dataset",
"value": {
"0": 104,
"1": 98,
"2": 148,
"3": 153,
"4": 186,
"5": 158,
"6": 67,
"7": 61,
"8": 8,
"9": 10,
"10": 143,
"11": 151,
"12": 830,
"13": 795,
"14": 43,
"15": 49,
"16": 17,
"17": 20,
"18": 116,
"19": 137,
"20": 424,
"21": 407,
"22": 56,
"23": 63,
"24": 777,
"25": 755,
"26": 63,
"27": 66,
"28": 168,
"29": 163,
"32": 7,
"33": 6,
"34": 669,
"35": 661,
"38": 4,
"39": 5,
"40": 37,
"41": 37,
"42": 9,
"43": 5,
"44": 137,
"45": 176,
"46": 32,
"48": 747,
"50": 127,
"52": 502,
"53": 482,
"54": 57,
"55": 75,
"56": 26,
"57": 21,
"60": 472,
"61": 472
},
"dimension": {
"unit": {
"label": "unit",
"category": {
"index": {
"NR": 0
},
"label": {
"NR": "Number"
}
}
},
"sex": {
"label": "sex",
"category": {
"index": {
"F": 0
},
"label": {
"F": "Females"
}
}
},
"geo": {
"label": "geo",
"category": {
"index": {
"AT": 0,
"BE": 1,
"BG": 2,
"CH": 3,
"CY": 4,
"CZ": 5,
"DE": 6,
"DK": 7,
"EE": 8,
"EL": 9,
"ES": 10,
"FI": 11,
"FR": 12,
"HR": 13,
"HU": 14,
"IE": 15,
"IS": 16,
"IT": 17,
"LT": 18,
"LU": 19,
"LV": 20,
"MT": 21,
"NL": 22,
"NO": 23,
"PL": 24,
"PT": 25,
"RO": 26,
"SE": 27,
"SI": 28,
"SK": 29,
"UK": 30
},
"label": {
"AT": "Austria",
"BE": "Belgium",
"BG": "Bulgaria",
"CH": "Switzerland",
"CY": "Cyprus",
"CZ": "Czechia",
"DE": "Germany (until 1990 former territory of the FRG)",
"DK": "Denmark",
"EE": "Estonia",
"EL": "Greece",
"ES": "Spain",
"FI": "Finland",
"FR": "France",
"HR": "Croatia",
"HU": "Hungary",
"IE": "Ireland",
"IS": "Iceland",
"IT": "Italy",
"LT": "Lithuania",
"LU": "Luxembourg",
"LV": "Latvia",
"MT": "Malta",
"NL": "Netherlands",
"NO": "Norway",
"PL": "Poland",
"PT": "Portugal",
"RO": "Romania",
"SE": "Sweden",
"SI": "Slovenia",
"SK": "Slovakia",
"UK": "United Kingdom"
}
}
},
"time": {
"label": "time",
"category": {
"index": {
"2017": 0,
"2018": 1
},
"label": {
"2017": "2017",
"2018": "2018"
}
}
}
},
"id": [
"unit",
"sex",
"geo",
"time"
],
"size": [
1,
1,
31,
2
]
}'
);
commit;
create search index euro_json_i
on tbl_eurostat_raw_json ( imported_json )
for json
parameters ( 'dataguide on' );
set long 1000
select json_dataguide ( imported_json, dbms_json.format_hierarchical, dbms_json.pretty )
from tbl_eurostat_raw_json;
JSON_DATAGUIDE(IMPORTED_JSON,DBMS_JSON.FORMAT_HIERARCHICAL,DBMS_JSON.PRETTY)
{
"type" : "object",
"properties" :
{
"id" :
{
"type" : "array",
"o:length" : 32,
"o:preferred_column_name" : "id",
"items" :
{
"type" : "string",
"o:length" : 4,
"o:preferred_column_name" : "scalar_string"
}
},
"href" :
{
"type" : "string",
"o:length" : 128,
"o:preferred_column_name" : "href"
},
"size" :
{
"type" : "array",
"o:length" : 16,
"o:preferred_column_name" : "size",
"items" :
{
"type" : "number",
"o:length" : 2,
"o:preferred_column_name" : "scalar_number"
}
}
...
insert into tbl_eurostat_raw_json (
imported_json
) values ( '{ "other": "value" }' );
commit;
select json_dataguide ( imported_json, dbms_json.format_hierarchical, dbms_json.pretty )
from tbl_eurostat_raw_json
where id = 2;
JSON_DATAGUIDE(IMPORTED_JSON,DBMS_JSON.FORMAT_HIERARCHICAL,DBMS_JSON.PRETTY)
{
"type" : "object",
"properties" :
{
"other" :
{
"type" : "string",
"o:length" : 8,
"o:preferred_column_name" : "other"
}
}
}
begin
dbms_json.create_view_on_path (
'euro_view_values',
'tbl_eurostat_raw_json',
'imported_json',
'$.value'
);
end;
/
select id, "IMPORTED_JSON$class", "IMPORTED_JSON$other",
"IMPORTED_JSON$0",
"IMPORTED_JSON$1",
"IMPORTED_JSON$2" --etc.
from euro_view_values;
ID IMPORTED_JSON$class IMPORTED_JSON$other IMPORTED_JSON$0 IMPORTED_JSON$1 IMPORTED_JSON$2
1 dataset <null> 104 98 148
2 <null> value <null> <null> <null>
Calling create_view_on_path re-creates the view, using the attributes that are currently in the table. Passing the path allows you to state which nested objects to return.