
National Highways data quality requirements json file
The following file was developed by National Highways to check the quality of AGS data prior to submission by supply chain contractors to ensure that the AGS data files can be uploaded to the National Highways asset management system (GDMS). It was created to reduce error and to improve efficiency and the timeliness of data uploads.
National Highways provided support to develop the Data Quality functionality in the Desktop Validator application.
The use of the file is embedded into the desktop validator.
The requirements for AGS4 format files to be uploaded to GDMS are as follows:
Files must be valid and the following requirements must be satisfied:
- PROJ.PROJ_ID is mandatory
- TRAN.TRAN_ISNO is mandatory
- TRAN.TRAN_AGS is mandatory
- LOCA.LOCA_TYPE >= 75% populated
- LOCA.LOCA_GL >=75% populated
- LOCA.LOCA_FDEP >=75% populated with positive numbers
- LOCA.LOCA_STAR and LOCA.LOCA_END are format 'YYYY-MM-DD'
- LOCA.LOCA_NATE and LOCA.LOCA_NATN >= 75% populated
- LOCA.LOCA_NATE and LOCA.LOCA_NATN >= 75% populated with extents of GB (bounding box)
These have been translated into the json format of data quality requirements as shown below:
{
"file data": {
"profile name": "GDMS check",
"version": "1.1",
"purpose": "Validation for upload onto GDMS",
"organisation": "National Highways",
"rqmts": [
{
"ref": "NH_GDMS_1",
"description": "Check if PROJECT ID Exists in PROJ table",
"dimension": "consistency",
"sql": "SELECT LENGTH(PROJ_ID) FROM PROJ;",
"error": "PROJ.PROJ_ID is empty",
"pass_criteria": [">", 0]
},
{
"ref": "NH_GDMS_2",
"description": "Check if TRAN_ISNO Exists in TRAN table",
"dimension": "consistency",
"sql": "SELECT LENGTH(TRAN_ISNO) FROM TRAN;",
"error": "TRAN.TRAN_ISNO is empty",
"pass_criteria": [">", 0]
},
{
"ref": "NH_GDMS_3",
"description": "Check if TRAN_AGS Exists in TRAN table",
"dimension": "consistency",
"sql": "SELECT LENGTH(TRAN_AGS) from TRAN;",
"error": "TRAN.TRAN_AGS is empty",
"pass_criteria": [">", 0]
},
{
"ref": "NH_GDMS_4",
"description": "Check that LOCA_TYPE in LOCA table contains no empty values",
"dimension": "consistency",
"sql": "SELECT COUNT(*) FROM LOCA WHERE length(LOCA_TYPE) = 0;",
"error": "LOCA_TYPE column has empty values",
"pass_criteria": ["=", 0]
},
{
"ref": "NH_GDMS_5",
"description": "Check that 75% of LOCA_GL in LOCA table is populated",
"dimension": "consistency",
"sql": "SELECT printf ('%.2f', ((SELECT COUNT(*) FROM LOCA)-(SELECT COUNT(*) FROM LOCA WHERE length(LOCA_GL) = 0 or LOCA_GL is null))/((SELECT COUNT(*) FROM LOCA) / 100.0));",
"error": "LOCA_GL column has missing values",
"pass_criteria": [">=", 75]
},
{
"ref": "NH_GDMS_6",
"description": "Check that 75% of LOCA_FDEP in LOCA table contains positive numbers",
"dimension": "consistency",
"sql": "SELECT printf ('%.2f', ((SELECT COUNT(*) FROM LOCA)-(SELECT COUNT(*) FROM LOCA WHERE LOCA_FDEP < 0 or LOCA_FDEP is null))/((SELECT COUNT(*) FROM LOCA) / 100.0));",
"error": "LOCA_FDEP column has missing or negative values",
"pass_criteria": [">=", 75]
},
{
"ref": "NH_GDMS_7",
"description": "Check that LOCA_STAR and LOCA_ENDD data types are in the format 'yyyy-mm-dd'",
"dimension": "validity",
"sql": "SELECT COUNT(*) FROM UNTY WHERE (heading = 'LOCA_STAR' and unit <> 'yyyy-mm-dd') or (heading = 'LOCA_ENDD' and unit <> 'yyyy-mm-dd');",
"error": "LOCA_TYPE column has empty values",
"pass_criteria": ["=", 0]
},
{
"ref": "NH_GDMS_8",
"description": "Percentage of exploratory holes with (both) (OS) co-ordinates: pass >=75%",
"dimension": "consistency",
"sql": "SELECT printf ('%.2f', (SELECT COUNT(*) FROM LOCA WHERE (length(LOCA_NATE) <> 0) AND length(LOCA_NATN) <> 0) / ( (SELECT COUNT(*) FROM LOCA) / 100.0));",
"error": "Less than 75% of exploratory holes have both (OS) co-ordinates",
"pass_criteria": [">=", 75]
},
{
"ref": "NH_GDMS_9",
"description": "75% populated within extents of England",
"dimension": "consistency",
"sql": "SELECT printf ('%.2f', (SELECT COUNT(*) FROM LOCA WHERE (LOCA_NATE > 148900) AND (LOCA_NATE < 655250) AND (LOCA_NATN > 31250) AND (LOCA_NATN < 657150))/( (SELECT COUNT(*) FROM LOCA) / 100.0));",
"error": "Co-ordinates lie outside the National Highways network in England",
"pass_criteria": [">=", 75]
}
]
}
}