diff --git a/pipeline/plugins/extractors/tap-uk-gias/tap_uk_gias/tap.py b/pipeline/plugins/extractors/tap-uk-gias/tap_uk_gias/tap.py index 1f014e3..7c01b78 100644 --- a/pipeline/plugins/extractors/tap-uk-gias/tap_uk_gias/tap.py +++ b/pipeline/plugins/extractors/tap-uk-gias/tap_uk_gias/tap.py @@ -26,14 +26,16 @@ class GIASEstablishmentsStream(Stream): replication_key = None # Schema is wide (~250 columns); we declare key columns and pass through the rest + # All columns are read as strings from CSV; dbt staging models handle type casting. + # Only URN is cast to int in get_records() for the primary key. schema = th.PropertiesList( th.Property("URN", th.IntegerType, required=True), th.Property("EstablishmentName", th.StringType), th.Property("TypeOfEstablishment (name)", th.StringType), th.Property("PhaseOfEducation (name)", th.StringType), - th.Property("LA (code)", th.IntegerType), + th.Property("LA (code)", th.StringType), th.Property("LA (name)", th.StringType), - th.Property("EstablishmentNumber", th.IntegerType), + th.Property("EstablishmentNumber", th.StringType), th.Property("EstablishmentStatus (name)", th.StringType), th.Property("Postcode", th.StringType), ).to_dict()