--------------------------------------- -- build-census.sql -- Alex Szalay, 2020-03-30 --------------------------------------- use covidnyt go set nocount on go --========================================================================================================= -- data source: https://factfinder.census.gov/faces/tableservices/jsf/pages/productview.xhtml?src=bkmk --========================================================================================================= ------------------------- -- load the census data ------------------------- if OBJECT_ID(N'dbo.census_RAW', N'U') IS NOT NULL drop table dbo.census_RAW; go -- create table census_RAW ( GEOId varchar(50) NULL, GEOId2 varchar(50) NULL, GEOdisplayLabel varchar(50) NULL, targetGeoId varchar(50) NULL, targetGeoId2 varchar(50) NULL, displayLabel1 varchar(1024) NULL, displayLabel2 varchar(128) NULL, HD01 varchar(50) NULL, HD02 varchar(50) NULL, SUBHD0301 varchar(50) NULL, SUBHD0302 varchar(50) NULL, SUBHD0303 varchar(50) NULL, SUBHD0401 varchar(50) NULL, SUBHD0402 varchar(50) NULL ) go -- BULK INSERT census_RAW FROM 'H:\covid-nyt\data\DEC_10_SF1_GCTPH1.US05PR.csv' WITH (FORMAT='CSV', FIRSTROW=2, FIELDTERMINATOR = ',', ROWTERMINATOR = '\n'); go ----------------------------- -- load the censusData table ----------------------------- if OBJECT_ID(N'dbo.censusData', N'U') IS NOT NULL drop table dbo.censusData; go -- create table dbo.censusData ( fips int NOT NULL, [State] varchar(128) NULL, [County] varchar(50) NULL, [Population] int NOT NULL, HousingUnits int NOT NULL, TotalArea real NOT NULL, WaterArea real NOT NULL, LandArea real NOT NULL, Popdensity real NOT NULL, HouseDensity real NOT NULL ) go -- insert [censusData] select * from ( select cast(targetGeoId2 as int) fips, displayLabel2 [State], NULL [County], cast(HD01 as int) [Population], cast(HD02 as int) [HousingUnits], cast(SUBHD0301 as real) [TotalArea], cast(SUBHD0302 as real) [WaterArea], cast(SUBHD0303 as real) [LandArea], cast(SUBHD0401 as real) [PopDensity], cast(SUBHD0402 as real) [HouseDensity] from census_RAW where datalength(targetGeoId2)=2 union all select cast(targetGeoId2 as int) fips, replace(replace(displayLabel1, 'United States - ',''),' - '+displayLabel2,'') [State], replace(displayLabel2, ' County','') [County], cast(HD01 as int) [Population], cast(HD02 as int) [HousingUnits], cast(SUBHD0301 as real) [TotalArea], cast(SUBHD0302 as real) [WaterArea], cast(SUBHD0303 as real) [LandArea], cast(SUBHD0401 as real) [PopDensity], cast(SUBHD0402 as real) [HouseDensity] from census_RAW where datalength(targetGeoId2)=5 ) x order by fips go --====================================================== -- figure out which counties are merged in the NYT data -- big cities have a fips = NULL designation -- we also have Unknown cases for each state with NULL fips --====================================================== /* select distinct state, county from statsC where fips is null -------------------------------------------- -- modify statsC where fips is NULL -- New York, New York City => 36999 -- Missouri, Kansas City => 29999 -- State, Unknown => State_fips *1000 -------------------------------------------- -- modify censusData -- add aggregates for -- New York, New York City => 36999 -- Missouri, Kansas City => 29999 -------------------------------------------- */ if OBJECT_ID(N'dbo.FIPS', N'U') IS NOT NULL drop table dbo.FIPS; go select distinct s.fips into FIPS from statsC s, CensusData c where s.fips=c.fips order by fips go ------------------------------------------------------- -- create the Kansas City entry into censusData -- The PopDensity should be weighted by the population ------------------------------------------------------- if OBJECT_ID(N'dbo.KC', N'U') IS NOT NULL drop table dbo.KC; go -- select * into KC from censusData where (state='Missouri' and county in ('Jackson','Cass','Clay', 'Platte')) or (state='Kansas' and county in ('Johnson','Leavenworth','Wyandotte') ) go -- insert censusData select 29999 fips, 'Missouri' [State], 'Kansas City' county, sum(Population) [Population], sum(HousingUnits) HousingUnits, sum(TotalArea) TotalArea, sum(WaterArea) WaterArea, sum(LandArea) LandArea, sum(cast(Population as float)*cast(PopDensity as float))/sum(Population) PopDensity, sum(cast(Population as float)*HouseDensity)/sum(Population) HouseDensity from censusData where fips in (select fips from KC) go -- flip the fips for these counties update censusData set fips = -fips where fips in (select fips from KC) go -- if OBJECT_ID(N'dbo.KC', N'U') IS NOT NULL drop table dbo.KC; go ---------------------------------------------- -- create census entry for New York City ---------------------------------------------- if OBJECT_ID(N'dbo.NYC', N'U') IS NOT NULL drop table dbo.NYC; go -- select * into NYC from censusData where state='New York' and fips not in (select fips from FIPS) and county not in ('Yates','Seneca','Richmond') and fips>100 go -- insert censusData select 36999 fips, min(state) state, 'New York City' county, sum(Population) Population, sum(HousingUnits) HousingUnits, sum(TotalArea) TotalArea, sum(WaterArea) WaterArea, sum(LandArea) LandArea, sum(cast(Population as float)*cast(PopDensity as float))/sum(Population) PopDensity, sum(HousingUnits)/sum(LandArea) HouseDensity from censusData where fips in (select fips from NYC) go -- flip the fips for these counties update censusData set fips = -fips where fips in (select fips from NYC) go if OBJECT_ID(N'dbo.NYC', N'U') IS NOT NULL drop table dbo.NYC; go -- if OBJECT_ID(N'dbo.FIPS', N'U') IS NOT NULL drop table dbo.FIPS; go -- if OBJECT_ID(N'dbo.census_RAW', N'U') IS NOT NULL drop table dbo.census_RAW; go --=========================================================================================== -- state FIPS codes: https://www.nrcs.usda.gov/wps/portal/nrcs/detail/?cid=nrcs143_013696 --=========================================================================================== if OBJECT_ID(N'dbo.states', N'U') IS NOT NULL drop table dbo.states; go -- create table States ( state varchar(50) NOT NULL, code varchar(6) NOT NULL, fips int NOT NULL ) go bulk insert States from 'E:\covid-nyt\data\state-fips.csv' with (FORMAT='CSV', FIRSTROW=2, FIELDTERMINATOR = ',', ROWTERMINATOR = '\n'); go