--------------------------------------- -- build-covidnyt.sql -- Alex Szalay, 2020-03-30 --------------------------------------- use covidnyt go set nocount on go --=========================================================================================== -- load all the FIPS codes for the States first -- state FIPS codes: https://www.nrcs.usda.gov/wps/portal/nrcs/detail/?cid=nrcs143_013696 --=========================================================================================== if OBJECT_ID(N'dbo.States', N'U') IS NOT NULL drop table dbo.States; go -- create table States ( state varchar(50) NOT NULL, code varchar(6) NOT NULL, fips int NOT NULL ) go bulk insert States from '\\dsp030\r$\covid-nyt\data\state-fips.csv' with (FORMAT='CSV', FIRSTROW=2, FIELDTERMINATOR = ',', ROWTERMINATOR = '\n'); go --============================================================ -- New York Times covid-19 data bank -- data source: https://github.com/nytimes/covid-19-data --============================================================ ----------------------------------------- -- create initial statistics table ----------------------------------------- if OBJECT_ID(N'dbo.statsC_RAW', N'U') IS NOT NULL drop table dbo.statsC_RAW; go -- CREATE TABLE [dbo].[statsC_RAW]( [date] [varchar](50) NULL, [county] [varchar](50) NULL, [state] [varchar](50) NULL, [fips] [varchar](50) NULL, [cases] [varchar](50) NULL, [deaths] [varchar](50) NULL ) ON [PRIMARY] GO if OBJECT_ID(N'dbo.statsS_RAW', N'U') IS NOT NULL drop table dbo.statsS_RAW; go -- CREATE TABLE [dbo].[statsS_RAW]( [date] [varchar](50) NULL, [state] [varchar](50) NULL, [fips] [varchar](50) NULL, [cases] [varchar](50) NULL, [deaths] [varchar](50) NULL ) ON [PRIMARY] GO bulk insert statsC_RAW from '\\dsp030\r$\covid-nyt\data\us-counties-04-01.csv' with (FORMAT='CSV', FIRSTROW=2, FIELDTERMINATOR = ',', ROWTERMINATOR = '\n'); go bulk insert statsS_RAW from '\\dsp030\r$\covid-nyt\data\us-states-04-01.csv' with (FORMAT='CSV', FIRSTROW=2, FIELDTERMINATOR = ',', ROWTERMINATOR = '\n'); go ------------------------------------------- -- create the statistics for each county ------------------------------------------- if OBJECT_ID(N'dbo.[StatsC]', N'U') IS NOT NULL drop table dbo.[StatsC]; go -- create table [dbo].[StatsC]( [date] [date] NOT NULL, [dd] int NOT NULL, [county] [varchar](50) NOT NULL, [state] [varchar](50) NOT NULL, [fips] [int] NULL, [cases] [int] NOT NULL, [deaths] [int] NOT NULL ) ON [PRIMARY] go if OBJECT_ID(N'dbo.[StatsS]', N'U') IS NOT NULL drop table dbo.[StatsS]; go -- create table [dbo].[StatsS]( [date] [date] NOT NULL, [dd] int NOT NULL, [state] [varchar](50) NOT NULL, [fips] [int] NULL, [cases] [int] NOT NULL, [deaths] [int] NOT NULL ) ON [PRIMARY] go ---------------------------------------- -- get the first date ----------------------- declare @d0 date='2020-01-21'; -- insert StatsC with (tablock) select cast([date] as date), datediff(dd,@d0,[date]) dd, county, [state], fips, cast(cases as int) cases, cast(deaths as int) deaths from statsC_RAW order by dd, fips -- insert StatsS with (tablock) select cast([date] as date), datediff(dd,@d0,[date]) dd, [state], fips, cast(cases as int) cases, cast(deaths as int) deaths from statsS_RAW order by dd, fips --======================================================== -- update statsC (needs to be done after each update) --======================================================== -- assign 29999 to 'Kansas City' update statsC set fips = 29999 where county='Kansas City' go -- assign 36999 to 'New York City' update statsC set fips = 36999 where county='New York City' go -- assign the state fips*1000 to the Unkown counties in statsC update c set fips = s.fips*1000 from statsC c, States s where county='Unknown' and c.state = s.state go -- ------------------ --show summary ------------------ select 'Counties' name, count(*) rows, max(date) from statsC union all select 'States' name, count(*) rows, max(date) from statsS go /* --------------------------------------- -- test, should return zero rows --------------------------------------- select * from statsC where fips is NULL */