----------------------------------------- -- 04-covid-incr-update.sql -- -- Alex Szalay, Baltimore, 2020-04-03 -- incremental load of the data ----------------------------------------- use covidnyt go set nocount on go --============================================================ -- RUN THIS CODE EVERY DAY WITH THE NEWEST DATA FROM THE NYT --============================================================ --============================================================ -- data source: https://github.com/nytimes/covid-19-data -- convert the line feed to Windows before proceeding --============================================================ ----------------------------------------- -- create RAW statistics tables ----------------------------------------- if OBJECT_ID(N'dbo.statsC_RAW', N'U') IS NOT NULL drop table dbo.statsC_RAW; go -- CREATE TABLE [dbo].[statsC_RAW]( [date] [varchar](50) NULL, [county] [varchar](50) NULL, [state] [varchar](50) NULL, [fips] [varchar](50) NULL, [cases] [varchar](50) NULL, [deaths] [varchar](50) NULL ) ON [PRIMARY] GO if OBJECT_ID(N'dbo.statsS_RAW', N'U') IS NOT NULL drop table dbo.statsS_RAW; go -- CREATE TABLE [dbo].[statsS_RAW]( [date] [varchar](50) NULL, [state] [varchar](50) NULL, [fips] [varchar](50) NULL, [cases] [varchar](50) NULL, [deaths] [varchar](50) NULL ) ON [PRIMARY] GO bulk insert statsC_RAW from '\\dsp030\r$\covid-nyt\data\us-counties-04-02.csv' with (FORMAT='CSV', FIRSTROW=2, FIELDTERMINATOR = ',', ROWTERMINATOR = '\n'); go bulk insert statsS_RAW from '\\dsp030\r$\covid-nyt\data\us-states-04-02.csv' with (FORMAT='CSV', FIRSTROW=2, FIELDTERMINATOR = ',', ROWTERMINATOR = '\n'); go --------------------------- -- only propagate changes --------------------------- declare @d0 date='2020-01-21'; declare @rows int, @last date; declare @summary table (name varchar(50), rows int, last date) -- insert @summary select * from ( select 'States' name, count(*) rows, max(date) last from StatsS union all select 'Counties'name, count(*) rows, max(date) last from StatsC ) x select @rows = count(*), @last = max(date) from statsC if (@rows=0) insert statsC with (tablock) select cast([date] as date), datediff(dd,@d0,[date]) dd, county, [state], fips, cast(cases as int) cases, cast(deaths as int) deaths from statsC_RAW order by dd,fips else insert statsC with (tablock) select cast([date] as date), datediff(dd,@d0,[date]) dd, county, [state], fips, cast(cases as int) cases, cast(deaths as int) deaths from statsC_RAW where date>@last order by dd,fips -- select @rows = count(*), @last = max(date) from statsS if (@rows=0) insert statsS with (tablock) select cast([date] as date), datediff(dd,@d0,[date]) dd, [state], fips, cast(cases as int) cases, cast(deaths as int) deaths from statsS_RAW order by dd,fips else insert statsS with (tablock) select cast([date] as date), datediff(dd,@d0,[date]) dd, [state], fips, cast(cases as int) cases, cast(deaths as int) deaths from statsS_RAW where date>@last order by dd,fips -- insert @summary select * from ( select 'States' name, count(*) rows, max(date) last from StatsS union all select 'Counties'name, count(*) rows, max(date) last from StatsC ) x select * from @summary go ----------------------------------- -- assign 36999 to 'New York City' ------------------------------------ update statsC set fips = 36999 where county='New York City' and fips is NULL go ---------------------------------- -- assign 29999 to 'Kansas City' ---------------------------------- update statsC set fips = 29999 where county='Kansas City' and fips is NULL go --------------------------------------------------------- -- assign the state fips*1000 to the Unkown counties in statsC --------------------------------------------------------- update c set fips = s.fips*1000 from statsC c, States s where county='Unknown' and c.state = s.state and c.fips is NULL go if OBJECT_ID(N'dbo.statsC_RAW', N'U') IS NOT NULL drop table dbo.statsC_RAW; go -- if OBJECT_ID(N'dbo.statsS_RAW', N'U') IS NOT NULL drop table dbo.statsS_RAW; go --