33# Generate Hubverse-formatted target data for the FluSight Forecast hub.
44#
55# USAGE
6- # Rscript "get_target_data_hubverse.R" as_of include_after
6+ # Rscript "get_target_data_hubverse.R" as_of oracle_include_after
77#
88# ARGUMENTS
99# as_of: Optional "YYYY-MM-DD" string.
1010# If provided, read archived target data instead of the latest version.
11- # include_after : Optional "YYYY-MM-DD" string.
12- # Exclude target data dated on or earlier than this date . Defaults to "2024-11-01".
11+ # oracle_include_after : Optional "YYYY-MM-DD" string.
12+ # Oracle-output target data is generated only for dates strictly after this date. Defaults to "2024-10-31".
1313# Note: the modeling tasks defined in tasks.json were updated for the 2024-2025 flu
14- # season, so don't run this script with an include_after date before 2024-11-01.
14+ # season, so don't run this script with an oracle_include_after date before 2024-11-01.
1515#
1616# EXAMPLE
1717# Generate Hubverse target data based on the latest available FluSight target-hospital-admissions.csv
@@ -49,7 +49,7 @@ get_location_data <- function() {
4949# ' @param target_data_path Path to the target data directory.
5050# ' @param as_of Optional "YYYY-MM-DD" string. If provided, read archived target data instead of the latest version.
5151# ' @return A dataframe
52- get_base_target_data <- function (include_after , target_data_path , as_of = NULL ) {
52+ get_base_target_data <- function (target_data_path , as_of = NULL ) {
5353 # if as_of is not provided, read the latest target data
5454 if (is.null(as_of ) || is.na(as_of )) {
5555 get_latest <- TRUE
@@ -135,18 +135,30 @@ create_time_series_target_data <- function(weekly_data, location_data) {
135135}
136136
137137# ' @description
138- # ' `create_oracle_output_target_data` creates Hubverse-formatted oracle output
139- # ' target data.
138+ # ' `create_oracle_output_target_data` uses Hubverse-formatted time series target data to generate
139+ # ' a corresponding set of oracle output data. If the time series target data doesn't contain
140+ # ' any records with a target_end_date greater than the specified oracle_include_after date,
141+ # ' return an empty oracle output dataframe.
140142# '
141143# ' @param time_series_target Dataframe with the time series target data.
144+ # ' @param oracle_include_after "YYYY-MM-DD" string. Start date for oracle output target data.
142145# ' @return A dataframe
143- create_oracle_output_target_data <- function (time_series_target ) {
146+ create_oracle_output_target_data <- function (time_series_target , oracle_include_after ) {
147+ oracle_output_cols <- c(
148+ " target" , " location" , " horizon" , " target_end_date" , " output_type" , " output_type_id" , " oracle_value" , " as_of" )
149+
150+ # Filter time series rows to those that match the oracle_include_after criteria
151+ time_series_target <- time_series_target [time_series_target $ target_end_date > oracle_include_after , ]
152+ if (nrow(time_series_target ) == 0 ) {
153+ empty_oracle_output <- data.frame (matrix (ncol = length(oracle_output_cols ), nrow = 0 ))
154+ colnames(empty_oracle_output ) <- oracle_output_cols
155+ return (empty_oracle_output )
156+ }
157+
144158 oracle_output_wk_inc <- create_oracle_output_wk_inc(time_series_target )
145159 oracle_output_rate_change <- calc_oracle_output_rate_change(time_series_target )
146160
147161 oracle_output <- dplyr :: bind_rows(oracle_output_wk_inc , oracle_output_rate_change )
148- oracle_output_cols <- c(
149- " target" , " location" , " horizon" , " target_end_date" , " output_type" , " output_type_id" , " oracle_value" , " as_of" )
150162 oracle_output <- oracle_output [oracle_output_cols ]
151163
152164 oracle_output
@@ -487,14 +499,14 @@ run_target_data_tests <- function() {
487499# ' by the FluSight Forecast hub.
488500# '
489501# ' @param as_of Optional "YYYY-MM-DD" string. If provided, read archived target data instead of the latest version.
490- # ' @param include_after "YYYY-MM_DD" string. Base target data dated on or earlier will be excluded.
502+ # ' @param oracle_include_after "YYYY-MM-DD" string. Time series rows with a target_end_date on or before this date are excluded from the oracle output.
491503# ' @param target_data_path Path to the target data directory.
492504# ' @return NULL
493- create_target_data <- function (as_of = NULL , include_after = " 2024-11-01" , target_data_path ) {
505+ create_target_data <- function (as_of = NULL , oracle_include_after = " 2024-11-01" , target_data_path ) {
494506 # Validate input params
495507 tryCatch(
496- as.Date(include_after , format = " %Y-%m-%d" ),
497- error = function (e ) stop(paste0(" Invalid date format for include_after . Please use 'YYYY-MM-DD': " , include_after ))
508+ as.Date(oracle_include_after , format = " %Y-%m-%d" ),
509+ error = function (e ) stop(paste0(" Invalid date format for oracle_include_after . Please use 'YYYY-MM-DD': " , oracle_include_after ))
498510 )
499511 if (! is.null(as_of )) {
500512 tryCatch(
@@ -504,15 +516,12 @@ create_target_data <- function(as_of = NULL, include_after = "2024-11-01", targe
504516 }
505517
506518 # Where we'll save things
507- time_series_path <- file.path(target_data_path , " time-series" )
508- time_series_file <- file.path(time_series_path , " time-series.csv" )
509- oracle_output_path <- file.path(target_data_path , " oracle-output" )
510- oracle_output_file <- file.path(oracle_output_path , " oracle-output.csv" )
519+ time_series_file <- file.path(target_data_path , " time-series.csv" )
520+ oracle_output_file <- file.path(target_data_path , " oracle-output.csv" )
511521
512- # Get original target data from FluSight hub and filter using include_after
522+ # Get original target data from FluSight hub
513523 location_data <- get_location_data()
514524 weekly_data_all <- get_base_target_data(target_data_path = target_data_path , as_of = as_of )
515- weekly_data_all <- weekly_data_all [weekly_data_all $ date > include_after , ]
516525 as_of <- weekly_data_all $ as_of [1 ]
517526
518527 # Specify sort order for target data files (not absolutely necessary, but helps human readability and diffs)
@@ -532,7 +541,7 @@ create_target_data <- function(as_of = NULL, include_after = "2024-11-01", targe
532541 updated_time_series <- arrange_cols(updated_time_series , time_series_col_order )
533542
534543 # Create oracle output data
535- oracle_output_target <- create_oracle_output_target_data(time_series_target )
544+ oracle_output_target <- create_oracle_output_target_data(time_series_target , oracle_include_after )
536545 oracle_output_target <- arrange_cols(oracle_output_target , oracle_col_order )
537546
538547 # Re-order the target-data columns to reflect the files' sort order
@@ -542,11 +551,8 @@ create_target_data <- function(as_of = NULL, include_after = "2024-11-01", targe
542551 dplyr :: select(all_of(oracle_col_order ), everything())
543552
544553 # Write updated target data files
545- if (! dir.exists(time_series_path )) {
546- dir.create(time_series_path , recursive = TRUE )
547- }
548- if (! dir.exists(oracle_output_path )) {
549- dir.create(oracle_output_path , recursive = TRUE )
554+ if (! dir.exists(target_data_path )) {
555+ dir.create(target_data_path , recursive = TRUE )
550556 }
551557
552558 tryCatch({
@@ -566,12 +572,14 @@ create_target_data <- function(as_of = NULL, include_after = "2024-11-01", targe
566572
567573args <- commandArgs(trailingOnly = TRUE )
568574as_of <- args [1 ]
569- include_after <- args [2 ]
570-
571- # if include_after date is not provided, default to the beginning
572- # of the 2024-2025 flu season
573- if (is.na(include_after ) || is.null(include_after )) {
574- include_after <- " 2024-11-01"
575+ # Second command-line argument: optional oracle_include_after date ("YYYY-MM-DD")
576+ oracle_include_after <- args [2 ]
577+
578+ # If oracle_include_after date is not provided, default to the day before the
579+ # beginning of the 2024-2025 flu season; only dates strictly after this value are
580+ # kept (note: mandatory reporting was reinstated as of 2024-11-01)
581+ if (is.na(oracle_include_after ) || is.null(oracle_include_after )) {
582+ oracle_include_after <- " 2024-10-31"
575583}
576584
577585# Run tests
@@ -586,4 +594,4 @@ if (isTRUE(all(ok))) {
586594
587595# Create the target data
588596target_data_path <- file.path(here :: here(), " target-data" )
589- create_target_data(as_of = as_of , include_after = include_after , target_data_path = target_data_path )
597+ create_target_data(as_of = as_of , oracle_include_after = oracle_include_after , target_data_path = target_data_path )
0 commit comments