33# Generate Hubverse-formatted target data for the FluSight Forecast hub.
44#
55# USAGE
6- # Rscript "get_target_data_hubverse.R" as_of include_after
6+ # Rscript "get_target_data_hubverse.R" as_of oracle_include_after
77#
88# ARGUMENTS
99# as_of: Optional "YYYY-MM-DD" string.
1010# If provided, read archived target data instead of the latest version.
11- # include_after : Optional "YYYY-MM-DD" string.
12- # Exclude target data dated on or earlier than this date . Defaults to "2024-11-01".
11+ # oracle_include_after : Optional "YYYY-MM-DD" string.
12+ # Starting date for oracle-output target data (rows with a target_end_date after this date are kept). Defaults to "2024-10-31" when omitted on the command line.
1313# Note: the modeling tasks defined in tasks.json were updated for the 2024-2025 flu
14- # season, so don't run this script with an include_after date before 2024-11-01.
14+ # season, so don't run this script with an oracle_include_after date before 2024-11-01.
1515#
1616# EXAMPLE
1717# Generate Hubverse target data based on the latest available FluSight target-hospital-admissions.csv
@@ -49,7 +49,7 @@ get_location_data <- function() {
4949# ' @param target_data_path Path to the target data directory.
5050# ' @param as_of Optional "YYYY-MM-DD" string. If provided, read archived target data instead of the latest version.
5151# ' @return A dataframe
52- get_base_target_data <- function (include_after , target_data_path , as_of = NULL ) {
52+ get_base_target_data <- function (target_data_path , as_of = NULL ) {
5353 # if as_of is not provided, read the latest target data
5454 if (is.null(as_of ) || is.na(as_of )) {
5555 get_latest <- TRUE
@@ -135,18 +135,30 @@ create_time_series_target_data <- function(weekly_data, location_data) {
135135}
136136
137137# ' @description
138- # ' `create_oracle_output_target_data` creates Hubverse-formatted oracle output
139- # ' target data.
138+ # ' `create_oracle_output_target_data` uses Hubverse-formatted time series target data to generate
139+ # ' a corresponding set of oracle output data. If the time series target data doesn't contain
140+ # ' any records with a target_end_date greater than the specified oracle_include_after date,
141+ # ' return an empty oracle output dataframe.
140142# '
141143# ' @param time_series_target Dataframe with the time series target data.
144+ # ' @param oracle_include_after "YYYY-MM-DD" string. Start date for oracle output target data.
142145# ' @return A dataframe
143- create_oracle_output_target_data <- function (time_series_target ) {
146+ create_oracle_output_target_data <- function (time_series_target , oracle_include_after ) {
147+ oracle_output_cols <- c(
148+ " target" , " location" , " horizon" , " target_end_date" , " output_type" , " output_type_id" , " oracle_value" , " as_of" )
149+
150+ # Filter time series rows to those that match the oracle_include_after criteria
151+ time_series_target <- time_series_target [time_series_target $ target_end_date > oracle_include_after , ]
152+ if (nrow(time_series_target ) == 0 ) {
153+ empty_oracle_output <- data.frame (matrix (ncol = length(oracle_output_cols ), nrow = 0 ))
154+ colnames(empty_oracle_output ) <- oracle_output_cols
155+ return (empty_oracle_output )
156+ }
157+
144158 oracle_output_wk_inc <- create_oracle_output_wk_inc(time_series_target )
145159 oracle_output_rate_change <- calc_oracle_output_rate_change(time_series_target )
146160
147161 oracle_output <- dplyr :: bind_rows(oracle_output_wk_inc , oracle_output_rate_change )
148- oracle_output_cols <- c(
149- " target" , " location" , " horizon" , " target_end_date" , " output_type" , " output_type_id" , " oracle_value" , " as_of" )
150162 oracle_output <- oracle_output [oracle_output_cols ]
151163
152164 oracle_output
@@ -487,14 +499,14 @@ run_target_data_tests <- function() {
487499# ' by the FluSight Forecast hub.
488500# '
489501# ' @param as_of Optional "YYYY-MM-DD" string. If provided, read archived target data instead of the latest version.
490- # ' @param include_after "YYYY-MM_DD" string. Base target data dated on or earlier will be excluded.
502+ # ' @param oracle_include_after "YYYY-MM-DD" string. Start date for oracle output target data: time series rows with a target_end_date on or earlier than this date are excluded from the oracle output.
491503# ' @param target_data_path Path to the target data directory.
492504# ' @return NULL
493- create_target_data <- function (as_of = NULL , include_after = " 2024-11-01" , target_data_path ) {
505+ create_target_data <- function (as_of = NULL , oracle_include_after = " 2024-11-01" , target_data_path ) {
494506 # Validate input params
495507 tryCatch(
496- as.Date(include_after , format = " %Y-%m-%d" ),
497- error = function (e ) stop(paste0(" Invalid date format for include_after . Please use 'YYYY-MM-DD': " , include_after ))
508+ as.Date(oracle_include_after , format = " %Y-%m-%d" ),
509+ error = function (e ) stop(paste0(" Invalid date format for oracle_include_after . Please use 'YYYY-MM-DD': " , oracle_include_after ))
498510 )
499511 if (! is.null(as_of )) {
500512 tryCatch(
@@ -507,10 +519,9 @@ create_target_data <- function(as_of = NULL, include_after = "2024-11-01", targe
507519 time_series_file <- file.path(target_data_path , " time-series.csv" )
508520 oracle_output_file <- file.path(target_data_path , " oracle-output.csv" )
509521
510- # Get original target data from FluSight hub and filter using include_after
522+ # Get original target data from FluSight hub
511523 location_data <- get_location_data()
512524 weekly_data_all <- get_base_target_data(target_data_path = target_data_path , as_of = as_of )
513- weekly_data_all <- weekly_data_all [weekly_data_all $ date > include_after , ]
514525 as_of <- weekly_data_all $ as_of [1 ]
515526
516527 # Specify sort order for target data files (not absolutely necessary, but helps human readability and diffs)
@@ -530,7 +541,7 @@ create_target_data <- function(as_of = NULL, include_after = "2024-11-01", targe
530541 updated_time_series <- arrange_cols(updated_time_series , time_series_col_order )
531542
532543 # Create oracle output data
533- oracle_output_target <- create_oracle_output_target_data(time_series_target )
544+ oracle_output_target <- create_oracle_output_target_data(time_series_target , oracle_include_after )
534545 oracle_output_target <- arrange_cols(oracle_output_target , oracle_col_order )
535546
536547 # Re-order the target-data columns to reflect the files' sort order
@@ -561,12 +572,14 @@ create_target_data <- function(as_of = NULL, include_after = "2024-11-01", targe
561572
562573args <- commandArgs(trailingOnly = TRUE )
563574as_of <- args [1 ]
564- include_after <- args [2 ]
565-
566- # if include_after date is not provided, default to the beginning
567- # of the 2024-2025 flu season
568- if (is.na(include_after ) || is.null(include_after )) {
569- include_after <- " 2024-11-01"
575+ # Optional second argument: oracle_include_after date ("YYYY-MM-DD"); NA when not supplied
576+ oracle_include_after <- args [2 ]
577+
578+ # If oracle_include_after is not provided, default to the beginning of the
579+ # 2024-2025 flu season. Mandatory reporting was reinstated as of 2024-11-01, and the
580+ # oracle-output filter is a strict "greater than", so "2024-10-31" keeps data dated 2024-11-01 onward.
581+ if (is.na(oracle_include_after ) || is.null(oracle_include_after )) {
582+ oracle_include_after <- " 2024-10-31"
570583}
571584
572585# Run tests
@@ -581,4 +594,4 @@ if (isTRUE(all(ok))) {
581594
582595# Create the target data
583596target_data_path <- file.path(here :: here(), " target-data" )
584- create_target_data(as_of = as_of , include_after = include_after , target_data_path = target_data_path )
597+ create_target_data(as_of = as_of , oracle_include_after = oracle_include_after , target_data_path = target_data_path )
0 commit comments