@@ -28,79 +28,95 @@ calculate <- S7::new_generic("calculate","x")
2828# ' @keywords internal
2929S7 :: method(create_calendar ,ti ) <- function (x ){
3030
31+ # 1. Determine the Anchor Start Date --------------------------------------
32+ # For 5-5-4 calendars, we must anchor to the fiscal start (Sunday closest to Feb 1).
33+ # For standard calendars, we use the natural data minimum.
3134
32- # # neeed to add in standard and non-standard logic here.
33-
34-
35- if (x @ datum @ calendar_type != " standard" ){
36-
35+ if (x @ datum @ calendar_type != " standard" ) {
3736 min_year <- lubridate :: year(x @ datum @ min_date )
38-
3937 start_date <- closest_sunday_feb1(min_year )
4038
41- if (min_year < lubridate :: year(start_date )){
42-
43- start_date <- closest_sunday_feb1(min_year - 1 )
44-
39+ # Ensure the anchor doesn't start after our actual data
40+ if (min_year < lubridate :: year(start_date )) {
41+ start_date <- closest_sunday_feb1(min_year - 1 )
4542 }
43+ } else {
44+ start_date <- x @ datum @ min_date
4645 }
4746
47+ # 2. Summarize Raw Data ---------------------------------------------------
48+ # Aggregate the source data to the target time unit (day, month, etc.)
49+ # before building the scaffold.
4850
49- # # summarize data table
5051 summary_dbi <- x @ datum @ data | >
5152 dplyr :: ungroup() | >
5253 make_db_tbl() | >
5354 dplyr :: mutate(
54- date = lubridate :: floor_date(!! x @ datum @ date_quo ,unit = !! x @ time_unit @ value )
55- , time_unit = !! x @ time_unit @ value
55+ date = lubridate :: floor_date(!! x @ datum @ date_quo , unit = !! x @ time_unit @ value ),
56+ time_unit = !! x @ time_unit @ value
5657 ) | >
5758 dplyr :: summarise(
58- !! x @ value @ value_vec : = sum(!! x @ value @ value_quo ,na.rm = TRUE )
59- , .by = c(date ,!!! x @ datum @ group_quo )
59+ !! x @ value @ value_vec : = sum(!! x @ value @ value_quo , na.rm = TRUE ),
60+ .by = c(date , !!! x @ datum @ group_quo )
6061 )
6162
62- # create calendar table
63-
64- if (x @ datum @ calendar_type == " standard" ){
63+ # 3. Define "Active Life" Bounds ------------------------------------------
64+ # Optimization: Calculate the first and last activity per group.
65+ # This prevents generating thousands of 'zero' rows for products that
66+ # didn't exist yet or were discontinued.
6567
68+ active_bounds <- summary_dbi | >
69+ dplyr :: summarise(
70+ min_g = min(date , na.rm = TRUE ),
71+ max_g = max(date , na.rm = TRUE ),
72+ .by = c(!!! x @ datum @ group_quo )
73+ )
6674
67- calendar_dbi <- seq_date_sql(start_date = x @ datum @ min_date ,end_date = x @ datum @ max_date ,time_unit = x @ time_unit @ value ,con = dbplyr :: remote_con(x @ datum @ data ))
75+ # 4. Generate Master Date Sequence ---------------------------------------
76+ # Create a single-column table of all possible dates in the range.
6877
69- }else {
78+ master_dates <- seq_date_sql(
79+ start_date = start_date ,
80+ end_date = x @ datum @ max_date ,
81+ time_unit = x @ time_unit @ value ,
82+ con = dbplyr :: remote_con(x @ datum @ data )
83+ )
7084
71- calendar_dbi <- seq_date_sql(start_date = start_date ,end_date = x @ datum @ max_date ,time_unit = x @ time_unit @ value ,con = dbplyr :: remote_con(x @ datum @ data ))
85+ # 5. Build the Scaffolding ------------------------------------------------
86+ # If groups exist, expand the calendar. We use an inner join to the bounds
87+ # to constrain the cross-join to only the "Active Life" of each group.
7288
89+ if (x @ datum @ group_indicator ) {
90+ calendar_dbi <- master_dates | >
91+ dplyr :: cross_join(
92+ active_bounds | > dplyr :: distinct(!!! x @ datum @ group_quo )
93+ ) | >
94+ dplyr :: inner_join(
95+ active_bounds ,
96+ by = dplyr :: join_by(!!! x @ datum @ group_quo )
97+ ) | >
98+ dplyr :: filter(date > = min_g & date < = max_g ) | >
99+ dplyr :: select(- min_g , - max_g )
100+ } else {
101+ calendar_dbi <- master_dates
73102 }
74103
75- # Expand calendar table with cross join of groups
76- if (x @ datum @ group_indicator ){
104+ # 6. Final Join & Gap Filling ---------------------------------------------
105+ # Combine the scaffold with actual data. Dates with no records are
106+ # flagged and filled with 0 to ensure continuous time intelligence.
77107
78- calendar_dbi <- calendar_dbi | >
79- dplyr :: cross_join(
80- summary_dbi | >
81- dplyr :: distinct(!!! x @ datum @ group_quo )
82- )
83- # dplyr::mutate(
84- # missing_date_indicator=dplyr::if_else(is.na(!!x@value@value_quo),1,0)
85- # ,!!x@value@value_vec:= dplyr::coalesce(!!x@value@value_quo, 0)
86- # )
87108
88- }
89109
90- # Perform a full join to ensure all time frames are represented
91- full_dbi <- dplyr :: full_join(
92- calendar_dbi
93- ,summary_dbi
94- ,by = dplyr :: join_by(date ,!!! x @ datum @ group_quo )
95- ) | >
110+ full_dbi <- calendar_dbi | >
111+ dplyr :: full_join(
112+ summary_dbi ,
113+ by = dplyr :: join_by(date , !!! x @ datum @ group_quo )
114+ ) | >
96115 dplyr :: mutate(
97- missing_date_indicator = dplyr :: if_else(is.na(!! x @ value @ value_quo ),1 , 0 )
98- , !! x @ value @ value_vec : = dplyr :: coalesce(!! x @ value @ value_quo , 0 )
116+ missing_date_indicator = dplyr :: if_else(is.na(!! x @ value @ value_quo ), 1 , 0 ),
117+ !! x @ value @ value_vec : = dplyr :: coalesce(!! x @ value @ value_quo , 0 )
99118 )
100119
101-
102-
103-
104120 return (full_dbi )
105121}
106122
0 commit comments