|
| 1 | +""" |
| 2 | +Pre-generate observation vector for DK-Sor calibration. |
| 3 | +
|
| 4 | +Reads daily FluxNet observations (NEE, Qle, Qh) for 2004-2013, filters for |
| 5 | +days with valid data and daily mean wind speed < 5 m/s, and creates a single |
| 6 | +EKP.Observation. Saves to JLD2 for use by the calibration driver. |
| 7 | +
|
| 8 | +Run once before calibration: |
| 9 | + julia --project=.buildkite experiments/calibrate_dk_sor/generate_observations.jl |
| 10 | +""" |
| 11 | + |
| 12 | +using NCDatasets |
| 13 | +using Dates |
| 14 | +using Statistics |
| 15 | +using LinearAlgebra |
| 16 | +import JLD2 |
| 17 | +import EnsembleKalmanProcesses as EKP |
| 18 | +import ClimaLand |
| 19 | + |
| 20 | +const FT = Float64 |
| 21 | +const climaland_dir = pkgdir(ClimaLand) |
| 22 | + |
| 23 | +flux_nc_path = joinpath( |
| 24 | + climaland_dir, |
| 25 | + "DK_Sor", |
| 26 | + "DK-Sor_daily_aggregated_1997-2013_FLUXNET2015_Flux.nc", |
| 27 | +) |
| 28 | +met_nc_path = joinpath( |
| 29 | + climaland_dir, |
| 30 | + "DK_Sor", |
| 31 | + "DK-Sor_1997-2014_FLUXNET2015_Met.nc", |
| 32 | +) |
| 33 | + |
| 34 | +# Calibration years (after 1-year spinup starting 2003) |
| 35 | +cal_years = 2004:2013 |
| 36 | + |
| 37 | +# ── Read flux observations ─────────────────────────────────────────────────── |
| 38 | +flux_ds = NCDataset(flux_nc_path, "r") |
| 39 | +flux_times_dt = flux_ds["time"][:] |
| 40 | + |
| 41 | +nee_raw = Float64.(coalesce.(flux_ds["NEE_daily"][:], NaN)) |
| 42 | +qle_raw = Float64.(coalesce.(flux_ds["Qle_daily"][:], NaN)) |
| 43 | +qh_raw = Float64.(coalesce.(flux_ds["Qh_daily"][:], NaN)) |
| 44 | + |
| 45 | +nee_uc_raw = Float64.(coalesce.(flux_ds["NEE_uc_daily"][:], NaN)) |
| 46 | +qle_uc_raw = Float64.(coalesce.(flux_ds["Qle_uc_daily"][:], NaN)) |
| 47 | +qh_uc_raw = Float64.(coalesce.(flux_ds["Qh_uc_daily"][:], NaN)) |
| 48 | +close(flux_ds) |
| 49 | + |
| 50 | +flux_dates = Date.(flux_times_dt) |
| 51 | + |
| 52 | +# ── Compute daily mean wind speed from hourly Met data ──────────────────────── |
| 53 | +met_ds = NCDataset(met_nc_path, "r") |
| 54 | +wind_data = Float64.(coalesce.(met_ds["Wind"][1, 1, :], NaN)) |
| 55 | +met_times = met_ds["time"][:] |
| 56 | +close(met_ds) |
| 57 | + |
| 58 | +met_dates = Date.(met_times) |
| 59 | + |
| 60 | +# Average wind speed per day |
| 61 | +daily_wind = Dict{Date, Float64}() |
| 62 | +wind_by_day = Dict{Date, Vector{Float64}}() |
| 63 | +for (i, d) in enumerate(met_dates) |
| 64 | + v = wind_data[i] |
| 65 | + isnan(v) && continue |
| 66 | + if !haskey(wind_by_day, d) |
| 67 | + wind_by_day[d] = Float64[] |
| 68 | + end |
| 69 | + push!(wind_by_day[d], v) |
| 70 | +end |
| 71 | +for (d, vals) in wind_by_day |
| 72 | + daily_wind[d] = mean(vals) |
| 73 | +end |
| 74 | + |
| 75 | +# ── Build valid-day mask ────────────────────────────────────────────────────── |
| 76 | +WIND_THRESHOLD = 5.0 # m/s |
| 77 | + |
| 78 | +cal_mask = |
| 79 | + Date(first(cal_years), 1, 1) .<= flux_dates .<= Date(last(cal_years), 12, 31) |
| 80 | + |
| 81 | +valid_mask = cal_mask .& |
| 82 | + .!isnan.(nee_raw) .& .!isnan.(qle_raw) .& .!isnan.(qh_raw) .& |
| 83 | + (abs.(nee_raw) .< 1e10) .& (abs.(qle_raw) .< 1e10) .& (abs.(qh_raw) .< 1e10) |
| 84 | + |
| 85 | +# Apply wind filter |
| 86 | +for i in eachindex(valid_mask) |
| 87 | + if valid_mask[i] |
| 88 | + w = get(daily_wind, flux_dates[i], NaN) |
| 89 | + if isnan(w) || w >= WIND_THRESHOLD |
| 90 | + valid_mask[i] = false |
| 91 | + end |
| 92 | + end |
| 93 | +end |
| 94 | + |
| 95 | +obs_dates = flux_dates[valid_mask] |
| 96 | +nee_obs = nee_raw[valid_mask] |
| 97 | +qle_obs = qle_raw[valid_mask] |
| 98 | +qh_obs = qh_raw[valid_mask] |
| 99 | +nee_uc = nee_uc_raw[valid_mask] |
| 100 | +qle_uc = qle_uc_raw[valid_mask] |
| 101 | +qh_uc = qh_uc_raw[valid_mask] |
| 102 | + |
| 103 | +n_obs = length(obs_dates) |
| 104 | +println("Valid observation days: $n_obs ($(first(cal_years))-$(last(cal_years)), wind < $(WIND_THRESHOLD) m/s)") |
| 105 | + |
| 106 | +# ── Noise covariance ────────────────────────────────────────────────────────── |
| 107 | +nee_var = mean(filter(!isnan, nee_uc .^ 2)) |
| 108 | +qle_var = mean(filter(!isnan, qle_uc .^ 2)) |
| 109 | +qh_var = mean(filter(!isnan, qh_uc .^ 2)) |
| 110 | +println( |
| 111 | + "Noise variances - NEE: $(round(nee_var, sigdigits=3)) (gC/m²/d)², " * |
| 112 | + "Qle: $(round(qle_var, sigdigits=3)) (W/m²)², " * |
| 113 | + "Qh: $(round(qh_var, sigdigits=3)) (W/m²)²", |
| 114 | +) |
| 115 | + |
| 116 | +# ── Build single observation ───────────────────────────────────────────────── |
| 117 | +y_obs = vcat(nee_obs, qle_obs, qh_obs) |
| 118 | +noise_diag = vcat( |
| 119 | + fill(nee_var, n_obs), |
| 120 | + fill(qle_var, n_obs), |
| 121 | + fill(qh_var, n_obs), |
| 122 | +) |
| 123 | + |
| 124 | +observation = EKP.Observation( |
| 125 | + Dict( |
| 126 | + "samples" => y_obs, |
| 127 | + "covariances" => Diagonal(noise_diag), |
| 128 | + "names" => "dk_sor_$(first(cal_years))_$(last(cal_years))", |
| 129 | + ), |
| 130 | +) |
| 131 | + |
| 132 | +println("Observation vector length: $(length(y_obs)) (3 × $n_obs)") |
| 133 | + |
| 134 | +# ── Save ────────────────────────────────────────────────────────────────────── |
| 135 | +obs_filepath = |
| 136 | + joinpath(climaland_dir, "experiments/calibrate_dk_sor/observations.jld2") |
| 137 | +JLD2.jldsave( |
| 138 | + obs_filepath; |
| 139 | + observation = observation, |
| 140 | + obs_dates = obs_dates, |
| 141 | + cal_years = collect(cal_years), |
| 142 | + y_obs = y_obs, |
| 143 | + noise_cov = Diagonal(noise_diag), |
| 144 | +) |
| 145 | +println("Saved to $obs_filepath") |
0 commit comments