
Commit bbe08a4

Author: Yongyi Wu (committed)
separated PR (Diffusion Model resampler for STM) from PR #1794
1 parent 6a994e7 commit bbe08a4

12 files changed: 2967 additions & 3 deletions

MCDataProducts/inc/GenId.hh

Lines changed: 3 additions & 3 deletions
@@ -44,8 +44,8 @@ namespace mu2e {
       cosmicCRY, pbarFlat, fromAscii, ExternalRMC, InternalRMC, CeLeadingLog, cosmicCORSIKA, //44
       MuCapProtonGenTool, MuCapDeuteronGenTool, DIOGenTool, MuCapNeutronGenTool, // 48
       MuCapPhotonGenTool, MuCapGammaRayGenTool, CeLeadingLogGenTool, MuplusMichelGenTool,// 52
-      gammaPairProduction, antiproton, Mu2eXGenTool,//55
-      lastEnum //56
+      gammaPairProduction, antiproton, Mu2eXGenTool, STMDownStreamGenTool,//56
+      lastEnum //57
       };

 #ifndef SWIG
@@ -64,7 +64,7 @@ namespace mu2e {
       "CosmicCRY", "pbarFlat","fromAscii","ExternalRMC","InternalRMC","CeLeadingLog", "CosmicCORSIKA", \
       "MuCapProtonGenTool", "MuCapDeuteronGenTool", "DIOGenTool", "MuCapNeutronGenTool", \
       "MuCapPhotonGenTool", "MuCapGammaRayGenTool","CeLeadingLogGenTool","MuplusMichelGenTool", \
-      "gammaPairProduction", "antiproton", "Mu2eXGenTool"
+      "gammaPairProduction", "antiproton", "Mu2eXGenTool", "STMDownStreamGenTool"
 #endif

 public:
Lines changed: 11 additions & 0 deletions
@@ -0,0 +1,11 @@
cet_make_library(
  SOURCE
    src/ScoreBasedDiffusionModel.cc
  LIBRARIES PUBLIC
    CLHEP::CLHEP
    messagefacility::messagefacility
    cetlib_except::cetlib_except
)

install_source(SUBDIRS src)
install_headers(USE_PROJECT_NAME SUBDIRS inc)
Lines changed: 321 additions & 0 deletions
@@ -0,0 +1,321 @@
// Module for training and using a score-based diffusion model
// Added by Yongyi Wu
// Mar. 2026

#pragma once

#include <vector>
#include <algorithm>
#include <cmath>
#include <string>
#include <iomanip>
#include <fstream>
#include <sstream>
#include <iostream>
#include <cctype>
#include <cassert>

#include "CLHEP/Random/RandomEngine.h"
#include "CLHEP/Random/RandFlat.h"
#include "CLHEP/Random/RandGaussQ.h"

#include "messagefacility/MessageLogger/MessageLogger.h"
#include "cetlib_except/exception.h"

namespace mu2e{
  struct DiffusionTrainingSample {
    std::vector<double> x;    // transformed state vector to diffuse (size = dim)
    std::vector<double> cond; // optional conditioning vector (size = conditionDim)
  };

  class ScoreBasedDiffusionModel {
  public:
    // Enumeration for optimizer selection
    enum class OptimizerType {
      SGD,  // Stochastic Gradient Descent
      ADAM  // Adam optimizer
    };

    // Enumeration for noise schedule selection
    enum class NoiseScheduleType {
      LINEAR, // Linear noise schedule
      COSINE  // Cosine noise schedule
    };

    // Constructor: initialize the diffusion model with CLHEP random distributions.
    //
    // Parameters:
    //   randFlat       - Reference to CLHEP RandFlat for uniform sampling (externally managed)
    //   randGaussQ     - Reference to CLHEP RandGaussQ for Gaussian noise (externally managed)
    //   dim            - Dimensionality of the state space
    //   conditionDim   - Dimensionality of the optional conditioning vector (0 for an unconditional model)
    //   hidden         - Size of the hidden layers in the neural network
    //   layers         - Number of layers in the network
    //   optimizerType  - Type of optimizer to use (SGD or ADAM, default: ADAM)
    //   adamBeta1      - Adam optimizer beta1 parameter (default: 0.9)
    //   adamBeta2      - Adam optimizer beta2 parameter (default: 0.999)
    //   adamEps        - Adam optimizer epsilon parameter (default: 1e-8)
    //   scheduleType   - Type of noise schedule (LINEAR or COSINE, default: COSINE)
    //   betaMin        - Minimum noise schedule parameter (for the LINEAR schedule, default: 1e-4)
    //   betaMax        - Maximum noise schedule parameter (for the LINEAR schedule, default: 0.02)
    //   cosineOffset   - Offset parameter (for the COSINE schedule, default: 0.008)
    //   batchSize      - Batch size for training (default: 32)
    //   gradientClipThreshold - Threshold for gradient clipping (default: 1.0)
    //   learningRate   - Learning rate for training (default: 1e-3)
    //   diffusionSteps - Number of steps in the diffusion process (default: 200)
    ScoreBasedDiffusionModel(
      // Network architecture parameters
      CLHEP::RandFlat& randFlat,
      CLHEP::RandGaussQ& randGaussQ,
      int dim,
      int conditionDim,
      int hidden,
      int layers,
      // Optimizer configuration
      OptimizerType optimizerType = OptimizerType::ADAM,
      double adamBeta1 = 0.9,
      double adamBeta2 = 0.999,
      double adamEps = 1e-8,
      // Noise schedule configuration
      NoiseScheduleType scheduleType = NoiseScheduleType::COSINE,
      double betaMin = 1e-4,
      double betaMax = 0.02,
      double cosineOffset = 0.008,
      // Training configuration
      int batchSize = 32,
      double gradientClipThreshold = 1.0,
      double learningRate = 1e-3,
      // Diffusion process configuration
      int diffusionSteps = 200
    );

    // Train the score network on a set of samples.
    // Uses random sampling and noise injection via the external engine.
    // Note that training must see all data samples: training on multiple small subsets of the data
    // and then averaging or aggregating the model parameters is not supported and may lead to
    // problems, because neural networks are not linear in their parameters.
    //
    // Parameters:
    //   data   - Training samples (transformed state vectors)
    //   epochs - Number of training epochs to perform
    void train(
      const std::vector<DiffusionTrainingSample>& data,
      int epochs
    );
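
    // (For reference: with the perturbation x_t = x + sigma(t)*eps produced by addNoise(), the usual
    //  denoising score-matching target is the score of the Gaussian perturbation kernel,
    //    grad_x log p(x_t | x) = -(x_t - x)/sigma(t)^2 = -eps/sigma(t),
    //  against which the network output is compared with the MSE loss in computeLoss().)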

    // Generate a new sample from the diffusion model via the reverse process.
    // Uses the external random engine for noise generation during sampling.
    //
    // Parameters:
    //   condition      - Optional conditioning vector (must match conditionDim_ when conditioning is enabled)
    //   useHeun        - If true, uses Heun's method (2nd order, default); if false, uses Euler's method (1st order)
    //   diffusionSteps - Number of diffusion steps for sampling (default: -1 uses the model's configured diffusionSteps_)
    //
    // Returns: A generated sample vector of dimension dim_
    std::vector<double> generateSample(
      const std::vector<double>& condition = {},
      bool useHeun = true,
      int diffusionSteps = -1
    );
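
    // (For reference: an Euler step advances with the drift evaluated only at the current point, while a
    //  Heun step re-evaluates the drift at the predicted point and averages the two,
    //    x_pred = x + f(x, t) * dt
    //    x_new  = x + 0.5 * ( f(x, t) + f(x_pred, t + dt) ) * dt
    //  giving 2nd-order accuracy at roughly twice the cost per step.)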

    // Save the model parameters to a CSV file with annotations for later use.
    // Uses a default filename of "DiffusionModel.csv" if not specified.
    //
    // Parameters:
    //   filename - Path to the CSV file where model parameters will be saved (default: "DiffusionModel.csv")
    void saveModel(const std::string& filename = "DiffusionModel.csv");

    // Load model parameters from a file to restore a previously trained model.
    // Note that, because the Adam optimizer state is not saved, it is not possible to resume training
    // from a loaded model where it left off; the loaded model can only be used for sampling.
    //
    // Parameters:
    //   randFlat, randGaussQ - externally managed CLHEP random distribution wrappers
    //   filename             - Path to the file from which model parameters will be loaded
    static ScoreBasedDiffusionModel loadModel(
      CLHEP::RandFlat& randFlat,
      CLHEP::RandGaussQ& randGaussQ,
      const std::string& filename
    );

  private:

    // ----- network -----

    // Represents a single fully-connected layer with weights and biases.
    struct Layer {
      std::vector<std::vector<double>> W; // Weight matrix [output_size][input_size]
      std::vector<double> b;              // Bias vector [output_size]

      // storage for gradients during back-propagation
      std::vector<std::vector<double>> gradW; // gradient of loss w.r.t. weights
      std::vector<double> gradb;              // gradient of loss w.r.t. biases

      // Adam optimizer state buffers
      std::vector<std::vector<double>> mW; // First moment estimates for weights, m_t = beta1*m_{t-1} + (1-beta1)*g_t
      std::vector<std::vector<double>> vW; // Second moment estimates for weights, v_t = beta2*v_{t-1} + (1-beta2)*g_t^2

      std::vector<double> mb; // First moment estimates for biases
      std::vector<double> vb; // Second moment estimates for biases
    };

    std::vector<Layer> network_; // Network layers in forward order

    // Storage for activations and pre-activations during the forward pass (used in back-propagation).
    // During the forward pass, preactivations_[l] = W[l] * activations_[l-1] + b[l],
    // and activations_[l] = activationFunction(preactivations_[l]).
    // These are needed to compute gradients during the backward pass.
    std::vector<std::vector<double>> activations_;
    std::vector<std::vector<double>> preactivations_;

    // Forward pass through the network; computes the score function.
    // The input contains the state vector, the optional condition vector, and the time embedding,
    // so its dimension is dim_ + conditionDim_ + 1.
    //
    // Parameters:
    //   x - Input vector of dimension dim_ + conditionDim_ + 1
    //
    // Returns: Output vector (dimension depends on network architecture)
    std::vector<double> forward(
      const std::vector<double>& x
    );

    // Backward pass for gradient computation.
    // Computes gradients w.r.t. network parameters via the chain rule and stores them
    // in gradW and gradb of each layer.
    //
    // Parameters:
    //   gradOutput - Gradient of loss w.r.t. network output
    void backward(
      const std::vector<double>& gradOutput
    );

    // Update network weights using the computed gradients (Stochastic Gradient Descent, SGD).
    // Applied after the backward pass. Alternatively, an Adam optimizer step can be taken
    // via adamUpdate() for better convergence.
    //
    // Parameters:
    //   lr - Learning rate for gradient descent step
    void updateWeights(double lr);

    // Update network weights using the Adam optimization algorithm.
    // Applies the Adam update rule using the stored gradients and moment estimates.
    //
    // Parameters:
    //   lr - Learning rate for Adam update
    void adamUpdate(double lr);
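
    // (For reference, the standard Adam step (Kingma & Ba) in terms of the buffers stored in Layer:
    //    m <- beta1*m + (1-beta1)*g,          v <- beta2*v + (1-beta2)*g^2
    //    mHat = m / (1 - beta1^adamStep_),    vHat = v / (1 - beta2^adamStep_)
    //    w <- w - lr * mHat / (sqrt(vHat) + adamEps_)
    //  where g is the gradient held in gradW/gradb and adamStep_ counts completed update steps.)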

    // ----- diffusion -----

    // Noise schedule parameter beta(t) over diffusion time [0,1].
    // For the LINEAR schedule this is a linear interpolation between betaMin_ and betaMax_.
    //
    // Parameters:
    //   t - Diffusion time parameter in [0,1]
    //
    // Returns: Beta value for the given time step
    double beta(double t) const;

    // Standard deviation of the noise at diffusion time t.
    // Related to the noise schedule via sigma(t) = sqrt(1 - exp(-integral(beta(s) ds))).
    //
    // Parameters:
    //   t - Diffusion time parameter in [0,1]
    //
    // Returns: Noise standard deviation at time t
    double sigma(double t) const;
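
    // (For reference: with the LINEAR schedule beta(s) = betaMin_ + s*(betaMax_ - betaMin_), the integral
    //  appearing in sigma(t) has the closed form
    //    integral_0^t beta(s) ds = betaMin_*t + 0.5*(betaMax_ - betaMin_)*t^2,
    //  so sigma(t) = sqrt(1 - exp(-betaMin_*t - 0.5*(betaMax_ - betaMin_)*t^2)).)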

    // Add Gaussian noise to a state vector at diffusion time t.
    // Uses the external engine (randGaussQ_) for reproducible noise generation.
    // The noise sample is stored in eps for later use in training.
    //
    // Parameters:
    //   x   - Original state vector
    //   t   - Diffusion time parameter in [0,1]
    //   eps - Output: Gaussian noise vector used for perturbation (size = dim_)
    //
    // Returns: Noisy state = x + sigma(t) * eps
    std::vector<double> addNoise(
      const std::vector<double>& x,
      double t,
      std::vector<double>& eps
    );

    // ----- loss -----

    // Compute the Mean Squared Error between the predicted score and the target score.
    // Used during training to measure model performance.
    //
    // Parameters:
    //   score  - Model's predicted score vector
    //   target - Ground truth target score vector
    //
    // Returns: MSE loss value (scalar)
    double computeLoss(
      const std::vector<double>& score,
      const std::vector<double>& target
    ) const;

    // Clip gradients to prevent exploding gradients during training.
    //
    // Parameters:
    //   maxNorm - Maximum allowed norm for the gradients. If the total norm exceeds this threshold,
    //             gradients are scaled down to have norm equal to maxNorm.
    void clipGradients(double maxNorm);

    // ----- internal vars -----

    // The random engine is NOT owned by this class; it is injected externally by the framework.
    // Below are CLHEP distribution wrappers for the actual random number generation.
    // They wrap the external engine and provide specific probability distributions:
    //   - RandFlat:   uniform distribution on [0,1)
    //   - RandGaussQ: Gaussian (normal) distribution with mean=0, sigma=1 (or custom)
    // Both are bound in the constructor to externally managed wrappers.
    CLHEP::RandFlat& randFlat_;     // Used for uniform sampling (e.g., batch selection)
    CLHEP::RandGaussQ& randGaussQ_; // Used for Gaussian noise in the diffusion process

    // Model hyperparameters
    int dim_;          // Dimensionality of state space
    int conditionDim_; // Dimensionality of the optional conditioning vector
    int hidden_;       // Hidden layer size
    int layers_;       // Number of network layers

    // Optimizer configuration
    OptimizerType optimizerType_; // Type of optimizer to use (SGD or ADAM)

    // Adam optimizer parameters
    double adamBeta1_; // First moment exponential decay rate (default: 0.9)
    double adamBeta2_; // Second moment exponential decay rate (default: 0.999)
    double adamEps_;   // Small constant for numerical stability (default: 1e-8)

    // Noise schedule configuration
    NoiseScheduleType noiseScheduleType_; // Type of noise schedule (LINEAR or COSINE)

    // Linear noise schedule parameters (beta(t) = betaMin + t*(betaMax - betaMin)).
    // The defaults betaMin = 1e-4 and betaMax = 0.02 are typical values used in diffusion models,
    // but can be tuned for specific applications.
    double betaMin_; // Beta value at t=0
    double betaMax_; // Beta value at t=1

    // Cosine noise schedule parameter (offset to avoid the singularity at t=0 in the cosine schedule, default: 0.008)
    double cosineOffset_;
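
    // (For reference: the commonly used cosine schedule (Nichol & Dhariwal) defines, with s = cosineOffset_,
    //    alphaBar(t) = cos^2( (t + s)/(1 + s) * pi/2 ) / cos^2( s/(1 + s) * pi/2 ),
    //  and the offset s keeps the schedule well behaved near t = 0.)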

    // Training configuration
    int batchSize_;                // Batch size for vectorized training (default: 32)
    double gradientClipThreshold_; // Gradient clipping threshold (default: 1.0)
    double learningRate_;          // Learning rate for training (default: 1e-3)

    // Diffusion process discretization
    int diffusionSteps_; // Number of time steps to generate a sample (default: 200)

    // Training state
    double runningLoss_;     // Accumulated loss for monitoring during training
    int adamStep_;           // Step counter for Adam optimizer (used to compute bias-corrected moment estimates)
    int trainingSampleSize_; // Total number of training samples

    // Container for tracking training loss over epochs
    std::vector<double> epochLosses_;

  };
}
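
For orientation, the sketch below shows one way the new class could be exercised with the interface declared above. It is a minimal standalone example, not part of the commit: the engine type, seed, toy data, and file name are illustrative, and in a real art module the engine and the RandFlat/RandGaussQ wrappers would come from the framework's random number service.

// Minimal illustrative driver for ScoreBasedDiffusionModel (not part of this commit).
#include <vector>
#include "CLHEP/Random/JamesRandom.h"
#include "CLHEP/Random/RandFlat.h"
#include "CLHEP/Random/RandGaussQ.h"
#include "ScoreBasedDiffusionModel.hh"  // adjust to the actual include path of the new package

int main() {
  CLHEP::HepJamesRandom engine(12345);  // externally managed engine (framework service in a real module)
  CLHEP::RandFlat flat(engine);
  CLHEP::RandGaussQ gauss(engine);

  // Unconditional 2D model: dim = 2, conditionDim = 0, 3 layers of width 32.
  mu2e::ScoreBasedDiffusionModel model(flat, gauss, 2, 0, 32, 3);

  // Toy training set of transformed state vectors.
  std::vector<mu2e::DiffusionTrainingSample> data(1000);
  for (auto& s : data) { s.x = { gauss.fire(), gauss.fire() }; }

  model.train(data, 50);                 // train on the full data set
  model.saveModel("DiffusionModel.csv"); // annotated CSV snapshot of the parameters

  // Reload for sampling only (the Adam optimizer state is not persisted).
  auto restored = mu2e::ScoreBasedDiffusionModel::loadModel(flat, gauss, "DiffusionModel.csv");
  std::vector<double> sample = restored.generateSample(); // Heun sampler with the configured step count
  return sample.size() == 2 ? 0 : 1;
}

When the model is built with conditionDim > 0, generateSample() would instead be called with a conditioning vector of that dimension, and an explicit step count can be passed to trade sampling speed against accuracy.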
Lines changed: 20 additions & 0 deletions
@@ -0,0 +1,20 @@
#!/usr/bin/env python
#
# Script to build the files found in this directory.
#

import os
Import('env')
Import('mu2e_helper')

helper=mu2e_helper(env)

mainlib = helper.make_mainlib ( [ 'CLHEP',
                                  'cetlib',
                                  'cetlib_except',
                                  'MF_MessageLogger' ] )

# This tells emacs to view this file in python mode.
# Local Variables:
# mode:python
# End:
