-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathproblem_generator.py
More file actions
29 lines (22 loc) · 1.36 KB
/
problem_generator.py
File metadata and controls
29 lines (22 loc) · 1.36 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
import numpy as np
from sklearn.preprocessing import KBinsDiscretizer
from qunfold.utils import normalize_response
def generate_problem_instance(problem_params, seed):
    """Generate one seeded toy unfolding problem.

    Truth values are sampled, additively smeared, and filtered by a random
    efficiency mask to obtain the measured sample. Bin edges are fitted on
    the measured sample with a k-means ``KBinsDiscretizer`` and extended
    with ``-inf``/``+inf`` edges so that every value falls into some bin.

    Args:
        problem_params: Mapping with the keys read by this function:
            ``"pdf_data"`` / ``"pdf_data_params"`` (sampler callable and its
            positional arguments for the truth values), ``"pdf_smear"`` /
            ``"pdf_smear_params"`` (same, for the additive smearing),
            ``"num_entries"`` (passed to both samplers as ``size``),
            ``"efficiency"`` (threshold compared against uniform draws) and
            ``"num_bins"`` (``n_bins`` of the discretizer).
        seed: Value handed to ``np.random.seed``.

    Returns:
        Tuple ``(truth, measured, response, binning)``: the truth histogram,
        the measured histogram, the response returned by
        ``normalize_response`` and the array of bin edges.

    Note:
        This reseeds NumPy's *global* legacy random state as a side effect.
    """
    np.random.seed(seed)
    n_entries = problem_params["num_entries"]

    # The three draws below must stay in this order so a given seed keeps
    # producing the same sample (presumably the samplers use NumPy's global
    # RNG -- confirm against the callers).
    sample_truth = problem_params["pdf_data"]
    truth_data = sample_truth(*problem_params["pdf_data_params"], size=n_entries)
    sample_smear = problem_params["pdf_smear"]
    smearing = sample_smear(*problem_params["pdf_smear_params"], size=n_entries)
    accepted = np.random.rand(n_entries) < problem_params["efficiency"]

    # Only entries that pass the efficiency mask are observed.
    smeared_data = truth_data + smearing
    measured_data = smeared_data[accepted]

    # Adaptive binning: k-means edges fitted on the measured sample, padded
    # with infinite outer edges.
    discretizer = KBinsDiscretizer(
        n_bins=problem_params["num_bins"],
        encode="ordinal",
        strategy="kmeans",
    ).fit(measured_data.reshape(-1, 1))
    binning = np.array([-np.inf, *discretizer.bin_edges_[0].tolist(), np.inf])

    # Truth histogram (x vector) and measured histogram (d vector).
    truth = np.histogram(truth_data, bins=binning)[0]
    measured = np.histogram(measured_data, bins=binning)[0]

    # Detector response (R matrix): first axis indexes measured bins, second
    # axis the truth bins of the accepted entries; normalization is delegated
    # to qunfold's normalize_response together with the truth histogram.
    raw_response = np.histogram2d(
        measured_data, truth_data[accepted], bins=binning
    )[0]
    response = normalize_response(response=raw_response, truth_mc=truth)

    return truth, measured, response, binning