From 0170dca7a777cec3a2e9fc843dceb9f87717c7c2 Mon Sep 17 00:00:00 2001 From: Amedeo Chiefa <103528316+achiefa@users.noreply.github.com> Date: Tue, 1 Jul 2025 09:28:44 +0000 Subject: [PATCH 01/23] Implementing check for repeated added rules. --- validphys2/src/validphys/config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/validphys2/src/validphys/config.py b/validphys2/src/validphys/config.py index f7d8079808..5fad1f2c7e 100644 --- a/validphys2/src/validphys/config.py +++ b/validphys2/src/validphys/config.py @@ -1541,7 +1541,7 @@ def produce_rules( ) except RuleProcessingError as e: raise ConfigError(f"Error Processing filter rules: {e}") from e - + if added_filter_rules: for i, rule in enumerate(added_filter_rules): try: From 6cf545b03fc8a4f375f894aded5dd54763ce4a88 Mon Sep 17 00:00:00 2001 From: Amedeo Chiefa <103528316+achiefa@users.noreply.github.com> Date: Tue, 1 Jul 2025 11:17:38 +0000 Subject: [PATCH 02/23] Implementing uniqueness parsing logic for filter rules. --- validphys2/src/validphys/filters.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/validphys2/src/validphys/filters.py b/validphys2/src/validphys/filters.py index 7cedcea9a0..7334b5e825 100644 --- a/validphys2/src/validphys/filters.py +++ b/validphys2/src/validphys/filters.py @@ -125,7 +125,6 @@ def to_dict(self): class FilterRule: """ Dataclass which carries the filter rule information. - """ dataset: str = None @@ -168,10 +167,13 @@ def default_filter_rules_input(): """ Return a tuple of FilterRule objects. These are defined in ``filters.yaml`` in the ``validphys.cuts`` module. - Similarly to `parse_added_filter_rules`, this function checks if the rules - are unique, i.d. if there are no multiple rules for the same dataset of + Similarly to `parse_added_filter_rules`, this function checks if the rules + are unique, i.d. if there are no multiple rules for the same dataset of process with the same rule (`reason` and `local_variables` are not hashed). """ + # TODO: This should be done using a more sophisticated comparison + # that checks if two rules are actually the same, regardless of the + # order in which the cuts are defined. list_rules = yaml_safe.load(read_text(validphys.cuts, "filters.yaml")) unique_rules = set(FilterRule(**rule) for rule in list_rules) if len(unique_rules) != len(list_rules): From 1f1a90c289350cdff49cb21c6b63c7aa3d3e395d Mon Sep 17 00:00:00 2001 From: Amedeo Chiefa <103528316+achiefa@users.noreply.github.com> Date: Wed, 2 Jul 2025 09:40:34 +0000 Subject: [PATCH 03/23] Update error message --- validphys2/src/validphys/config.py | 2 +- validphys2/src/validphys/filters.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/validphys2/src/validphys/config.py b/validphys2/src/validphys/config.py index 5fad1f2c7e..f7d8079808 100644 --- a/validphys2/src/validphys/config.py +++ b/validphys2/src/validphys/config.py @@ -1541,7 +1541,7 @@ def produce_rules( ) except RuleProcessingError as e: raise ConfigError(f"Error Processing filter rules: {e}") from e - + if added_filter_rules: for i, rule in enumerate(added_filter_rules): try: diff --git a/validphys2/src/validphys/filters.py b/validphys2/src/validphys/filters.py index 7334b5e825..0966988f3f 100644 --- a/validphys2/src/validphys/filters.py +++ b/validphys2/src/validphys/filters.py @@ -167,8 +167,8 @@ def default_filter_rules_input(): """ Return a tuple of FilterRule objects. These are defined in ``filters.yaml`` in the ``validphys.cuts`` module. 
- Similarly to `parse_added_filter_rules`, this function checks if the rules - are unique, i.d. if there are no multiple rules for the same dataset of + Similarly to `parse_added_filter_rules`, this function checks if the rules + are unique, i.d. if there are no multiple rules for the same dataset of process with the same rule (`reason` and `local_variables` are not hashed). """ # TODO: This should be done using a more sophisticated comparison From b1d9d2c0ec002a43a6840ba5e4a7ddd963a7989d Mon Sep 17 00:00:00 2001 From: achiefa Date: Mon, 15 Jul 2024 09:43:25 +0100 Subject: [PATCH 04/23] Copied fro branch 'HT_thcovmat' --- validphys2/src/validphys/commondata.py | 4 + validphys2/src/validphys/config.py | 50 +++++- validphys2/src/validphys/dataplots.py | 14 +- .../theorycovariance/construction.py | 164 +++++++++++++++++- 4 files changed, 222 insertions(+), 10 deletions(-) diff --git a/validphys2/src/validphys/commondata.py b/validphys2/src/validphys/commondata.py index 7c68c0784f..e59ac5ce2c 100644 --- a/validphys2/src/validphys/commondata.py +++ b/validphys2/src/validphys/commondata.py @@ -38,3 +38,7 @@ def loaded_commondata_with_cuts(commondata, cuts): groups_dataset_inputs_loaded_cd_with_cuts = collect( "loaded_commondata_with_cuts", ("group_dataset_inputs_by_metadata", "data_input") ) + +groups_dataset_inputs_loaded_cd_with_cuts_byprocess = collect( + "loaded_commondata_with_cuts", ("group_dataset_inputs_by_process", "data") + ) \ No newline at end of file diff --git a/validphys2/src/validphys/config.py b/validphys2/src/validphys/config.py index f7d8079808..0be74c8f69 100644 --- a/validphys2/src/validphys/config.py +++ b/validphys2/src/validphys/config.py @@ -1294,6 +1294,29 @@ def produce_loaded_user_covmat_path(self, user_covmat_path: str = ""): l = self.loader fileloc = l.check_vp_output_file(user_covmat_path) return fileloc + + + @configparser.explicit_node + def produce_covmat_custom(self, use_ht_uncertainties: bool = False, ht_version: int = 1): + if use_ht_uncertainties: + from validphys.theorycovariance.construction import thcov_ht + + return thcov_ht + else: + from validphys.theorycovariance.construction import covs_pt_prescrip + + return covs_pt_prescrip + + @configparser.explicit_node + def produce_combine_custom(self, use_ht_uncertainties: bool = False): + if use_ht_uncertainties: + from validphys.theorycovariance.construction import combine_by_type_ht + + return combine_by_type_ht + else: + from validphys.theorycovariance.construction import combine_by_type + + return combine_by_type @configparser.explicit_node def produce_nnfit_theory_covmat( @@ -1321,8 +1344,33 @@ def produce_nnfit_theory_covmat( from validphys.theorycovariance.construction import user_covmat_fitting f = user_covmat_fitting + elif use_ht_uncertainties: + # NOTE: this covmat is the same as for scale variations, which will result in a clash of + # table names if we wish to use them simultaneously + if use_user_uncertainties: + from validphys.theorycovariance.construction import total_theory_covmat_fitting + + f = total_theory_covmat_fitting + else: + from validphys.theorycovariance.construction import theory_covmat_custom_fitting + + f = theory_covmat_custom_fitting + + @functools.wraps(f) + def res(*args, **kwargs): + return f(*args, **kwargs) + + # Set this to get the same filename regardless of the action. 
+ res.__name__ = "theory_covmat" + return res + + + @configparser.explicit_node + def produce_combine_by_type_custom(self, use_ht_uncertainties: bool = False): + if use_ht_uncertainties: + return validphys.theorycovariance.construction.combine_by_type_ht + return validphys.theorycovariance.construction.combine_by_type - return f def produce_fitthcovmat( self, use_thcovmat_if_present: bool = False, fit: (str, type(None)) = None diff --git a/validphys2/src/validphys/dataplots.py b/validphys2/src/validphys/dataplots.py index c39de940a9..e96bc9ccd7 100644 --- a/validphys2/src/validphys/dataplots.py +++ b/validphys2/src/validphys/dataplots.py @@ -1344,7 +1344,7 @@ def _check_display_cuts_requires_use_cuts(display_cuts, use_cuts): @make_argcheck def _check_marker_by(marker_by): - markers = ('process type', 'experiment', 'dataset', 'group') + markers = ('process type', 'experiment', 'dataset', 'group', 'kinematics') if marker_by not in markers: raise CheckError("Unknown marker_by value", marker_by, markers) @@ -1403,7 +1403,8 @@ def plot_xq2( will be displaed and marked. The points are grouped according to the `marker_by` option. The possible - values are: "process type", "experiment", "group" or "dataset". + values are: "process type", "experiment", "group" or "dataset" for discrete + colors, or "kinematics" for coloring by 1/(Q2(1-x)) Some datasets can be made to appear highlighted in the figure: Define a key called ``highlight_datasets`` containing the names of the datasets to be @@ -1534,6 +1535,7 @@ def plot_xq2( xh = defaultdict(list) q2h = defaultdict(list) + cvdict = defaultdict(list) if not highlight_datasets: highlight_datasets = set() @@ -1564,6 +1566,8 @@ def next_options(): elif marker_by == "group": # if group is None then make sure that shows on legend. key = str(group) + elif marker_by == "kinematics": + key = None else: raise ValueError('Unknown marker_by value') @@ -1579,6 +1583,7 @@ def next_options(): xdict = x q2dict = q2 + cvdict[key].append(commondata.load().get_cv()) xdict[key].append(fitted[0]) q2dict[key].append(fitted[1]) if display_cuts: @@ -1593,6 +1598,11 @@ def next_options(): else: # This is to get the label key coords = [], [] + if marker_by == "kinematics": + ht_magnitude = np.concatenate( cvdict[key]) / (coords[1] * (1 - coords[0]) ) + out = ax.scatter(*coords, marker='.', c=ht_magnitude, cmap="viridis", norm=mcolors.LogNorm()) + clb = fig.colorbar(out) + clb.ax.set_title(r'$F_\mathrm{exp}\frac{1}{Q^2(1-x)}$') ax.plot(*coords, label=key, markeredgewidth=1, markeredgecolor=None, **key_options[key]) # Iterate again so highlights are printed on top. 
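As an aside, the colouring logic added to plot_xq2 above can be reproduced in isolation with the short sketch below. It assumes only numpy and matplotlib; the function name plot_ht_magnitude and its array arguments are illustrative and do not appear in dataplots.py. Each point in the (x, Q2) plane is coloured by the rough higher-twist magnitude F_exp / (Q2 (1 - x)) on a logarithmic colour scale, as in the new "kinematics" marker_by branch.

import matplotlib.colors as mcolors
import matplotlib.pyplot as plt
import numpy as np

def plot_ht_magnitude(x, q2, f_exp):
    """Scatter the (x, Q2) points coloured by the estimate F_exp / (Q2 * (1 - x))."""
    fig, ax = plt.subplots()
    magnitude = f_exp / (q2 * (1 - x))
    out = ax.scatter(x, q2, marker='.', c=magnitude, cmap="viridis", norm=mcolors.LogNorm())
    # Log axes are a choice of this sketch, not taken from dataplots.py
    ax.set_xscale("log")
    ax.set_yscale("log")
    clb = fig.colorbar(out)
    clb.ax.set_title(r'$F_\mathrm{exp}\frac{1}{Q^2(1-x)}$')
    return fig
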
diff --git a/validphys2/src/validphys/theorycovariance/construction.py b/validphys2/src/validphys/theorycovariance/construction.py index 8c916f89d8..d6099f4c9c 100644 --- a/validphys2/src/validphys/theorycovariance/construction.py +++ b/validphys2/src/validphys/theorycovariance/construction.py @@ -19,6 +19,8 @@ check_fit_dataset_order_matches_grouped, process_lookup, ) +import scipy.linalg as la +import scipy.interpolate as scint log = logging.getLogger(__name__) @@ -47,7 +49,7 @@ def theory_covmat_dataset(results, results_central_bytheoryids, point_prescripti return thcovmat -ProcessInfo = namedtuple("ProcessInfo", ("preds", "namelist", "sizes")) +ProcessInfo = namedtuple("ProcessInfo", ("preds", "namelist", "sizes", "data")) def combine_by_type(each_dataset_results_central_bytheory): @@ -78,12 +80,160 @@ def combine_by_type(each_dataset_results_central_bytheory): for key, item in theories_by_process.items(): theories_by_process[key] = np.concatenate(item, axis=1) process_info = ProcessInfo( - preds=theories_by_process, namelist=ordered_names, sizes=dataset_size + preds=theories_by_process, namelist=ordered_names, sizes=dataset_size, data=None ) return process_info -def covmat_3fpt(deltas1, deltas2): +def combine_by_type_ht(each_dataset_results, groups_dataset_inputs_loaded_cd_with_cuts_byprocess): + """same as combine_by_type but now for a single theory and including commondata info""" + dataset_size = defaultdict(list) + theories_by_process = defaultdict(list) + cd_by_process = defaultdict(list) + ordered_names = defaultdict(list) + for dataset, cd in zip( + each_dataset_results, groups_dataset_inputs_loaded_cd_with_cuts_byprocess + ): + name = cd.setname + if name != dataset[0].name: + raise ValueError("The underlying datasets do not match!") + theory_centrals = [x.central_value for x in dataset] + dataset_size[name] = len(theory_centrals[0]) + proc_type = process_lookup(name) + ordered_names[proc_type].append(name) + cd_by_process[proc_type].append(cd.kinematics.values) + theories_by_process[proc_type].append(theory_centrals) + + for key in theories_by_process.keys(): + theories_by_process[key] = np.concatenate(theories_by_process[key], axis=1) + cd_by_process[key] = np.concatenate(cd_by_process[key], axis=0) + process_info = ProcessInfo( + preds=theories_by_process, namelist=ordered_names, sizes=dataset_size, data=cd_by_process + ) + return process_info + + +def thcov_ht(combine_by_type_ht, H2_list, HL_list, reverse=False): + "Same as `thcov_HT` but implementing theory covariance method for each node of the spline." 
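# Sketch of the construction implemented in this function (illustrative only,
# assuming scipy; the helper `one_node_spline` below does not exist in this
# module): for every knot x_i of the ABMP-like grid, a cubic spline is built
# whose knot values are zero everywhere except at x_i (or, with reverse=True,
# equal to the prior everywhere except at x_i, which is zeroed). Each such
# spline, multiplied by the observable normalisation and divided by Q2, gives
# one independent shift vector entering the theory covariance matrix.
#
#     import numpy as np
#     from scipy.interpolate import CubicSpline
#
#     def one_node_spline(nodes, priors, i, reverse=False):
#         y = list(priors) if reverse else [0.0] * len(nodes)
#         y[i] = 0.0 if reverse else priors[i]
#         return CubicSpline(nodes, y)
#
#     nodes = [0.0, 0.1, 0.3, 0.5, 0.7, 0.9, 1.0]
#     h2_node_3 = one_node_spline(nodes, [0.2] * 7, 3)
#     shift = h2_node_3(np.array([0.05, 0.2, 0.6]))  # contribution of knot 3 only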
+ process_info = combine_by_type_ht + running_index_tot = 0 + start_proc_by_exp = defaultdict(list) + deltas = defaultdict(list) + included_proc = ["DIS NC"] + excluded_exp = {"DIS NC" : ["NMC_NC_NOTFIXED_DW_EM-F2"]} + included_exp = {} + for proc in included_proc: + aux = [] + for exp in process_info.namelist[proc]: + if exp not in excluded_exp[proc]: + aux.append(exp) + included_exp[proc] = aux + + # ABMP parametrisation + x_abmp = [0.0, 0.1, 0.3, 0.5, 0.7, 0.9, 1] + + # Check that H2_list and HL_list have the same size as x + if (len(H2_list) != len(x_abmp)) or (len(HL_list) != len(x_abmp)): + raise ValueError(f"The size of HT parameters does not match the number of nodes in the spline.") + + def wrapper_to_splines(i): + if not reverse: + shifted_H2_list = [0 for k in range(len(x_abmp))] + shifted_HL_list = [0 for k in range(len(x_abmp))] + shifted_H2_list[i] = H2_list[i] + shifted_HL_list[i] = HL_list[i] + else: + shifted_H2_list = H2_list.copy() + shifted_HL_list = HL_list.copy() + shifted_H2_list[i] = 0 + shifted_HL_list[i] = 0 + + H_2 = scint.CubicSpline(x_abmp, shifted_H2_list) + H_L = scint.CubicSpline(x_abmp, shifted_HL_list) + H_2 = np.vectorize(H_2) + H_L = np.vectorize(H_L) + return H_2, H_L + + for proc in process_info.namelist.keys(): + running_index_proc = 0 + x = np.array([]) + Q2 = np.array([]) + y = np.array([]) + + for exp in process_info.namelist[proc]: + # Locate position of the experiment + size = process_info.sizes[exp] + start_proc_by_exp[exp] = running_index_tot + running_index_tot += size + running_index_proc += size + + # Compute shifts only for a subset of processes + if proc in included_proc and exp in included_exp[proc]: + #central = process_info.preds[proc][1][start_proc_by_exp[exp] : size] # Probably this is deprecated + x = process_info.data[proc].T[0][running_index_proc - size : running_index_proc] + Q2 = process_info.data[proc].T[1][running_index_proc - size : running_index_proc] + y = process_info.data[proc].T[2][running_index_proc - size : running_index_proc] + + if "SIGMA" in exp: + N_2, N_L = compute_normalisation_by_experiment(exp, x, y, Q2) + + elif "F2" in exp: + N_2 = np.ones(shape=x.shape) + N_L = np.zeros(shape=x.shape) + + else: + raise ValueError(f"The normalisation for the observable is not known.") + + # Loop over the parameter + for i in range(len(x_abmp)): + H_L, H_2 = wrapper_to_splines(i) + deltas[f"({i+1}+,0)"] += [N_2 * H_2(x) / Q2] + deltas[f"(0,{i+1}+)"] += [N_L * H_L(x) / Q2] + + + # Construct theory covmat + covmats = defaultdict(list) + for proc1 in included_proc: + for proc2 in included_proc: + for i, exp1 in enumerate(included_exp[proc1]): + for j, exp2 in enumerate(included_exp[proc2]): + s = np.zeros(shape=(deltas["(1+,0)"][i].size, deltas["(1+,0)"][j].size)) + for par in deltas.keys(): + s += np.outer(deltas[par][i], deltas[par][j]) + start_locs = (start_proc_by_exp[exp1], start_proc_by_exp[exp2]) + covmats[start_locs] = s + return covmats + + +def compute_normalisation_by_experiment(experiment_name, x, y, Q2): + N_2 = np.zeros(shape=y.shape) + N_L = np.zeros(shape=y.shape) + + if "HERA_NC" in experiment_name or "HERA_CC" in experiment_name or "NMC" in experiment_name: + yp = 1 + np.power(1 - y, 2) + yL = np.power(y, 2) + + if "HERA_NC" in experiment_name or "NMC" in experiment_name: + N_2 = 1 + N_L = - yL / yp + + elif "HERA_CC" in experiment_name: + N_2 = 1 / 4 * yp + N_L = - N_2 * yL / yp + + if "CHORUS_CC" in experiment_name: + yL = np.power(y, 2) + Gf = 1.1663787e-05 + Mh = 0.938 + MW2 = 80.398 ** 2 + yp = 1 + 
np.power(1 - y, 2) - 2 * np.power(x * y * Mh, 2) / Q2 + N_2 = Gf**2 * Mh * yp / ( 2 * np.pi * np.power( 1 + Q2 / MW2, 2) ) + N_L = - N_2 * yL / yp + + return N_2, N_L + + +def covmat_3fpt(name1, name2, deltas1, deltas2): """Returns theory covariance sub-matrix for 3pt factorisation scale variation *only*, given two dataset names and collections of scale variation shifts""" @@ -311,11 +461,11 @@ def covs_pt_prescrip(combine_by_type, point_prescription): @table -def theory_covmat_custom_per_prescription(covs_pt_prescrip, procs_index, combine_by_type): +def theory_covmat_custom(covmat_custom, procs_index, combine_by_type_custom): """Takes the individual sub-covmats between each two processes and assembles them into a full covmat. Then reshuffles the order from ordering by process to ordering by experiment as listed in the runcard""" - process_info = combine_by_type + process_info = combine_by_type_custom # Construct a covmat_index based on the order of experiments as they are in combine_by_type # NOTE: maybe the ordering of covmat_index is always the same as that of procs_index? @@ -329,9 +479,9 @@ def theory_covmat_custom_per_prescription(covs_pt_prescrip, procs_index, combine covmat_index = pd.MultiIndex.from_tuples(indexlist, names=procs_index.names) # Put the covariance matrices between two process into a single covariance matrix - total_datapoints = sum(combine_by_type.sizes.values()) + total_datapoints = sum(process_info.sizes.values()) mat = np.zeros((total_datapoints, total_datapoints), dtype=np.float32) - for locs, cov in covs_pt_prescrip.items(): + for locs, cov in covmat_custom.items(): xsize, ysize = cov.shape mat[locs[0] : locs[0] + xsize, locs[1] : locs[1] + ysize] = cov df = pd.DataFrame(mat, index=covmat_index, columns=covmat_index) From a91a1a7efa6fd5284616601354f77712889cf914 Mon Sep 17 00:00:00 2001 From: achiefa Date: Mon, 15 Jul 2024 09:45:37 +0100 Subject: [PATCH 05/23] Removed version --- validphys2/src/validphys/config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/validphys2/src/validphys/config.py b/validphys2/src/validphys/config.py index 0be74c8f69..3c06912530 100644 --- a/validphys2/src/validphys/config.py +++ b/validphys2/src/validphys/config.py @@ -1297,7 +1297,7 @@ def produce_loaded_user_covmat_path(self, user_covmat_path: str = ""): @configparser.explicit_node - def produce_covmat_custom(self, use_ht_uncertainties: bool = False, ht_version: int = 1): + def produce_covmat_custom(self, use_ht_uncertainties: bool = False): if use_ht_uncertainties: from validphys.theorycovariance.construction import thcov_ht From ad73ef378d224a76df1e782b4ff2d92dc1653805 Mon Sep 17 00:00:00 2001 From: achiefa Date: Mon, 15 Jul 2024 19:22:01 +0100 Subject: [PATCH 06/23] Saving progress - not ready --- .../theorycovariance/construction.py | 198 ++++++++++++------ 1 file changed, 133 insertions(+), 65 deletions(-) diff --git a/validphys2/src/validphys/theorycovariance/construction.py b/validphys2/src/validphys/theorycovariance/construction.py index d6099f4c9c..b40fc0a027 100644 --- a/validphys2/src/validphys/theorycovariance/construction.py +++ b/validphys2/src/validphys/theorycovariance/construction.py @@ -5,22 +5,25 @@ from collections import defaultdict, namedtuple import logging +import operator import numpy as np import pandas as pd +import scipy.linalg as la +import scipy.interpolate as scint from reportengine import collect from reportengine.table import table pass from validphys.results import results, results_central +from validphys.convolution import 
central_fk_predictions +from validphys.core import PDF from validphys.theorycovariance.theorycovarianceutils import ( check_correct_theory_combination, check_fit_dataset_order_matches_grouped, process_lookup, ) -import scipy.linalg as la -import scipy.interpolate as scint log = logging.getLogger(__name__) @@ -84,7 +87,6 @@ def combine_by_type(each_dataset_results_central_bytheory): ) return process_info - def combine_by_type_ht(each_dataset_results, groups_dataset_inputs_loaded_cd_with_cuts_byprocess): """same as combine_by_type but now for a single theory and including commondata info""" dataset_size = defaultdict(list) @@ -113,14 +115,14 @@ def combine_by_type_ht(each_dataset_results, groups_dataset_inputs_loaded_cd_wit return process_info -def thcov_ht(combine_by_type_ht, H2_list, HL_list, reverse=False): +def thcov_ht(combine_by_type_ht, H2_list, HL_list, groups_data_by_process, pdf, reverse=False): "Same as `thcov_HT` but implementing theory covariance method for each node of the spline." process_info = combine_by_type_ht running_index_tot = 0 start_proc_by_exp = defaultdict(list) deltas = defaultdict(list) included_proc = ["DIS NC"] - excluded_exp = {"DIS NC" : ["NMC_NC_NOTFIXED_DW_EM-F2"]} + excluded_exp = {"DIS NC" : []} included_exp = {} for proc in included_proc: aux = [] @@ -129,40 +131,21 @@ def thcov_ht(combine_by_type_ht, H2_list, HL_list, reverse=False): aux.append(exp) included_exp[proc] = aux - # ABMP parametrisation + # ABMP parametrisation and target masses x_abmp = [0.0, 0.1, 0.3, 0.5, 0.7, 0.9, 1] # Check that H2_list and HL_list have the same size as x if (len(H2_list) != len(x_abmp)) or (len(HL_list) != len(x_abmp)): raise ValueError(f"The size of HT parameters does not match the number of nodes in the spline.") - - def wrapper_to_splines(i): - if not reverse: - shifted_H2_list = [0 for k in range(len(x_abmp))] - shifted_HL_list = [0 for k in range(len(x_abmp))] - shifted_H2_list[i] = H2_list[i] - shifted_HL_list[i] = HL_list[i] - else: - shifted_H2_list = H2_list.copy() - shifted_HL_list = HL_list.copy() - shifted_H2_list[i] = 0 - shifted_HL_list[i] = 0 - - H_2 = scint.CubicSpline(x_abmp, shifted_H2_list) - H_L = scint.CubicSpline(x_abmp, shifted_HL_list) - H_2 = np.vectorize(H_2) - H_L = np.vectorize(H_L) - return H_2, H_L - - for proc in process_info.namelist.keys(): + + for i_proc, proc in enumerate(process_info.namelist.keys()): running_index_proc = 0 - x = np.array([]) - Q2 = np.array([]) - y = np.array([]) + kin_dict = {} - for exp in process_info.namelist[proc]: + for i_exp, exp in enumerate(process_info.namelist[proc]): # Locate position of the experiment size = process_info.sizes[exp] + dataset = groups_data_by_process[i_proc].datasets[i_exp] start_proc_by_exp[exp] = running_index_tot running_index_tot += size running_index_proc += size @@ -170,26 +153,33 @@ def wrapper_to_splines(i): # Compute shifts only for a subset of processes if proc in included_proc and exp in included_exp[proc]: #central = process_info.preds[proc][1][start_proc_by_exp[exp] : size] # Probably this is deprecated - x = process_info.data[proc].T[0][running_index_proc - size : running_index_proc] - Q2 = process_info.data[proc].T[1][running_index_proc - size : running_index_proc] - y = process_info.data[proc].T[2][running_index_proc - size : running_index_proc] - - if "SIGMA" in exp: - N_2, N_L = compute_normalisation_by_experiment(exp, x, y, Q2) - - elif "F2" in exp: - N_2 = np.ones(shape=x.shape) - N_L = np.zeros(shape=x.shape) - - else: - raise ValueError(f"The normalisation for the 
observable is not known.") + kin_dict['x'] = process_info.data[proc].T[0][running_index_proc - size : running_index_proc] + kin_dict['Q2'] = process_info.data[proc].T[1][running_index_proc - size : running_index_proc] + kin_dict['y']= process_info.data[proc].T[2][running_index_proc - size : running_index_proc] + kin_size = kin_dict['x'].size + print(exp) + target = extract_target(dataset) # Loop over the parameter for i in range(len(x_abmp)): - H_L, H_2 = wrapper_to_splines(i) - deltas[f"({i+1}+,0)"] += [N_2 * H_2(x) / Q2] - deltas[f"(0,{i+1}+)"] += [N_L * H_L(x) / Q2] - + PC_2, PC_L = compute_ht_parametrisation(i, x_abmp, kin_dict, exp, H2_list, HL_list) + if target == 'proton': + deltas[f"p({i+1}+,0)"] += [PC_2] + deltas[f"p(0,{i+1}+)"] += [PC_L] + deltas[f"d({i+1}+,0)"] += [np.zeros(kin_size)] + deltas[f"d(0,{i+1}+)"] += [np.zeros(kin_size)] + elif target == 'deuterium': + deltas[f"p({i+1}+,0)"] += [np.zeros(kin_size)] + deltas[f"p(0,{i+1}+)"] += [np.zeros(kin_size)] + deltas[f"d({i+1}+,0)"] += [PC_2] + deltas[f"d(0,{i+1}+)"] += [PC_L] + elif target == 'ratio': + + compute_ratio_delta(dataset, pdf, "d") + deltas[f"p({i+1}+,0)"] += [PC_2] + deltas[f"p(0,{i+1}+)"] += [PC_L] + deltas[f"d({i+1}+,0)"] += [PC_2] + deltas[f"d(0,{i+1}+)"] += [PC_L] # Construct theory covmat covmats = defaultdict(list) @@ -197,38 +187,115 @@ def wrapper_to_splines(i): for proc2 in included_proc: for i, exp1 in enumerate(included_exp[proc1]): for j, exp2 in enumerate(included_exp[proc2]): - s = np.zeros(shape=(deltas["(1+,0)"][i].size, deltas["(1+,0)"][j].size)) + s = np.zeros(shape=(deltas["p(1+,0)"][i].size, deltas["p(1+,0)"][j].size)) for par in deltas.keys(): s += np.outer(deltas[par][i], deltas[par][j]) start_locs = (start_proc_by_exp[exp1], start_proc_by_exp[exp2]) covmats[start_locs] = s + import ipdb; ipdb.set_trace() return covmats +def extract_target(dataset): + if dataset.op == "NULL": + if "_P_" in dataset.name or "HERA" in dataset.name: + return "proton" + elif "_D_" in dataset.name: + return "deuteron" + else: + raise ValueError(f"No target detected for {dataset.name}") + elif dataset.op == "RATIO": + return "ratio" + else: + raise ValueError(f"Unexpected operator in {dataset.name}: {dataset.op}") + + +def compute_ratio_delta(dataset, pdf: PDF, target, PC: np.array): + """This function computes the predictions as in validphys.convolution._predictions, + but for ratio and including higher twist terms in bot NUM and """ + opfunc = operator.truediv + cuts = dataset.cuts + all_predictions = [] + for fk in dataset.fkspecs: + fk_w_cuts = fk.load_with_cuts(cuts) + tmp = central_fk_predictions(fk_w_cuts, pdf) + all_predictions.append(np.concatenate(tmp.values)) + import ipdb; ipdb.set_trace() + if target == "d": + all_predictions[0] += PC + if target == "p": + all_predictions[1] += PC + return opfunc(*all_predictions) + + +def compute_ht_parametrisation( + index: int, + nodes: list, + kin_dict: dict, + exp: str, + h2_prior: list, + hl_prior: list, + reverse: bool = False +): + if not reverse: + shifted_H2_list = [0 for k in range(len(nodes))] + shifted_HL_list = [0 for k in range(len(nodes))] + shifted_H2_list[index] = h2_prior[index] + shifted_HL_list[index] = hl_prior[index] + else: + shifted_H2_list = h2_prior.copy() + shifted_HL_list = hl_prior.copy() + shifted_H2_list[index] = 0 + shifted_HL_list[index] = 0 + + H_2 = scint.CubicSpline(nodes, shifted_H2_list) + H_L = scint.CubicSpline(nodes, shifted_HL_list) + H_2 = np.vectorize(H_2) + H_L = np.vectorize(H_L) + + x = kin_dict['x'] + y = kin_dict['y'] + 
Q2 = kin_dict['Q2'] + N2, NL = compute_normalisation_by_experiment(exp, x, y, Q2) + + PC_2 = N2 * H_2(x) / Q2 + PC_L = NL * H_2(x) / Q2 + return PC_2, PC_L + + def compute_normalisation_by_experiment(experiment_name, x, y, Q2): N_2 = np.zeros(shape=y.shape) N_L = np.zeros(shape=y.shape) - if "HERA_NC" in experiment_name or "HERA_CC" in experiment_name or "NMC" in experiment_name: - yp = 1 + np.power(1 - y, 2) - yL = np.power(y, 2) + if "SIGMA" in experiment_name: + + if "HERA_NC" in experiment_name or "HERA_CC" in experiment_name or "NMC" in experiment_name: + yp = 1 + np.power(1 - y, 2) + yL = np.power(y, 2) + + if "HERA_NC" in experiment_name or "NMC" in experiment_name: + N_2 = 1 + N_L = - yL / yp - if "HERA_NC" in experiment_name or "NMC" in experiment_name: - N_2 = 1 - N_L = - yL / yp + elif "HERA_CC" in experiment_name: + N_2 = 1 / 4 * yp + N_L = - N_2 * yL / yp - elif "HERA_CC" in experiment_name: - N_2 = 1 / 4 * yp - N_L = - N_2 * yL / yp + if "CHORUS_CC" in experiment_name: + yL = np.power(y, 2) + Gf = 1.1663787e-05 + Mh = 0.938 + MW2 = 80.398 ** 2 + yp = 1 + np.power(1 - y, 2) - 2 * np.power(x * y * Mh, 2) / Q2 + N_2 = Gf**2 * Mh * yp / ( 2 * np.pi * np.power( 1 + Q2 / MW2, 2) ) + N_L = - N_2 * yL / yp - if "CHORUS_CC" in experiment_name: - yL = np.power(y, 2) - Gf = 1.1663787e-05 - Mh = 0.938 - MW2 = 80.398 ** 2 - yp = 1 + np.power(1 - y, 2) - 2 * np.power(x * y * Mh, 2) / Q2 - N_2 = Gf**2 * Mh * yp / ( 2 * np.pi * np.power( 1 + Q2 / MW2, 2) ) - N_L = - N_2 * yL / yp + elif "F2" in experiment_name: + N_2 = np.ones(shape=x.shape) + N_L = np.zeros(shape=x.shape) + + else: + raise ValueError(f"The normalisation for the observable is not known.") return N_2, N_L @@ -683,3 +750,4 @@ def experimentplustheory_corrmat_custom(procs_covmat, theory_covmat_custom): each_dataset_results = collect(results, ("group_dataset_inputs_by_process", "data")) +groups_data_by_process = collect("data", ("group_dataset_inputs_by_process",)) \ No newline at end of file From 5db862de20a9717c57004f79d85b72ad5eb55d5c Mon Sep 17 00:00:00 2001 From: achiefa Date: Tue, 16 Jul 2024 09:12:36 +0100 Subject: [PATCH 07/23] Implemented d/p ratio --- .../theorycovariance/construction.py | 21 ++++++++----------- 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/validphys2/src/validphys/theorycovariance/construction.py b/validphys2/src/validphys/theorycovariance/construction.py index b40fc0a027..099ae16709 100644 --- a/validphys2/src/validphys/theorycovariance/construction.py +++ b/validphys2/src/validphys/theorycovariance/construction.py @@ -157,7 +157,6 @@ def thcov_ht(combine_by_type_ht, H2_list, HL_list, groups_data_by_process, pdf, kin_dict['Q2'] = process_info.data[proc].T[1][running_index_proc - size : running_index_proc] kin_dict['y']= process_info.data[proc].T[2][running_index_proc - size : running_index_proc] kin_size = kin_dict['x'].size - print(exp) target = extract_target(dataset) # Loop over the parameter @@ -168,18 +167,18 @@ def thcov_ht(combine_by_type_ht, H2_list, HL_list, groups_data_by_process, pdf, deltas[f"p(0,{i+1}+)"] += [PC_L] deltas[f"d({i+1}+,0)"] += [np.zeros(kin_size)] deltas[f"d(0,{i+1}+)"] += [np.zeros(kin_size)] - elif target == 'deuterium': + elif target == 'deuteron': deltas[f"p({i+1}+,0)"] += [np.zeros(kin_size)] deltas[f"p(0,{i+1}+)"] += [np.zeros(kin_size)] deltas[f"d({i+1}+,0)"] += [PC_2] deltas[f"d(0,{i+1}+)"] += [PC_L] elif target == 'ratio': - - compute_ratio_delta(dataset, pdf, "d") - deltas[f"p({i+1}+,0)"] += [PC_2] - deltas[f"p(0,{i+1}+)"] += [PC_L] - 
deltas[f"d({i+1}+,0)"] += [PC_2] - deltas[f"d(0,{i+1}+)"] += [PC_L] + deltas[f"p({i+1}+,0)"] += [compute_ratio_delta(dataset, pdf, "p", PC_2)] + deltas[f"p(0,{i+1}+)"] += [compute_ratio_delta(dataset, pdf, "p", PC_L)] + deltas[f"d({i+1}+,0)"] += [compute_ratio_delta(dataset, pdf, "d", PC_2)] + deltas[f"d(0,{i+1}+)"] += [compute_ratio_delta(dataset, pdf, "d", PC_L)] + else: + raise ValueError("Could not detect target.") # Construct theory covmat covmats = defaultdict(list) @@ -192,7 +191,6 @@ def thcov_ht(combine_by_type_ht, H2_list, HL_list, groups_data_by_process, pdf, s += np.outer(deltas[par][i], deltas[par][j]) start_locs = (start_proc_by_exp[exp1], start_proc_by_exp[exp2]) covmats[start_locs] = s - import ipdb; ipdb.set_trace() return covmats @@ -210,7 +208,7 @@ def extract_target(dataset): raise ValueError(f"Unexpected operator in {dataset.name}: {dataset.op}") -def compute_ratio_delta(dataset, pdf: PDF, target, PC: np.array): +def compute_ratio_delta(dataset, pdf: PDF, target, PC: np.array) -> np.array: """This function computes the predictions as in validphys.convolution._predictions, but for ratio and including higher twist terms in bot NUM and """ opfunc = operator.truediv @@ -220,11 +218,10 @@ def compute_ratio_delta(dataset, pdf: PDF, target, PC: np.array): fk_w_cuts = fk.load_with_cuts(cuts) tmp = central_fk_predictions(fk_w_cuts, pdf) all_predictions.append(np.concatenate(tmp.values)) - import ipdb; ipdb.set_trace() if target == "d": all_predictions[0] += PC if target == "p": - all_predictions[1] += PC + all_predictions[1] += PC return opfunc(*all_predictions) From 0105aba09a3d13627fa4b1422b4642f4e0152950 Mon Sep 17 00:00:00 2001 From: achiefa Date: Tue, 16 Jul 2024 09:34:09 +0100 Subject: [PATCH 08/23] Parsing 'separate_multiplicative' in vp_setupfit --- n3fit/src/n3fit/scripts/vp_setupfit.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/n3fit/src/n3fit/scripts/vp_setupfit.py b/n3fit/src/n3fit/scripts/vp_setupfit.py index 849207ecc8..e9af7c73c8 100644 --- a/n3fit/src/n3fit/scripts/vp_setupfit.py +++ b/n3fit/src/n3fit/scripts/vp_setupfit.py @@ -216,8 +216,10 @@ def from_yaml(cls, o, *args, **kwargs): # Check positivity bound if file_content.get('positivity_bound') is not None: SETUPFIT_FIXED_CONFIG['actions_'].append('positivity_bound check_unpolarized_bc') - - # Sets default values if they are not present in the runcard + if (sam_t0 := file_content.get('sampling')) is not None: + SETUPFIT_FIXED_CONFIG['separate_multiplicative'] = sam_t0.get( + 'separate_multiplicative', False + ) for k, v in SETUPFIT_DEFAULTS.items(): file_content.setdefault(k, v) From 1cb7dcb6a903c24587cb8cf3bffa9eab4e2566cd Mon Sep 17 00:00:00 2001 From: achiefa Date: Tue, 16 Jul 2024 14:37:54 +0100 Subject: [PATCH 09/23] Minor adjustments --- .../src/validphys/theorycovariance/construction.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/validphys2/src/validphys/theorycovariance/construction.py b/validphys2/src/validphys/theorycovariance/construction.py index 099ae16709..63c59ffc8f 100644 --- a/validphys2/src/validphys/theorycovariance/construction.py +++ b/validphys2/src/validphys/theorycovariance/construction.py @@ -116,7 +116,11 @@ def combine_by_type_ht(each_dataset_results, groups_dataset_inputs_loaded_cd_wit def thcov_ht(combine_by_type_ht, H2_list, HL_list, groups_data_by_process, pdf, reverse=False): - "Same as `thcov_HT` but implementing theory covariance method for each node of the spline." 
+ """ + Same as `thcov_HT` but implementing theory covariance method for each node of the spline. + Note that 'groups_data_by_process' contains the same info as 'combine_by_type_ht'. At some + point we should use only one of them. + """ process_info = combine_by_type_ht running_index_tot = 0 start_proc_by_exp = defaultdict(list) @@ -140,7 +144,6 @@ def thcov_ht(combine_by_type_ht, H2_list, HL_list, groups_data_by_process, pdf, for i_proc, proc in enumerate(process_info.namelist.keys()): running_index_proc = 0 - kin_dict = {} for i_exp, exp in enumerate(process_info.namelist[proc]): # Locate position of the experiment @@ -149,13 +152,14 @@ def thcov_ht(combine_by_type_ht, H2_list, HL_list, groups_data_by_process, pdf, start_proc_by_exp[exp] = running_index_tot running_index_tot += size running_index_proc += size + kin_dict = {} # Compute shifts only for a subset of processes if proc in included_proc and exp in included_exp[proc]: #central = process_info.preds[proc][1][start_proc_by_exp[exp] : size] # Probably this is deprecated - kin_dict['x'] = process_info.data[proc].T[0][running_index_proc - size : running_index_proc] - kin_dict['Q2'] = process_info.data[proc].T[1][running_index_proc - size : running_index_proc] - kin_dict['y']= process_info.data[proc].T[2][running_index_proc - size : running_index_proc] + kin_dict['x'] = process_info.data[proc].T[0][running_index_proc - size : running_index_proc] + kin_dict['Q2'] = process_info.data[proc].T[1][running_index_proc - size : running_index_proc] + kin_dict['y'] = process_info.data[proc].T[2][running_index_proc - size : running_index_proc] kin_size = kin_dict['x'].size target = extract_target(dataset) From b761e85506eacf5046cea618cc1f36646ee6b736 Mon Sep 17 00:00:00 2001 From: achiefa Date: Tue, 16 Jul 2024 14:48:01 +0100 Subject: [PATCH 10/23] Corrected bug --- .../src/validphys/theorycovariance/construction.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/validphys2/src/validphys/theorycovariance/construction.py b/validphys2/src/validphys/theorycovariance/construction.py index 63c59ffc8f..2a18694d6d 100644 --- a/validphys2/src/validphys/theorycovariance/construction.py +++ b/validphys2/src/validphys/theorycovariance/construction.py @@ -177,10 +177,10 @@ def thcov_ht(combine_by_type_ht, H2_list, HL_list, groups_data_by_process, pdf, deltas[f"d({i+1}+,0)"] += [PC_2] deltas[f"d(0,{i+1}+)"] += [PC_L] elif target == 'ratio': - deltas[f"p({i+1}+,0)"] += [compute_ratio_delta(dataset, pdf, "p", PC_2)] - deltas[f"p(0,{i+1}+)"] += [compute_ratio_delta(dataset, pdf, "p", PC_L)] - deltas[f"d({i+1}+,0)"] += [compute_ratio_delta(dataset, pdf, "d", PC_2)] - deltas[f"d(0,{i+1}+)"] += [compute_ratio_delta(dataset, pdf, "d", PC_L)] + deltas[f"p({i+1}+,0)"] += [compute_ratio_delta(dataset, pdf, "p", PC_2) - compute_ratio_delta(dataset, pdf)] + deltas[f"p(0,{i+1}+)"] += [compute_ratio_delta(dataset, pdf, "p", PC_L) - compute_ratio_delta(dataset, pdf)] + deltas[f"d({i+1}+,0)"] += [compute_ratio_delta(dataset, pdf, "d", PC_2) - compute_ratio_delta(dataset, pdf)] + deltas[f"d(0,{i+1}+)"] += [compute_ratio_delta(dataset, pdf, "d", PC_L) - compute_ratio_delta(dataset, pdf)] else: raise ValueError("Could not detect target.") @@ -212,7 +212,7 @@ def extract_target(dataset): raise ValueError(f"Unexpected operator in {dataset.name}: {dataset.op}") -def compute_ratio_delta(dataset, pdf: PDF, target, PC: np.array) -> np.array: +def compute_ratio_delta(dataset, pdf: PDF, target = None, PC: np.array = None) -> np.array: """This function 
computes the predictions as in validphys.convolution._predictions, but for ratio and including higher twist terms in bot NUM and """ opfunc = operator.truediv @@ -224,7 +224,7 @@ def compute_ratio_delta(dataset, pdf: PDF, target, PC: np.array) -> np.array: all_predictions.append(np.concatenate(tmp.values)) if target == "d": all_predictions[0] += PC - if target == "p": + elif target == "p": all_predictions[1] += PC return opfunc(*all_predictions) From e03e889bf5a47911af8eae1f722887b8cc8d6c79 Mon Sep 17 00:00:00 2001 From: achiefa Date: Wed, 17 Jul 2024 15:24:46 +0100 Subject: [PATCH 11/23] Correcting bug --- validphys2/src/validphys/theorycovariance/construction.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/validphys2/src/validphys/theorycovariance/construction.py b/validphys2/src/validphys/theorycovariance/construction.py index 2a18694d6d..43c406b008 100644 --- a/validphys2/src/validphys/theorycovariance/construction.py +++ b/validphys2/src/validphys/theorycovariance/construction.py @@ -165,7 +165,7 @@ def thcov_ht(combine_by_type_ht, H2_list, HL_list, groups_data_by_process, pdf, # Loop over the parameter for i in range(len(x_abmp)): - PC_2, PC_L = compute_ht_parametrisation(i, x_abmp, kin_dict, exp, H2_list, HL_list) + PC_2, PC_L = compute_ht_parametrisation(i, x_abmp, kin_dict, exp, H2_list, HL_list, reverse=reverse) if target == 'proton': deltas[f"p({i+1}+,0)"] += [PC_2] deltas[f"p(0,{i+1}+)"] += [PC_L] @@ -201,9 +201,9 @@ def thcov_ht(combine_by_type_ht, H2_list, HL_list, groups_data_by_process, pdf, def extract_target(dataset): if dataset.op == "NULL": if "_P_" in dataset.name or "HERA" in dataset.name: - return "proton" + return "proton" elif "_D_" in dataset.name: - return "deuteron" + return "deuteron" else: raise ValueError(f"No target detected for {dataset.name}") elif dataset.op == "RATIO": @@ -260,7 +260,7 @@ def compute_ht_parametrisation( N2, NL = compute_normalisation_by_experiment(exp, x, y, Q2) PC_2 = N2 * H_2(x) / Q2 - PC_L = NL * H_2(x) / Q2 + PC_L = NL * H_L(x) / Q2 return PC_2, PC_L From 686fe46a29a368fa1d036e79e3ab7a582cc1441d Mon Sep 17 00:00:00 2001 From: achiefa Date: Mon, 5 Aug 2024 15:20:01 +0100 Subject: [PATCH 12/23] Implemented knots in runcard --- .../validphys/theorycovariance/construction.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/validphys2/src/validphys/theorycovariance/construction.py b/validphys2/src/validphys/theorycovariance/construction.py index 43c406b008..727bc75ca7 100644 --- a/validphys2/src/validphys/theorycovariance/construction.py +++ b/validphys2/src/validphys/theorycovariance/construction.py @@ -115,7 +115,7 @@ def combine_by_type_ht(each_dataset_results, groups_dataset_inputs_loaded_cd_wit return process_info -def thcov_ht(combine_by_type_ht, H2_list, HL_list, groups_data_by_process, pdf, reverse=False): +def thcov_ht(combine_by_type_ht, H2_list, HL_list, groups_data_by_process, pdf, ht_knots = list(), reverse: bool = False): """ Same as `thcov_HT` but implementing theory covariance method for each node of the spline. Note that 'groups_data_by_process' contains the same info as 'combine_by_type_ht'. 
At some @@ -125,6 +125,7 @@ def thcov_ht(combine_by_type_ht, H2_list, HL_list, groups_data_by_process, pdf, running_index_tot = 0 start_proc_by_exp = defaultdict(list) deltas = defaultdict(list) + x_knots = list() included_proc = ["DIS NC"] excluded_exp = {"DIS NC" : []} included_exp = {} @@ -135,11 +136,14 @@ def thcov_ht(combine_by_type_ht, H2_list, HL_list, groups_data_by_process, pdf, aux.append(exp) included_exp[proc] = aux - # ABMP parametrisation and target masses - x_abmp = [0.0, 0.1, 0.3, 0.5, 0.7, 0.9, 1] + if len(ht_knots) == 0: + # ABMP parametrisation + x_knots = [0.0, 0.1, 0.3, 0.5, 0.7, 0.9, 1] + else: + x_knots = ht_knots # Check that H2_list and HL_list have the same size as x - if (len(H2_list) != len(x_abmp)) or (len(HL_list) != len(x_abmp)): + if (len(H2_list) != len(x_knots)) or (len(HL_list) != len(x_knots)): raise ValueError(f"The size of HT parameters does not match the number of nodes in the spline.") for i_proc, proc in enumerate(process_info.namelist.keys()): @@ -164,8 +168,8 @@ def thcov_ht(combine_by_type_ht, H2_list, HL_list, groups_data_by_process, pdf, target = extract_target(dataset) # Loop over the parameter - for i in range(len(x_abmp)): - PC_2, PC_L = compute_ht_parametrisation(i, x_abmp, kin_dict, exp, H2_list, HL_list, reverse=reverse) + for i in range(len(x_knots)): + PC_2, PC_L = compute_ht_parametrisation(i, x_knots, kin_dict, exp, H2_list, HL_list, reverse=reverse) if target == 'proton': deltas[f"p({i+1}+,0)"] += [PC_2] deltas[f"p(0,{i+1}+)"] += [PC_L] From 129390200eb26c490f64bda9978c0f3a9772a3d9 Mon Sep 17 00:00:00 2001 From: achiefa Date: Tue, 20 Aug 2024 11:13:40 +0100 Subject: [PATCH 13/23] Added valiphys card for chi2 report --- .../theory_covariance/chi2table_ht.yaml | 107 ++++++++++++++++++ 1 file changed, 107 insertions(+) create mode 100644 validphys2/examples/theory_covariance/chi2table_ht.yaml diff --git a/validphys2/examples/theory_covariance/chi2table_ht.yaml b/validphys2/examples/theory_covariance/chi2table_ht.yaml new file mode 100644 index 0000000000..e1327eebd8 --- /dev/null +++ b/validphys2/examples/theory_covariance/chi2table_ht.yaml @@ -0,0 +1,107 @@ +# This is the driver template for vp-comparefits. It consists on a validphys +# runcard where some settings are missing and are to be filled by the +# vp-comparefits script. The settings below are a sample of such settings, kept +# for reference +# +# meta: +# title: The title of the Report +# keywords: [report_template] +# author: NNPDF Collaboration +# +# current: +# fit: {id: id_of_the_base_fit} +# pdf: {id: id_of_the_base_fit, label: "Current Fit"} +# theory: +# from_: fit +# theoryid: +# from_: theory +# speclabel: "Current Fit" +# +# reference: +# fit: {id: id_of_the_reference_fit} +# pdf: {id: id_of_the_reference_fit, label: "Reference Fit" } +# theory: +# from_: fit +# theoryid: +# from_: theory +# speclabel: "Reference Fit" + +pdfs: + - {id: "240816-06-7-01-lc", label: "HT low cuts"} + - {id: "240812-02-ABMP-lnv", label: "HT mid cuts"} + - {id: "240812-04-ABMP-lnv", label: "HT std. cuts"} + - {id: "240819_nnpdf40_lowcuts", label: "no HYT low cuts"} + - {id: "240807-midcuts", label: "no HT mid cuts"} + - {id: "NNPDF40_nnlo_as_01180_qcd", label: "no HT std cuts (NNPDF40)"} + +fits: + - {id: "240816-06-7-01-lc", label: "HT low cuts"} + - {id: "240812-02-ABMP-lnv", label: "HT mid cuts"} + - {id: "240812-04-ABMP-lnv", label: "HT std. 
cuts"} + - {id: "240819_nnpdf40_lowcuts", label: "no HYT low cuts"} + - {id: "240807-midcuts", label: "no HT mid cuts"} + - {id: "NNPDF40_nnlo_as_01180_qcd", label: "no HT std cuts (NNPDF40)"} + +use_cuts: "fromfit" +use_weights_in_covmat: False +use_thcovmat_if_present: True + +Q: 1.651 + +#template: report.md + +description: + from_: fit + +dataset_inputs: + from_: fit + +#dataspecs: +# - theoryid: +# from_: current +# pdf: +# from_: current +# fit: +# from_: current +# speclabel: +# from_: current +# +# - theoryid: +# from_: reference +# pdf: +# from_: reference +# fit: +# from_: reference +# speclabel: +# from_: reference + +Datanorm: + normalize_to: data + +DataGroups: + - metadata_group: nnpdf31_process + - metadata_group: experiment + +ProcessGroup: + metadata_group: nnpdf31_process + +template_text: | + Summary + ------- + {@ summarise_fits @} + + {@with DataGroups@} + $\chi^2$ by {@processed_metadata_group@} + ---------------------------------------- + {@plot_fits_groups_data_chi2@} + {@endwith@} + + $\chi^2$ by dataset + ------------------- + ### Plot + {@plot_fits_datasets_chi2@} + ### Table + {@ProcessGroup fits_chi2_table(show_total=true)@} + +actions_: + - report(main=true) From 8ba86daf27c1cb4993ea2da1aa6770a294cd7232 Mon Sep 17 00:00:00 2001 From: achiefa Date: Fri, 23 Aug 2024 15:44:41 +0100 Subject: [PATCH 14/23] First implementation of HT at the level of theory predictions --- n3fit/src/n3fit/layers/DIS.py | 61 ++++++++++++++++++++++++++++++-- n3fit/src/n3fit/model_gen.py | 51 ++++++++++++++++++++++++-- n3fit/src/n3fit/model_trainer.py | 1 + 3 files changed, 108 insertions(+), 5 deletions(-) diff --git a/n3fit/src/n3fit/layers/DIS.py b/n3fit/src/n3fit/layers/DIS.py index 072d423e95..d7f15b24bd 100644 --- a/n3fit/src/n3fit/layers/DIS.py +++ b/n3fit/src/n3fit/layers/DIS.py @@ -23,9 +23,12 @@ """ import numpy as np +from scipy import interpolate as scint from n3fit.backends import operations as op +from validphys.theorycovariance.construction import compute_normalisation_by_experiment + from .observable import Observable @@ -39,6 +42,49 @@ class DIS(Observable): while the input pdf is rank 4 of shape (batch_size, replicas, xgrid, flavours) """ + def __init__(self, fktable_data, fktable_arr, dataset_name, boundary_condition=None, operation_name="NULL", nfl=14, n_replicas=1, exp_kinematics=None, **kwargs): + super().__init__(fktable_data, fktable_arr, dataset_name, boundary_condition, operation_name, nfl, n_replicas, **kwargs) + + self.power_corrections = None + if exp_kinematics is not None: + self.exp_kinematics = exp_kinematics + self.power_corrections = self.compute_abmp_parametrisation() + + def compute_abmp_parametrisation(self): + """ + This function is very similar to `compute_ht_parametrisation` in + validphys.theorycovariance.construction.py. However, the latter + accounts for shifts in the 5pt prescription. As of now, this function + is meant to work only for DIS NC data, using the ABMP16 result. 
+ """ + x_knots = [0.0, 0.1, 0.3, 0.5, 0.7, 0.9, 1] + y_h2 = [0.023, -0.032, -0.005, 0.025, 0.051, 0.003, 0.0] + y_ht = [-0.319, -0.134, -0.052, 0.071, 0.030, 0.003, 0.0] + h2_sigma = [0.019, 0.013, 0.009, 0.006, 0.005, 0.004] + ht_sigma = [0.126, 0.040, 0.030, 0.025, 0.012, 0.007] + H_2 = scint.CubicSpline(x_knots, y_h2) + H_T = scint.CubicSpline(x_knots, y_ht) + + # Reconstruct HL from HT and H2 + def H_L(x): + return (H_2(x) - np.power(x, 0.05) * H_T(x)) + + H_2 = np.vectorize(H_2) + H_L = np.vectorize(H_L) + + x = self.exp_kinematics['kin1'] + y = self.exp_kinematics['kin3'] + Q2 = self.exp_kinematics['kin2'] + N2, NL = compute_normalisation_by_experiment(self.dataname, x, y, Q2) + + PC_2 = N2 * H_2(x) / Q2 + PC_L = NL * H_L(x) / Q2 + power_correction = PC_2 + PC_L + power_correction = power_correction.to_numpy() + + return power_correction + + def gen_mask(self, basis): """ Receives a list of active flavours and generates a boolean mask tensor @@ -85,7 +131,11 @@ def build(self, input_shape): if self.num_replicas > 1: self.compute_observable = compute_dis_observable_many_replica else: - self.compute_observable = compute_dis_observable_one_replica + # Currying the function so that the `Observable` does not need + # to get modified + def compute_dis_observable_one_replica_w_pc(pdf, padded_fk): + return compute_dis_observable_one_replica(pdf, padded_fk, power_corrections = self.power_corrections) + self.compute_observable = compute_dis_observable_one_replica_w_pc def compute_dis_observable_many_replica(pdf, padded_fk): @@ -107,9 +157,14 @@ def compute_dis_observable_many_replica(pdf, padded_fk): return op.einsum('brxf, nxf -> brn', pdf[0], padded_fk) -def compute_dis_observable_one_replica(pdf, padded_fk): +def compute_dis_observable_one_replica(pdf, padded_fk, power_corrections = None): """ Same operations as above but a specialized implementation that is more efficient for 1 replica, masking the PDF rather than the fk table. """ - return op.tensor_product(pdf[0], padded_fk, axes=[(2, 3), (1, 2)]) + if power_corrections is None: + + return op.tensor_product(pdf, padded_fk, axes=[(2, 3), (1, 2)]) + else: + + return op.tensor_product(pdf, padded_fk, axes=[(2, 3), (1, 2)]) + power_corrections diff --git a/n3fit/src/n3fit/model_gen.py b/n3fit/src/n3fit/model_gen.py index 01a4bdc7f2..18b224de96 100644 --- a/n3fit/src/n3fit/model_gen.py +++ b/n3fit/src/n3fit/model_gen.py @@ -135,6 +135,7 @@ def observable_generator( positivity_initial=1.0, integrability=False, n_replicas=1, + exp_data=None ): # pylint: disable=too-many-locals """ This function generates the observable models for each experiment. 
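For reference, the ABMP16-style central parametrisation hard-coded in compute_abmp_parametrisation above can be reproduced in isolation as follows. This is a minimal sketch assuming only numpy and scipy; the knot positions and values are the ones written in the patch, and H_L is reconstructed from H_2 and H_T through H_L(x) = H_2(x) - x^0.05 * H_T(x). The power correction applied to the prediction is then N_2 * H_2(x)/Q2 + N_L * H_L(x)/Q2, with N_2 and N_L the observable-dependent normalisations from compute_normalisation_by_experiment.

import numpy as np
from scipy import interpolate as scint

x_knots = [0.0, 0.1, 0.3, 0.5, 0.7, 0.9, 1]
y_h2 = [0.023, -0.032, -0.005, 0.025, 0.051, 0.003, 0.0]
y_ht = [-0.319, -0.134, -0.052, 0.071, 0.030, 0.003, 0.0]

H_2 = scint.CubicSpline(x_knots, y_h2)
H_T = scint.CubicSpline(x_knots, y_ht)

def H_L(x):
    # Reconstruct H_L from H_2 and H_T, as done in the patch
    return H_2(x) - np.power(x, 0.05) * H_T(x)

# Example kinematics (illustrative values only)
x = np.array([0.01, 0.1, 0.5])
q2 = np.array([4.0, 10.0, 100.0])
pc_2 = H_2(x) / q2
pc_l = H_L(x) / q2
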
@@ -196,10 +197,39 @@ def observable_generator( dataset_xsizes = [] model_inputs = [] model_observables = [] + #print_x_grid = np.array([]) for the NTK + + kin_by_dict = {} + if exp_data is not None: + included_processes = [ + 'DEUTERON', + 'NMC', + 'NUCLEAR' + ] + + for process in exp_data: + commondata = process.load_commondata() + for dataset in commondata: + if process.name in included_processes and "_NC_" in dataset.setname: + kin_by_dict[dataset.setname] = dataset.kinematics + else: + kin_by_dict[dataset.setname] = None + # The first step is to compute the observable for each of the datasets for dataset in spec_dict["datasets"]: # Get the generic information of the dataset dataset_name = dataset.name + kinematics = None + + if exp_data is not None: + if kin_by_dict[dataset_name] is not None: + kinematics = kin_by_dict[dataset_name] + + ########## For the NTK + #import ipdb; ipdb.set_trace() + #fktables_data = dataset.fktables_data[0] + #print_x_grid = np.concatenate((print_x_grid, fktables_data.xgrid)) + #print(print_x_grid.size) # Look at what kind of layer do we need for this dataset if dataset.hadronic: @@ -216,7 +246,9 @@ def observable_generator( # list of validphys.coredata.FKTableData objects # these will then be used to check how many different pdf inputs are needed # (and convolutions if given the case) - obs_layer = Obs_Layer( + # BAD IMPLEMENTATION, but effective + if dataset.hadronic: + obs_layer = Obs_Layer( dataset.fktables_data, dataset.fktables(), dataset_name, @@ -224,7 +256,18 @@ def observable_generator( operation_name, n_replicas=n_replicas, name=f"dat_{dataset_name}", - ) + ) + else: + obs_layer = Obs_Layer( + dataset.fktables_data, + dataset.fktables(), + dataset_name, + boundary_condition, + operation_name, + n_replicas=n_replicas, + name=f"dat_{dataset_name}", + exp_kinematics=kinematics, + ) # If the observable layer found that all input grids are equal, the splitting will be None # otherwise the different xgrids need to be stored separately @@ -240,6 +283,10 @@ def observable_generator( model_observables.append(obs_layer) + # Again, for the NTK + #with open(f'x_grid/xgrid_{spec_name}.npy', 'wb') as f: + # np.save(f, print_x_grid) + # Check whether all xgrids of all observables in this experiment are equal # if so, simplify the model input if is_unique(model_inputs): diff --git a/n3fit/src/n3fit/model_trainer.py b/n3fit/src/n3fit/model_trainer.py index 9917e22c89..9c0dc00961 100644 --- a/n3fit/src/n3fit/model_trainer.py +++ b/n3fit/src/n3fit/model_trainer.py @@ -577,6 +577,7 @@ def _generate_observables( invcovmat_tr=experiment_data["invcovmat"][i], invcovmat_vl=experiment_data["invcovmat_vl"][i], n_replicas=len(self.replicas), + exp_data=self.experiments_data ) # Save the input(s) corresponding to this experiment From e75bf1f5788db461ba0189e8c8e121466c48fc58 Mon Sep 17 00:00:00 2001 From: achiefa Date: Sat, 21 Sep 2024 11:23:04 +0100 Subject: [PATCH 15/23] Allowed theory HT in runcard - added HERACOMB in HT calculations Excluded HERACOMB Hacking NMC dataset Grouping kinematics Reimplementing thcovmat Added comment in HT for DIS Corrected normalisation for SIGMARED DIS NC data sets Removing unused code Added HT for F2C data (EMC) - removed deprecated function Corrected EMC data iron target Removed deprecated code Refactoring + DIS CC Corrected bug - ready for cc test Corrected bug - ready Removing unnecessary code Corrected bug after rebase Add normalisation in CC x-secs Correct normalisation Restore n3fit files from master remove _PB suffix from process type 
format a bit Correct typo + example runcard Update for new thcovmat construction + refactor + docstrings Correct bug Correct nuclear factors for nuclear targets First implementation of jet data Change pc jet dependence from pT to eta Allowing step-function for the prior Vectorize step_function + docstring Correct bug in step function Correct bug in step function Produce covs_pt_prescrip Allow different funcs for posterior Adjusting linear triangular function Correct bug in linear function Implement multiplicative PC for jet Correct docstring Remove unused vp runcard Dijet + clean-up + checks for pc dict Remove translation layer for pc parameters Remove copy of the same collection procs_data -> groups_data_by_process + clean-up Remove unused collect Update basic runcard Allow generation of L1 data Jets with single parameters Adjust format Restoring nodes for jets and dijets --- .../examples/Basic_runcard_pc_covmat.yml | 151 +++ n3fit/src/n3fit/layers/DIS.py | 61 +- n3fit/src/n3fit/model_gen.py | 51 +- n3fit/src/n3fit/model_trainer.py | 1 - n3fit/src/n3fit/performfit.py | 2 +- n3fit/src/n3fit/scripts/vp_setupfit.py | 6 +- .../theory_covariance/chi2table_ht.yaml | 107 -- validphys2/src/validphys/checks.py | 20 + validphys2/src/validphys/commondata.py | 4 - validphys2/src/validphys/config.py | 85 +- validphys2/src/validphys/dataplots.py | 12 +- validphys2/src/validphys/results.py | 12 +- .../theorycovariance/construction.py | 352 ++---- .../higher_twist_functions.py | 1120 +++++++++++++++++ 14 files changed, 1461 insertions(+), 523 deletions(-) create mode 100644 n3fit/runcards/examples/Basic_runcard_pc_covmat.yml delete mode 100644 validphys2/examples/theory_covariance/chi2table_ht.yaml create mode 100644 validphys2/src/validphys/theorycovariance/higher_twist_functions.py diff --git a/n3fit/runcards/examples/Basic_runcard_pc_covmat.yml b/n3fit/runcards/examples/Basic_runcard_pc_covmat.yml new file mode 100644 index 0000000000..991e5a7cba --- /dev/null +++ b/n3fit/runcards/examples/Basic_runcard_pc_covmat.yml @@ -0,0 +1,151 @@ +# +# Configuration file for n3fit +# +###################################################################################### +description: NNPDF4.0 ht with TCM - DIS (NC & CC) only + +###################################################################################### +dataset_inputs: +- {dataset: NMC_NC_NOTFIXED_EM-F2, frac: 0.75, variant: legacy_dw} +- {dataset: NMC_NC_NOTFIXED_P_EM-SIGMARED, frac: 0.75, variant: legacy} +- {dataset: SLAC_NC_NOTFIXED_D_EM-F2, frac: 0.75, variant: legacy_dw} +- {dataset: BCDMS_NC_NOTFIXED_P_EM-F2, frac: 0.75, variant: legacy_dw} +- {dataset: CHORUS_CC_NOTFIXED_PB_NU-SIGMARED, frac: 0.75, variant: legacy_dw} +- {dataset: NUTEV_CC_NOTFIXED_FE_NB-SIGMARED, cfac: [MAS], frac: 0.75, variant: legacy_dw} +- {dataset: HERA_CC_318GEV_EP-SIGMARED, frac: 0.75, variant: legacy} +- {dataset: HERA_NC_318GEV_EAVG_CHARM-SIGMARED, frac: 0.75, variant: legacy} +- {dataset: HERA_NC_318GEV_EAVG_BOTTOM-SIGMARED, frac: 0.75, variant: legacy} +- {dataset: DYE866_Z0_800GEV_DW_RATIO_PDXSECRATIO, frac: 0.75, variant: legacy} +- {dataset: CDF_Z0_1P96TEV_ZRAP, frac: 0.75, variant: legacy} +- {dataset: ATLAS_Z0J_8TEV_PT-Y, frac: 0.75, variant: legacy_10} +- {dataset: ATLAS_1JET_8TEV_R06_PTY, frac: 0.75, variant: legacy_decorrelated} +- {dataset: ATLAS_2JET_7TEV_R06_M12Y, frac: 0.75, variant: legacy} +- {dataset: CMS_2JET_7TEV_M12Y, frac: 0.75} +- {dataset: CMS_1JET_8TEV_PTY, frac: 0.75, variant: legacy} +- {dataset: LHCB_Z0_13TEV_DIELECTRON-Y, frac: 0.75} + 
+################################################################################ +datacuts: + t0pdfset: 240701-02-rs-nnpdf40-baseline + q2min: 2.5 + w2min: 3.24 + +################################################################################ +# NNLO QCD TRN evolution +theory: + theoryid: 708 + +theorycovmatconfig: + point_prescriptions: ["9 point", "power corrections"] + pc_parameters: + H2p: {yshift: [0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.0], nodes: [0.0, 0.001, 0.01, 0.1, 0.3, 0.5, 0.7, 0.9, 1]} + H2d: {yshift: [0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.0], nodes: [0.0, 0.001, 0.01, 0.1, 0.3, 0.5, 0.7, 0.9, 1]} + HLp: {yshift: [0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.0], nodes: [0.0, 0.001, 0.01, 0.1, 0.3, 0.5, 0.7, 0.9, 1]} + HLd: {yshift: [0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.0], nodes: [0.0, 0.001, 0.01, 0.1, 0.3, 0.5, 0.7, 0.9, 1]} + H3p: {yshift: [0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.0], nodes: [0.0, 0.001, 0.01, 0.1, 0.3, 0.5, 0.7, 0.9, 1]} + H3d: {yshift: [0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.0], nodes: [0.0, 0.001, 0.01, 0.1, 0.3, 0.5, 0.7, 0.9, 1]} + Hj: {yshift: [2.0, 2.0, 2.0, 2.0, 2.0, 2.0], nodes: [0.25, 0.75, 1.25, 1.75, 2.25, 2.75]} + H2j_ATLAS: {yshift: [2.0, 2.0, 2.0, 2.0, 2.0, 2.0], nodes: [0.25, 0.75, 1.25, 1.75, 2.25, 2.75]} + H2j_CMS: {yshift: [2.0, 2.0, 2.0, 2.0, 2.0], nodes: [0.25, 0.75, 1.25, 1.75, 2.25]} + pc_included_procs: ["JETS", "DIJET", "DIS NC", "DIS CC"] + pc_excluded_exps: [HERA_NC_318GEV_EAVG_CHARM-SIGMARED, + HERA_NC_318GEV_EAVG_BOTTOM-SIGMARED,] + pdf: 210619-n3fit-001 + use_thcovmat_in_fitting: true + use_thcovmat_in_sampling: true +resample_negative_pseudodata: false + +# For fits <= 4.0 multiplicative and additive uncertainties were sampled separately +# and thus the flag `separate_multiplicative` needs to be set to True +# sampling: +# separate_multiplicative: True + +################################################################################ +trvlseed: 591866982 +nnseed: 945709987 +mcseed: 519562661 +genrep: true + +################################################################################ +parameters: # This defines the parameter dictionary that is passed to the Model Trainer + nodes_per_layer: [25, 20, 8] + activation_per_layer: [tanh, tanh, linear] + initializer: glorot_normal + optimizer: + clipnorm: 6.073e-6 + learning_rate: 2.621e-3 + optimizer_name: Nadam + epochs: 3000 + positivity: + initial: 184.8 + multiplier: + integrability: + initial: 10 + multiplier: + stopping_patience: 0.1 + layer_type: dense + dropout: 0.0 + threshold_chi2: 3.5 + +fitting: + fitbasis: EVOL + savepseudodata: True + basis: + - {fl: sng, trainable: false, smallx: [1.089, 1.119], largex: [1.475, 3.119]} + - {fl: g, trainable: false, smallx: [0.7504, 1.098], largex: [2.814, 5.669]} + - {fl: v, trainable: false, smallx: [0.479, 0.7384], largex: [1.549, 3.532]} + - {fl: v3, trainable: false, smallx: [0.1073, 0.4397], largex: [1.733, 3.458]} + - {fl: v8, trainable: false, smallx: [0.5507, 0.7837], largex: [1.516, 3.356]} + - {fl: t3, trainable: false, smallx: [-0.4506, 0.9305], largex: [1.745, 3.424]} + - {fl: t8, trainable: false, smallx: [0.5877, 0.8687], largex: [1.522, 3.515]} + - {fl: t15, trainable: false, smallx: [1.089, 1.141], largex: [1.492, 3.222]} + +################################################################################ +positivity: + posdatasets: + # Positivity Lagrange Multiplier + - {dataset: NNPDF_POS_2P24GEV_F2U, maxlambda: 1e6} + - {dataset: NNPDF_POS_2P24GEV_F2D, maxlambda: 1e6} + - {dataset: 
NNPDF_POS_2P24GEV_F2S, maxlambda: 1e6} + - {dataset: NNPDF_POS_2P24GEV_FLL, maxlambda: 1e6} + - {dataset: NNPDF_POS_2P24GEV_DYU, maxlambda: 1e10} + - {dataset: NNPDF_POS_2P24GEV_DYD, maxlambda: 1e10} + - {dataset: NNPDF_POS_2P24GEV_DYS, maxlambda: 1e10} + - {dataset: NNPDF_POS_2P24GEV_F2C, maxlambda: 1e6} + # Positivity of MSbar PDFs + - {dataset: NNPDF_POS_2P24GEV_XUQ, maxlambda: 1e6} + - {dataset: NNPDF_POS_2P24GEV_XUB, maxlambda: 1e6} + - {dataset: NNPDF_POS_2P24GEV_XDQ, maxlambda: 1e6} + - {dataset: NNPDF_POS_2P24GEV_XDB, maxlambda: 1e6} + - {dataset: NNPDF_POS_2P24GEV_XSQ, maxlambda: 1e6} + - {dataset: NNPDF_POS_2P24GEV_XSB, maxlambda: 1e6} + - {dataset: NNPDF_POS_2P24GEV_XGL, maxlambda: 1e6} + +added_filter_rules: + - dataset: NNPDF_POS_2P24GEV_FLL + rule: "x > 5.0e-7" + - dataset: NNPDF_POS_2P24GEV_F2C + rule: "x < 0.74" + - dataset: NNPDF_POS_2P24GEV_XGL + rule: "x > 0.1" + - dataset: NNPDF_POS_2P24GEV_XUQ + rule: "x > 0.1" + - dataset: NNPDF_POS_2P24GEV_XUB + rule: "x > 0.1" + - dataset: NNPDF_POS_2P24GEV_XDQ + rule: "x > 0.1" + - dataset: NNPDF_POS_2P24GEV_XDB + rule: "x > 0.1" + - dataset: NNPDF_POS_2P24GEV_XSQ + rule: "x > 0.1" + - dataset: NNPDF_POS_2P24GEV_XSB + rule: "x > 0.1" + +integrability: + integdatasets: + - {dataset: NNPDF_INTEG_3GEV_XT8, maxlambda: 1e2} + - {dataset: NNPDF_INTEG_3GEV_XT3, maxlambda: 1e2} + +################################################################################ +debug: false +maxcores: 8 diff --git a/n3fit/src/n3fit/layers/DIS.py b/n3fit/src/n3fit/layers/DIS.py index d7f15b24bd..072d423e95 100644 --- a/n3fit/src/n3fit/layers/DIS.py +++ b/n3fit/src/n3fit/layers/DIS.py @@ -23,12 +23,9 @@ """ import numpy as np -from scipy import interpolate as scint from n3fit.backends import operations as op -from validphys.theorycovariance.construction import compute_normalisation_by_experiment - from .observable import Observable @@ -42,49 +39,6 @@ class DIS(Observable): while the input pdf is rank 4 of shape (batch_size, replicas, xgrid, flavours) """ - def __init__(self, fktable_data, fktable_arr, dataset_name, boundary_condition=None, operation_name="NULL", nfl=14, n_replicas=1, exp_kinematics=None, **kwargs): - super().__init__(fktable_data, fktable_arr, dataset_name, boundary_condition, operation_name, nfl, n_replicas, **kwargs) - - self.power_corrections = None - if exp_kinematics is not None: - self.exp_kinematics = exp_kinematics - self.power_corrections = self.compute_abmp_parametrisation() - - def compute_abmp_parametrisation(self): - """ - This function is very similar to `compute_ht_parametrisation` in - validphys.theorycovariance.construction.py. However, the latter - accounts for shifts in the 5pt prescription. As of now, this function - is meant to work only for DIS NC data, using the ABMP16 result. 
- """ - x_knots = [0.0, 0.1, 0.3, 0.5, 0.7, 0.9, 1] - y_h2 = [0.023, -0.032, -0.005, 0.025, 0.051, 0.003, 0.0] - y_ht = [-0.319, -0.134, -0.052, 0.071, 0.030, 0.003, 0.0] - h2_sigma = [0.019, 0.013, 0.009, 0.006, 0.005, 0.004] - ht_sigma = [0.126, 0.040, 0.030, 0.025, 0.012, 0.007] - H_2 = scint.CubicSpline(x_knots, y_h2) - H_T = scint.CubicSpline(x_knots, y_ht) - - # Reconstruct HL from HT and H2 - def H_L(x): - return (H_2(x) - np.power(x, 0.05) * H_T(x)) - - H_2 = np.vectorize(H_2) - H_L = np.vectorize(H_L) - - x = self.exp_kinematics['kin1'] - y = self.exp_kinematics['kin3'] - Q2 = self.exp_kinematics['kin2'] - N2, NL = compute_normalisation_by_experiment(self.dataname, x, y, Q2) - - PC_2 = N2 * H_2(x) / Q2 - PC_L = NL * H_L(x) / Q2 - power_correction = PC_2 + PC_L - power_correction = power_correction.to_numpy() - - return power_correction - - def gen_mask(self, basis): """ Receives a list of active flavours and generates a boolean mask tensor @@ -131,11 +85,7 @@ def build(self, input_shape): if self.num_replicas > 1: self.compute_observable = compute_dis_observable_many_replica else: - # Currying the function so that the `Observable` does not need - # to get modified - def compute_dis_observable_one_replica_w_pc(pdf, padded_fk): - return compute_dis_observable_one_replica(pdf, padded_fk, power_corrections = self.power_corrections) - self.compute_observable = compute_dis_observable_one_replica_w_pc + self.compute_observable = compute_dis_observable_one_replica def compute_dis_observable_many_replica(pdf, padded_fk): @@ -157,14 +107,9 @@ def compute_dis_observable_many_replica(pdf, padded_fk): return op.einsum('brxf, nxf -> brn', pdf[0], padded_fk) -def compute_dis_observable_one_replica(pdf, padded_fk, power_corrections = None): +def compute_dis_observable_one_replica(pdf, padded_fk): """ Same operations as above but a specialized implementation that is more efficient for 1 replica, masking the PDF rather than the fk table. """ - if power_corrections is None: - - return op.tensor_product(pdf, padded_fk, axes=[(2, 3), (1, 2)]) - else: - - return op.tensor_product(pdf, padded_fk, axes=[(2, 3), (1, 2)]) + power_corrections + return op.tensor_product(pdf[0], padded_fk, axes=[(2, 3), (1, 2)]) diff --git a/n3fit/src/n3fit/model_gen.py b/n3fit/src/n3fit/model_gen.py index 18b224de96..01a4bdc7f2 100644 --- a/n3fit/src/n3fit/model_gen.py +++ b/n3fit/src/n3fit/model_gen.py @@ -135,7 +135,6 @@ def observable_generator( positivity_initial=1.0, integrability=False, n_replicas=1, - exp_data=None ): # pylint: disable=too-many-locals """ This function generates the observable models for each experiment. 
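For orientation, the one-replica and many-replica code paths touched above perform the same FK-table convolution; below is a minimal NumPy sketch (toy shapes and random values, purely illustrative) showing that the einsum `'brxf, nxf -> brn'` and the `(x, flavour)` tensor product used for a single replica agree.

```python
import numpy as np

# Hypothetical toy sizes: batch=1, replicas=1, xgrid=5, flavours=14, ndata=3
pdf0 = np.random.rand(1, 1, 5, 14)    # (batch, replicas, x, flavour)
padded_fk = np.random.rand(3, 5, 14)  # (ndata, x, flavour)

# Many-replica path: einsum 'brxf, nxf -> brn'
many = np.einsum('brxf,nxf->brn', pdf0, padded_fk)

# One-replica path: contract the (x, flavour) axes, as in compute_dis_observable_one_replica
one = np.tensordot(pdf0, padded_fk, axes=[(2, 3), (1, 2)])

assert np.allclose(many, one)
print(one.shape)  # (1, 1, 3): one prediction per data point
```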
@@ -197,39 +196,10 @@ def observable_generator( dataset_xsizes = [] model_inputs = [] model_observables = [] - #print_x_grid = np.array([]) for the NTK - - kin_by_dict = {} - if exp_data is not None: - included_processes = [ - 'DEUTERON', - 'NMC', - 'NUCLEAR' - ] - - for process in exp_data: - commondata = process.load_commondata() - for dataset in commondata: - if process.name in included_processes and "_NC_" in dataset.setname: - kin_by_dict[dataset.setname] = dataset.kinematics - else: - kin_by_dict[dataset.setname] = None - # The first step is to compute the observable for each of the datasets for dataset in spec_dict["datasets"]: # Get the generic information of the dataset dataset_name = dataset.name - kinematics = None - - if exp_data is not None: - if kin_by_dict[dataset_name] is not None: - kinematics = kin_by_dict[dataset_name] - - ########## For the NTK - #import ipdb; ipdb.set_trace() - #fktables_data = dataset.fktables_data[0] - #print_x_grid = np.concatenate((print_x_grid, fktables_data.xgrid)) - #print(print_x_grid.size) # Look at what kind of layer do we need for this dataset if dataset.hadronic: @@ -246,9 +216,7 @@ def observable_generator( # list of validphys.coredata.FKTableData objects # these will then be used to check how many different pdf inputs are needed # (and convolutions if given the case) - # BAD IMPLEMENTATION, but effective - if dataset.hadronic: - obs_layer = Obs_Layer( + obs_layer = Obs_Layer( dataset.fktables_data, dataset.fktables(), dataset_name, @@ -256,18 +224,7 @@ def observable_generator( operation_name, n_replicas=n_replicas, name=f"dat_{dataset_name}", - ) - else: - obs_layer = Obs_Layer( - dataset.fktables_data, - dataset.fktables(), - dataset_name, - boundary_condition, - operation_name, - n_replicas=n_replicas, - name=f"dat_{dataset_name}", - exp_kinematics=kinematics, - ) + ) # If the observable layer found that all input grids are equal, the splitting will be None # otherwise the different xgrids need to be stored separately @@ -283,10 +240,6 @@ def observable_generator( model_observables.append(obs_layer) - # Again, for the NTK - #with open(f'x_grid/xgrid_{spec_name}.npy', 'wb') as f: - # np.save(f, print_x_grid) - # Check whether all xgrids of all observables in this experiment are equal # if so, simplify the model input if is_unique(model_inputs): diff --git a/n3fit/src/n3fit/model_trainer.py b/n3fit/src/n3fit/model_trainer.py index 9c0dc00961..9917e22c89 100644 --- a/n3fit/src/n3fit/model_trainer.py +++ b/n3fit/src/n3fit/model_trainer.py @@ -577,7 +577,6 @@ def _generate_observables( invcovmat_tr=experiment_data["invcovmat"][i], invcovmat_vl=experiment_data["invcovmat_vl"][i], n_replicas=len(self.replicas), - exp_data=self.experiments_data ) # Save the input(s) corresponding to this experiment diff --git a/n3fit/src/n3fit/performfit.py b/n3fit/src/n3fit/performfit.py index 312885de4c..4b589fcd22 100644 --- a/n3fit/src/n3fit/performfit.py +++ b/n3fit/src/n3fit/performfit.py @@ -41,7 +41,7 @@ def performfit( debug=False, maxcores=None, double_precision=False, - parallel_models=True, + parallel_models=False, ): """ This action will (upon having read a validcard) process a full PDF fit diff --git a/n3fit/src/n3fit/scripts/vp_setupfit.py b/n3fit/src/n3fit/scripts/vp_setupfit.py index e9af7c73c8..849207ecc8 100644 --- a/n3fit/src/n3fit/scripts/vp_setupfit.py +++ b/n3fit/src/n3fit/scripts/vp_setupfit.py @@ -216,10 +216,8 @@ def from_yaml(cls, o, *args, **kwargs): # Check positivity bound if file_content.get('positivity_bound') is not None: 
SETUPFIT_FIXED_CONFIG['actions_'].append('positivity_bound check_unpolarized_bc') - if (sam_t0 := file_content.get('sampling')) is not None: - SETUPFIT_FIXED_CONFIG['separate_multiplicative'] = sam_t0.get( - 'separate_multiplicative', False - ) + + # Sets default values if they are not present in the runcard for k, v in SETUPFIT_DEFAULTS.items(): file_content.setdefault(k, v) diff --git a/validphys2/examples/theory_covariance/chi2table_ht.yaml b/validphys2/examples/theory_covariance/chi2table_ht.yaml deleted file mode 100644 index e1327eebd8..0000000000 --- a/validphys2/examples/theory_covariance/chi2table_ht.yaml +++ /dev/null @@ -1,107 +0,0 @@ -# This is the driver template for vp-comparefits. It consists on a validphys -# runcard where some settings are missing and are to be filled by the -# vp-comparefits script. The settings below are a sample of such settings, kept -# for reference -# -# meta: -# title: The title of the Report -# keywords: [report_template] -# author: NNPDF Collaboration -# -# current: -# fit: {id: id_of_the_base_fit} -# pdf: {id: id_of_the_base_fit, label: "Current Fit"} -# theory: -# from_: fit -# theoryid: -# from_: theory -# speclabel: "Current Fit" -# -# reference: -# fit: {id: id_of_the_reference_fit} -# pdf: {id: id_of_the_reference_fit, label: "Reference Fit" } -# theory: -# from_: fit -# theoryid: -# from_: theory -# speclabel: "Reference Fit" - -pdfs: - - {id: "240816-06-7-01-lc", label: "HT low cuts"} - - {id: "240812-02-ABMP-lnv", label: "HT mid cuts"} - - {id: "240812-04-ABMP-lnv", label: "HT std. cuts"} - - {id: "240819_nnpdf40_lowcuts", label: "no HYT low cuts"} - - {id: "240807-midcuts", label: "no HT mid cuts"} - - {id: "NNPDF40_nnlo_as_01180_qcd", label: "no HT std cuts (NNPDF40)"} - -fits: - - {id: "240816-06-7-01-lc", label: "HT low cuts"} - - {id: "240812-02-ABMP-lnv", label: "HT mid cuts"} - - {id: "240812-04-ABMP-lnv", label: "HT std. 
cuts"} - - {id: "240819_nnpdf40_lowcuts", label: "no HYT low cuts"} - - {id: "240807-midcuts", label: "no HT mid cuts"} - - {id: "NNPDF40_nnlo_as_01180_qcd", label: "no HT std cuts (NNPDF40)"} - -use_cuts: "fromfit" -use_weights_in_covmat: False -use_thcovmat_if_present: True - -Q: 1.651 - -#template: report.md - -description: - from_: fit - -dataset_inputs: - from_: fit - -#dataspecs: -# - theoryid: -# from_: current -# pdf: -# from_: current -# fit: -# from_: current -# speclabel: -# from_: current -# -# - theoryid: -# from_: reference -# pdf: -# from_: reference -# fit: -# from_: reference -# speclabel: -# from_: reference - -Datanorm: - normalize_to: data - -DataGroups: - - metadata_group: nnpdf31_process - - metadata_group: experiment - -ProcessGroup: - metadata_group: nnpdf31_process - -template_text: | - Summary - ------- - {@ summarise_fits @} - - {@with DataGroups@} - $\chi^2$ by {@processed_metadata_group@} - ---------------------------------------- - {@plot_fits_groups_data_chi2@} - {@endwith@} - - $\chi^2$ by dataset - ------------------- - ### Plot - {@plot_fits_datasets_chi2@} - ### Table - {@ProcessGroup fits_chi2_table(show_total=true)@} - -actions_: - - report(main=true) diff --git a/validphys2/src/validphys/checks.py b/validphys2/src/validphys/checks.py index 4f958b0dc4..81844c4160 100644 --- a/validphys2/src/validphys/checks.py +++ b/validphys2/src/validphys/checks.py @@ -361,3 +361,23 @@ def check_darwin_single_process(NPROC): """ if platform.system() == "Darwin" and NPROC != 1: raise CheckError("NPROC must be set to 1 on OSX, because multithreading is not supported.") + + +@make_argcheck +def check_pc_parameters(pc_parameters, pc_func_type): + """Check that the parameters for the PC method are set correctly""" + for par in pc_parameters.values(): + # Check that the length of shifts is one less than the length of nodes. + if (len(par['yshift']) != len(par['nodes']) - 1) and pc_func_type not in [ + 'cubic', + 'linear', + ]: + raise ValueError( + f"The length of nodes does not match that of the list in {par['ht']}." + f"Check the runcard. Got {len(par['yshift'])} != {len(par['nodes'])}" + ) + elif (len(par['yshift']) != len(par['nodes'])) and pc_func_type in ['cubic', 'linear']: + raise ValueError( + f"The length of nodes does not match that of the list in {par['ht']}." + f"Check the runcard. 
Got {len(par['yshift'])} != {len(par['nodes'])}" + ) diff --git a/validphys2/src/validphys/commondata.py b/validphys2/src/validphys/commondata.py index e59ac5ce2c..7c68c0784f 100644 --- a/validphys2/src/validphys/commondata.py +++ b/validphys2/src/validphys/commondata.py @@ -38,7 +38,3 @@ def loaded_commondata_with_cuts(commondata, cuts): groups_dataset_inputs_loaded_cd_with_cuts = collect( "loaded_commondata_with_cuts", ("group_dataset_inputs_by_metadata", "data_input") ) - -groups_dataset_inputs_loaded_cd_with_cuts_byprocess = collect( - "loaded_commondata_with_cuts", ("group_dataset_inputs_by_process", "data") - ) \ No newline at end of file diff --git a/validphys2/src/validphys/config.py b/validphys2/src/validphys/config.py index 3c06912530..8171d625e8 100644 --- a/validphys2/src/validphys/config.py +++ b/validphys2/src/validphys/config.py @@ -1294,29 +1294,6 @@ def produce_loaded_user_covmat_path(self, user_covmat_path: str = ""): l = self.loader fileloc = l.check_vp_output_file(user_covmat_path) return fileloc - - - @configparser.explicit_node - def produce_covmat_custom(self, use_ht_uncertainties: bool = False): - if use_ht_uncertainties: - from validphys.theorycovariance.construction import thcov_ht - - return thcov_ht - else: - from validphys.theorycovariance.construction import covs_pt_prescrip - - return covs_pt_prescrip - - @configparser.explicit_node - def produce_combine_custom(self, use_ht_uncertainties: bool = False): - if use_ht_uncertainties: - from validphys.theorycovariance.construction import combine_by_type_ht - - return combine_by_type_ht - else: - from validphys.theorycovariance.construction import combine_by_type - - return combine_by_type @configparser.explicit_node def produce_nnfit_theory_covmat( @@ -1344,33 +1321,8 @@ def produce_nnfit_theory_covmat( from validphys.theorycovariance.construction import user_covmat_fitting f = user_covmat_fitting - elif use_ht_uncertainties: - # NOTE: this covmat is the same as for scale variations, which will result in a clash of - # table names if we wish to use them simultaneously - if use_user_uncertainties: - from validphys.theorycovariance.construction import total_theory_covmat_fitting - - f = total_theory_covmat_fitting - else: - from validphys.theorycovariance.construction import theory_covmat_custom_fitting - - f = theory_covmat_custom_fitting - - @functools.wraps(f) - def res(*args, **kwargs): - return f(*args, **kwargs) - - # Set this to get the same filename regardless of the action. - res.__name__ = "theory_covmat" - return res - - - @configparser.explicit_node - def produce_combine_by_type_custom(self, use_ht_uncertainties: bool = False): - if use_ht_uncertainties: - return validphys.theorycovariance.construction.combine_by_type_ht - return validphys.theorycovariance.construction.combine_by_type + return f def produce_fitthcovmat( self, use_thcovmat_if_present: bool = False, fit: (str, type(None)) = None @@ -1894,6 +1846,8 @@ def produce_theoryids(self, t0id, point_prescription): prescription. The options for the latter are defined in pointprescriptions.yaml. 
This hard codes the theories needed for each prescription to avoid user error."""
        th = t0id.id
+        if point_prescription == 'power corrections':
+            return NSList([t0id], nskey="theoryid")
 
         lsv = yaml_safe.load(read_text(validphys.scalevariations, "scalevariationtheoryids.yaml"))
 
@@ -1976,7 +1930,18 @@ def produce_filter_data(
         if not fakedata:
             return validphys.filters.filter_real_data
         else:
-            if inconsistent_fakedata:
+            # TODO we don't want to sample from the theory covmat for L1 data,
+            # but we do want to use the theory covmat for L2 data
+            if theorycovmatconfig is not None and theorycovmatconfig.get(
+                "use_thcovmat_in_fakedata_sampling"
+            ):
+                # NOTE: By the time we run theory covmat closure tests,
+                # hopefully the generation of pseudodata will be done in python.
+                raise ConfigError(
+                    "Generating L1 closure test data which samples from the theory "
+                    "covariance matrix has not been implemented yet."
+                )
+            elif inconsistent_fakedata:
                 log.info("Using filter for inconsistent closure data")
                 return validphys.filters.filter_inconsistent_closure_data_by_experiment
 
@@ -2004,6 +1969,26 @@ def produce_total_phi_data(self, fitthcovmat):
             return validphys.results.total_phi_data_from_experiments
         return validphys.results.dataset_inputs_phi_data
 
+    # TODO: to be removed once we are sure that the triangular
+    # function for the prior is the only one of interest
+    def produce_pc_func_type(self, theorycovmatconfig=None):
+        if theorycovmatconfig is None:
+            raise ValueError("theorycovmatconfig is not defined in the runcard.")
+        return theorycovmatconfig.get('func_type', 'linear')
+
+    @configparser.explicit_node
+    def produce_covs_pt_prescrip(self, point_prescription):
+        if point_prescription != 'power corrections':
+            from validphys.theorycovariance.construction import covs_pt_prescrip_mhou
+
+            f = covs_pt_prescrip_mhou
+        else:
+            from validphys.theorycovariance.construction import covs_pt_prescrip_pc
+
+            f = covs_pt_prescrip_pc
+
+        return f
+
 
 class Config(report.Config, CoreConfig):
     """The effective configuration parser class."""
diff --git a/validphys2/src/validphys/dataplots.py b/validphys2/src/validphys/dataplots.py
index e96bc9ccd7..3d75b837aa 100644
--- a/validphys2/src/validphys/dataplots.py
+++ b/validphys2/src/validphys/dataplots.py
@@ -2,8 +2,6 @@
 Plots of relations between data PDFs and fits.
 """
 
-from __future__ import generator_stop
-
 from collections import defaultdict
 from collections.abc import Sequence
 import itertools
@@ -28,7 +26,7 @@
 from validphys.core import CutsPolicy, MCStats, cut_mask, load_commondata
 from validphys.plotoptions.core import get_info, kitable, transform_result
 from validphys.results import chi2_stat_labels, chi2_stats
-from validphys.sumrules import POL_LIMS, partial_polarized_sum_rules
+from validphys.sumrules import POL_LIMS
 from validphys.utils import sane_groupby_iter, scale_from_grid, split_ranges
 from validphys.commondata import loaded_commondata_with_cuts
 from validphys.covmats import shifts_from_systematics
@@ -1567,7 +1565,7 @@ def next_options():
                 # if group is None then make sure that shows on legend.
key = str(group) elif marker_by == "kinematics": - key = None + key = None else: raise ValueError('Unknown marker_by value') @@ -1599,8 +1597,10 @@ def next_options(): # This is to get the label key coords = [], [] if marker_by == "kinematics": - ht_magnitude = np.concatenate( cvdict[key]) / (coords[1] * (1 - coords[0]) ) - out = ax.scatter(*coords, marker='.', c=ht_magnitude, cmap="viridis", norm=mcolors.LogNorm()) + ht_magnitude = np.concatenate(cvdict[key]) / (coords[1] * (1 - coords[0])) + out = ax.scatter( + *coords, marker='.', c=ht_magnitude, cmap="viridis", norm=mcolors.LogNorm() + ) clb = fig.colorbar(out) clb.ax.set_title(r'$F_\mathrm{exp}\frac{1}{Q^2(1-x)}$') ax.plot(*coords, label=key, markeredgewidth=1, markeredgecolor=None, **key_options[key]) diff --git a/validphys2/src/validphys/results.py b/validphys2/src/validphys/results.py index 27f320484c..49383ebfa9 100644 --- a/validphys2/src/validphys/results.py +++ b/validphys2/src/validphys/results.py @@ -239,7 +239,7 @@ def data_index(data): experiments_data = collect("data", ("group_dataset_inputs_by_experiment",)) -procs_data = collect("data", ("group_dataset_inputs_by_process",)) +groups_data_by_process = collect("data", ("group_dataset_inputs_by_process",)) def groups_index(groups_data, diagonal_basis=True): @@ -280,8 +280,8 @@ def experiments_index(experiments_data, diagonal_basis=True): return groups_index(experiments_data, diagonal_basis) -def procs_index(procs_data): - return groups_index(procs_data) +def procs_index(groups_data_by_process): + return groups_index(groups_data_by_process) def groups_data_values(group_result_table): @@ -814,10 +814,12 @@ def groups_chi2_table(groups_data, pdf, groups_chi2, groups_each_dataset_chi2): @table -def procs_chi2_table(procs_data, pdf, groups_chi2_by_process, groups_each_dataset_chi2_by_process): +def procs_chi2_table( + groups_data_by_process, pdf, groups_chi2_by_process, groups_each_dataset_chi2_by_process +): """Same as groups_chi2_table but by process""" return groups_chi2_table( - procs_data, pdf, groups_chi2_by_process, groups_each_dataset_chi2_by_process + groups_data_by_process, pdf, groups_chi2_by_process, groups_each_dataset_chi2_by_process ) diff --git a/validphys2/src/validphys/theorycovariance/construction.py b/validphys2/src/validphys/theorycovariance/construction.py index 727bc75ca7..05b4a70747 100644 --- a/validphys2/src/validphys/theorycovariance/construction.py +++ b/validphys2/src/validphys/theorycovariance/construction.py @@ -4,21 +4,20 @@ """ from collections import defaultdict, namedtuple +import dataclasses import logging -import operator import numpy as np import pandas as pd -import scipy.linalg as la -import scipy.interpolate as scint from reportengine import collect from reportengine.table import table pass -from validphys.results import results, results_central -from validphys.convolution import central_fk_predictions +from validphys.checks import check_pc_parameters from validphys.core import PDF +from validphys.results import results, results_central +from validphys.theorycovariance.higher_twist_functions import compute_deltas_pc from validphys.theorycovariance.theorycovarianceutils import ( check_correct_theory_combination, check_fit_dataset_order_matches_grouped, @@ -52,11 +51,18 @@ def theory_covmat_dataset(results, results_central_bytheoryids, point_prescripti return thcovmat -ProcessInfo = namedtuple("ProcessInfo", ("preds", "namelist", "sizes", "data")) +@dataclasses.dataclass(frozen=True) +class ProcessInfo: + """Dataclass containing the 
information needed to construct the theory covariance matrix.""" + preds: dict + namelist: dict + sizes: dict + data_spec: dict -def combine_by_type(each_dataset_results_central_bytheory): - """Groups the datasets bu process and returns an instance of the ProcessInfo class + +def combine_by_type(each_dataset_results_central_bytheory, groups_data_by_process): + """Groups the datasets by process and returns an instance of the ProcessInfo class Parameters ---------- @@ -73,6 +79,7 @@ def combine_by_type(each_dataset_results_central_bytheory): dataset_size = defaultdict(list) theories_by_process = defaultdict(list) ordered_names = defaultdict(list) + data_spec = defaultdict(list) for dataset in each_dataset_results_central_bytheory: name = dataset[0][0].name theory_centrals = [x[1].central_value for x in dataset] @@ -82,230 +89,19 @@ def combine_by_type(each_dataset_results_central_bytheory): theories_by_process[proc_type].append(theory_centrals) for key, item in theories_by_process.items(): theories_by_process[key] = np.concatenate(item, axis=1) - process_info = ProcessInfo( - preds=theories_by_process, namelist=ordered_names, sizes=dataset_size, data=None - ) - return process_info -def combine_by_type_ht(each_dataset_results, groups_dataset_inputs_loaded_cd_with_cuts_byprocess): - """same as combine_by_type but now for a single theory and including commondata info""" - dataset_size = defaultdict(list) - theories_by_process = defaultdict(list) - cd_by_process = defaultdict(list) - ordered_names = defaultdict(list) - for dataset, cd in zip( - each_dataset_results, groups_dataset_inputs_loaded_cd_with_cuts_byprocess - ): - name = cd.setname - if name != dataset[0].name: - raise ValueError("The underlying datasets do not match!") - theory_centrals = [x.central_value for x in dataset] - dataset_size[name] = len(theory_centrals[0]) - proc_type = process_lookup(name) - ordered_names[proc_type].append(name) - cd_by_process[proc_type].append(cd.kinematics.values) - theories_by_process[proc_type].append(theory_centrals) + # Store DataGroupSpecs instances + for group_proc in groups_data_by_process: + for exp_set in group_proc.datasets: + data_spec[exp_set.name] = exp_set - for key in theories_by_process.keys(): - theories_by_process[key] = np.concatenate(theories_by_process[key], axis=1) - cd_by_process[key] = np.concatenate(cd_by_process[key], axis=0) process_info = ProcessInfo( - preds=theories_by_process, namelist=ordered_names, sizes=dataset_size, data=cd_by_process + preds=theories_by_process, namelist=ordered_names, sizes=dataset_size, data_spec=data_spec ) return process_info -def thcov_ht(combine_by_type_ht, H2_list, HL_list, groups_data_by_process, pdf, ht_knots = list(), reverse: bool = False): - """ - Same as `thcov_HT` but implementing theory covariance method for each node of the spline. - Note that 'groups_data_by_process' contains the same info as 'combine_by_type_ht'. At some - point we should use only one of them. 
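For orientation, the bookkeeping done by the updated `combine_by_type` can be sketched as follows; the dataset names, process labels and prediction values are invented, and the real function additionally keeps one prediction vector per theory (concatenating along axis 1) and stores the `DataSetSpec` objects in `data_spec`.

```python
from collections import defaultdict
import numpy as np

# Hypothetical per-dataset central predictions and a toy name -> process lookup
predictions = {"DATASET_A": np.ones(3), "DATASET_B": np.arange(4.0), "DATASET_C": np.zeros(2)}
process_of = {"DATASET_A": "DIS NC", "DATASET_B": "DIS NC", "DATASET_C": "JETS"}

preds, namelist, sizes = defaultdict(list), defaultdict(list), {}
for name, central in predictions.items():
    proc = process_of[name]
    sizes[name] = len(central)       # datapoints per dataset
    namelist[proc].append(name)      # dataset ordering within each process
    preds[proc].append(central)

# Concatenate per process along the data axis, as combine_by_type does
preds = {proc: np.concatenate(arrs) for proc, arrs in preds.items()}
print(namelist["DIS NC"], preds["DIS NC"].shape)  # ['DATASET_A', 'DATASET_B'] (7,)
```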
- """ - process_info = combine_by_type_ht - running_index_tot = 0 - start_proc_by_exp = defaultdict(list) - deltas = defaultdict(list) - x_knots = list() - included_proc = ["DIS NC"] - excluded_exp = {"DIS NC" : []} - included_exp = {} - for proc in included_proc: - aux = [] - for exp in process_info.namelist[proc]: - if exp not in excluded_exp[proc]: - aux.append(exp) - included_exp[proc] = aux - - if len(ht_knots) == 0: - # ABMP parametrisation - x_knots = [0.0, 0.1, 0.3, 0.5, 0.7, 0.9, 1] - else: - x_knots = ht_knots - - # Check that H2_list and HL_list have the same size as x - if (len(H2_list) != len(x_knots)) or (len(HL_list) != len(x_knots)): - raise ValueError(f"The size of HT parameters does not match the number of nodes in the spline.") - - for i_proc, proc in enumerate(process_info.namelist.keys()): - running_index_proc = 0 - - for i_exp, exp in enumerate(process_info.namelist[proc]): - # Locate position of the experiment - size = process_info.sizes[exp] - dataset = groups_data_by_process[i_proc].datasets[i_exp] - start_proc_by_exp[exp] = running_index_tot - running_index_tot += size - running_index_proc += size - kin_dict = {} - - # Compute shifts only for a subset of processes - if proc in included_proc and exp in included_exp[proc]: - #central = process_info.preds[proc][1][start_proc_by_exp[exp] : size] # Probably this is deprecated - kin_dict['x'] = process_info.data[proc].T[0][running_index_proc - size : running_index_proc] - kin_dict['Q2'] = process_info.data[proc].T[1][running_index_proc - size : running_index_proc] - kin_dict['y'] = process_info.data[proc].T[2][running_index_proc - size : running_index_proc] - kin_size = kin_dict['x'].size - target = extract_target(dataset) - - # Loop over the parameter - for i in range(len(x_knots)): - PC_2, PC_L = compute_ht_parametrisation(i, x_knots, kin_dict, exp, H2_list, HL_list, reverse=reverse) - if target == 'proton': - deltas[f"p({i+1}+,0)"] += [PC_2] - deltas[f"p(0,{i+1}+)"] += [PC_L] - deltas[f"d({i+1}+,0)"] += [np.zeros(kin_size)] - deltas[f"d(0,{i+1}+)"] += [np.zeros(kin_size)] - elif target == 'deuteron': - deltas[f"p({i+1}+,0)"] += [np.zeros(kin_size)] - deltas[f"p(0,{i+1}+)"] += [np.zeros(kin_size)] - deltas[f"d({i+1}+,0)"] += [PC_2] - deltas[f"d(0,{i+1}+)"] += [PC_L] - elif target == 'ratio': - deltas[f"p({i+1}+,0)"] += [compute_ratio_delta(dataset, pdf, "p", PC_2) - compute_ratio_delta(dataset, pdf)] - deltas[f"p(0,{i+1}+)"] += [compute_ratio_delta(dataset, pdf, "p", PC_L) - compute_ratio_delta(dataset, pdf)] - deltas[f"d({i+1}+,0)"] += [compute_ratio_delta(dataset, pdf, "d", PC_2) - compute_ratio_delta(dataset, pdf)] - deltas[f"d(0,{i+1}+)"] += [compute_ratio_delta(dataset, pdf, "d", PC_L) - compute_ratio_delta(dataset, pdf)] - else: - raise ValueError("Could not detect target.") - - # Construct theory covmat - covmats = defaultdict(list) - for proc1 in included_proc: - for proc2 in included_proc: - for i, exp1 in enumerate(included_exp[proc1]): - for j, exp2 in enumerate(included_exp[proc2]): - s = np.zeros(shape=(deltas["p(1+,0)"][i].size, deltas["p(1+,0)"][j].size)) - for par in deltas.keys(): - s += np.outer(deltas[par][i], deltas[par][j]) - start_locs = (start_proc_by_exp[exp1], start_proc_by_exp[exp2]) - covmats[start_locs] = s - return covmats - - -def extract_target(dataset): - if dataset.op == "NULL": - if "_P_" in dataset.name or "HERA" in dataset.name: - return "proton" - elif "_D_" in dataset.name: - return "deuteron" - else: - raise ValueError(f"No target detected for {dataset.name}") - elif dataset.op 
== "RATIO": - return "ratio" - else: - raise ValueError(f"Unexpected operator in {dataset.name}: {dataset.op}") - - -def compute_ratio_delta(dataset, pdf: PDF, target = None, PC: np.array = None) -> np.array: - """This function computes the predictions as in validphys.convolution._predictions, - but for ratio and including higher twist terms in bot NUM and """ - opfunc = operator.truediv - cuts = dataset.cuts - all_predictions = [] - for fk in dataset.fkspecs: - fk_w_cuts = fk.load_with_cuts(cuts) - tmp = central_fk_predictions(fk_w_cuts, pdf) - all_predictions.append(np.concatenate(tmp.values)) - if target == "d": - all_predictions[0] += PC - elif target == "p": - all_predictions[1] += PC - return opfunc(*all_predictions) - - -def compute_ht_parametrisation( - index: int, - nodes: list, - kin_dict: dict, - exp: str, - h2_prior: list, - hl_prior: list, - reverse: bool = False -): - if not reverse: - shifted_H2_list = [0 for k in range(len(nodes))] - shifted_HL_list = [0 for k in range(len(nodes))] - shifted_H2_list[index] = h2_prior[index] - shifted_HL_list[index] = hl_prior[index] - else: - shifted_H2_list = h2_prior.copy() - shifted_HL_list = hl_prior.copy() - shifted_H2_list[index] = 0 - shifted_HL_list[index] = 0 - - H_2 = scint.CubicSpline(nodes, shifted_H2_list) - H_L = scint.CubicSpline(nodes, shifted_HL_list) - H_2 = np.vectorize(H_2) - H_L = np.vectorize(H_L) - - x = kin_dict['x'] - y = kin_dict['y'] - Q2 = kin_dict['Q2'] - N2, NL = compute_normalisation_by_experiment(exp, x, y, Q2) - - PC_2 = N2 * H_2(x) / Q2 - PC_L = NL * H_L(x) / Q2 - return PC_2, PC_L - - -def compute_normalisation_by_experiment(experiment_name, x, y, Q2): - N_2 = np.zeros(shape=y.shape) - N_L = np.zeros(shape=y.shape) - - if "SIGMA" in experiment_name: - - if "HERA_NC" in experiment_name or "HERA_CC" in experiment_name or "NMC" in experiment_name: - yp = 1 + np.power(1 - y, 2) - yL = np.power(y, 2) - - if "HERA_NC" in experiment_name or "NMC" in experiment_name: - N_2 = 1 - N_L = - yL / yp - - elif "HERA_CC" in experiment_name: - N_2 = 1 / 4 * yp - N_L = - N_2 * yL / yp - - if "CHORUS_CC" in experiment_name: - yL = np.power(y, 2) - Gf = 1.1663787e-05 - Mh = 0.938 - MW2 = 80.398 ** 2 - yp = 1 + np.power(1 - y, 2) - 2 * np.power(x * y * Mh, 2) / Q2 - N_2 = Gf**2 * Mh * yp / ( 2 * np.pi * np.power( 1 + Q2 / MW2, 2) ) - N_L = - N_2 * yL / yp - - elif "F2" in experiment_name: - N_2 = np.ones(shape=x.shape) - N_L = np.zeros(shape=x.shape) - - else: - raise ValueError(f"The normalisation for the observable is not known.") - - return N_2, N_L - - -def covmat_3fpt(name1, name2, deltas1, deltas2): +def covmat_3fpt(deltas1, deltas2): """Returns theory covariance sub-matrix for 3pt factorisation scale variation *only*, given two dataset names and collections of scale variation shifts""" @@ -428,6 +224,40 @@ def covmat_n3lo_ad(name1, name2, deltas1, deltas2): return 1 / norm * s +def covmat_power_corrections(deltas1, deltas2): + """Returns the the theory covariance sub-matrix for power + corrections. The two arguments `deltas1` and `deltas2` contain + the shifts for the firs and second experiment, respectively. 
+ + The shifts are given in this form: + ``` + deltas1 = {shift1_label: array1_of_shifts1, + shift2_label: array1_of_shifts2, + shift3_label: array1_of_shifts3, + ...} + deltas2 = {shift1_label: array2_of_shifts1, + shift2_label: array2_of_shifts2, + shift3_label: array2_of_shifts3, + ...} + ``` + The sub-matrix is computed using the 5-point prescription, thus + + s = array1_of_shifts1 X array2_of_shifts1 + array1_of_shifts2 X array2_of_shifts2 + ... + + where `X` is the outer product. + """ + # Check that `deltas1` and `deltas2` have the same shifts + if deltas1.keys() != deltas2.keys(): + raise RuntimeError('The two dictionaries do not contain the same shifts.') + + size1 = next(iter(deltas1.values())).size + size2 = next(iter(deltas2.values())).size + s = np.zeros(shape=(size1, size2)) + for shift in deltas1.keys(): + s += np.outer(deltas1[shift], deltas2[shift]) + return s + + def compute_covs_pt_prescrip(point_prescription, name1, deltas1, name2=None, deltas2=None): """Utility to compute the covariance matrix by prescription given the shifts with respect to the central value for a pair of processes. @@ -498,28 +328,30 @@ def compute_covs_pt_prescrip(point_prescription, name1, deltas1, name2=None, del # alphas is correlated for all datapoints and the covmat construction is # therefore equivalent to that of the factorization scale variations s = covmat_3fpt(deltas1, deltas2) + elif point_prescription == 'power corrections': + # Shifts computed from power corrected predictions + s = covmat_power_corrections(deltas1, deltas2) return s @check_correct_theory_combination -def covs_pt_prescrip(combine_by_type, point_prescription): +def covs_pt_prescrip_mhou(combine_by_type, point_prescription): """Produces the sub-matrices of the theory covariance matrix according to a point prescription which matches the number of input theories. - If 5 theories are provided, a scheme 'bar' or 'nobar' must be chosen in the runcard in order to specify the prescription. Sub-matrices correspond to applying the scale variation prescription to each pair of processes in turn, using a different procedure for the case where the processes are the same relative to when they are different.""" - process_info = combine_by_type running_index = 0 + + covmats = defaultdict(list) start_proc = defaultdict(list) for name in process_info.preds: size = len(process_info.preds[name][0]) start_proc[name] = running_index running_index += size - covmats = defaultdict(list) for name1 in process_info.preds: for name2 in process_info.preds: central1, *others1 = process_info.preds[name1] @@ -529,15 +361,60 @@ def covs_pt_prescrip(combine_by_type, point_prescription): s = compute_covs_pt_prescrip(point_prescription, name1, deltas1, name2, deltas2) start_locs = (start_proc[name1], start_proc[name2]) covmats[start_locs] = s + + return covmats + + +# TODO `pc_func_type`will be removed in the future +@check_pc_parameters +def covs_pt_prescrip_pc( + combine_by_type, + point_prescription, + pdf: PDF, + pc_parameters, + pc_included_procs, + pc_excluded_exps, + pc_func_type, +): + """Produces the sub-matrices of the theory covariance matrix for power + corrections. 
Sub-matrices correspond to applying power corrected shifts + to each pair of `datasets`.""" + process_info = combine_by_type + datagroup_spec = process_info.data_spec + running_index = 0 + + covmats = defaultdict(list) + start_proc_by_exp = defaultdict(list) + for exp_name, data_spec in datagroup_spec.items(): + start_proc_by_exp[exp_name] = running_index + running_index += data_spec.load_commondata().ndata + + for exp_name1, data_spec1 in datagroup_spec.items(): + for exp_name2, data_spec2 in datagroup_spec.items(): + process_type1 = process_lookup(exp_name1) + process_type2 = process_lookup(exp_name2) + + is_excluded_exp = any(name in pc_excluded_exps for name in [exp_name1, exp_name2]) + is_included_proc = any( + proc not in pc_included_procs for proc in [process_type1, process_type2] + ) + if not (is_excluded_exp or is_included_proc): + deltas1 = compute_deltas_pc(data_spec1, pdf, pc_parameters, pc_func_type) + deltas2 = compute_deltas_pc(data_spec2, pdf, pc_parameters, pc_func_type) + s = compute_covs_pt_prescrip( + point_prescription, exp_name1, deltas1, exp_name2, deltas2 + ) + start_locs = (start_proc_by_exp[exp_name1], start_proc_by_exp[exp_name2]) + covmats[start_locs] = s return covmats @table -def theory_covmat_custom(covmat_custom, procs_index, combine_by_type_custom): +def theory_covmat_custom_per_prescription(covs_pt_prescrip, procs_index, combine_by_type): """Takes the individual sub-covmats between each two processes and assembles them into a full covmat. Then reshuffles the order from ordering by process to ordering by experiment as listed in the runcard""" - process_info = combine_by_type_custom + process_info = combine_by_type # Construct a covmat_index based on the order of experiments as they are in combine_by_type # NOTE: maybe the ordering of covmat_index is always the same as that of procs_index? @@ -553,7 +430,7 @@ def theory_covmat_custom(covmat_custom, procs_index, combine_by_type_custom): # Put the covariance matrices between two process into a single covariance matrix total_datapoints = sum(process_info.sizes.values()) mat = np.zeros((total_datapoints, total_datapoints), dtype=np.float32) - for locs, cov in covmat_custom.items(): + for locs, cov in covs_pt_prescrip.items(): xsize, ysize = cov.shape mat[locs[0] : locs[0] + xsize, locs[1] : locs[1] + ysize] = cov df = pd.DataFrame(mat, index=covmat_index, columns=covmat_index) @@ -561,7 +438,7 @@ def theory_covmat_custom(covmat_custom, procs_index, combine_by_type_custom): @table -def fromfile_covmat(covmatpath, procs_data, procs_index): +def fromfile_covmat(covmatpath, groups_data_by_process, procs_index): """Reads a general theory covariance matrix from file. 
Then 1: Applies cuts to match experiment covariance matrix 2: Expands dimensions to match experiment covariance matrix @@ -575,7 +452,7 @@ def fromfile_covmat(covmatpath, procs_data, procs_index): # Reordering covmat to match exp order in runcard # Datasets in exp covmat dslist = [] - for group in procs_data: + for group in groups_data_by_process: for ds in group.datasets: dslist.append(ds.name) # Datasets in filecovmat in exp covmat order @@ -590,7 +467,7 @@ def fromfile_covmat(covmatpath, procs_data, procs_index): # ------------- # # Loading cuts to apply to covariance matrix indextuples = [] - for group in procs_data: + for group in groups_data_by_process: for ds in group.datasets: # Load cuts for each dataset in the covmat if ds.name in filecovmat.index.get_level_values(1): @@ -656,7 +533,7 @@ def fromfile_covmat(covmatpath, procs_data, procs_index): @table -def user_covmat(procs_data, procs_index, loaded_user_covmat_path): +def user_covmat(groups_data_by_process, procs_index, loaded_user_covmat_path): """ General theory covariance matrix provided by the user. Useful for testing the impact of externally produced @@ -666,7 +543,7 @@ def user_covmat(procs_data, procs_index, loaded_user_covmat_path): ``user_covmat_path`` in ``theorycovmatconfig`` in the runcard. For more information see documentation. """ - return fromfile_covmat(loaded_user_covmat_path, procs_data, procs_index) + return fromfile_covmat(loaded_user_covmat_path, groups_data_by_process, procs_index) @table @@ -755,4 +632,3 @@ def experimentplustheory_corrmat_custom(procs_covmat, theory_covmat_custom): each_dataset_results = collect(results, ("group_dataset_inputs_by_process", "data")) -groups_data_by_process = collect("data", ("group_dataset_inputs_by_process",)) \ No newline at end of file diff --git a/validphys2/src/validphys/theorycovariance/higher_twist_functions.py b/validphys2/src/validphys/theorycovariance/higher_twist_functions.py new file mode 100644 index 0000000000..972576f10d --- /dev/null +++ b/validphys2/src/validphys/theorycovariance/higher_twist_functions.py @@ -0,0 +1,1120 @@ +""" +This module contains the utilities for the computation of the shifts in +the theoretical predictions due to the effects of power corrections. Contrary +to scale variations, in the case of power corrections the shifts are not +computed using theories. The shifts are computed at "runtime" during vp-setupfit. + +The aim is that, after shifts being computed, the covmat can be constructed using +the same functions implemented for scale variations (e.g. `covs_pt_prescrip`). +The way shifts are computed depends also on the point prescription. In the case of +scale variations, the point prescription specifies the theories whereby shifts are +computed. In the case of power corrections, shifts and covmat constructions are +computed using a 5-point prescription extended to every parameter used to define +the power correction. + +This module comprehends a bunch of ``factory`` functions such as `DIS_F2_pc`. Each +of these functions returns another function that computes the shifts taking as arguments +the values of the parameters used to parametrise the power corrections. In +other words, these factory functions hard-code the dependence on the kinematic +and leave the dependence on the parameters free. In this way, the shifts +can be computed using different combinations of parameters (i.e. different prescriptions) +if needed. 
+""" + +from collections import defaultdict +import operator + +import numpy as np +import numpy.typing as npt + +from validphys.convolution import central_fk_predictions +from validphys.core import PDF, DataSetSpec + +GEV_CM2_CONV = 3.893793e10 +GF = 1.1663787e-05 # Fermi's constant [GeV^-2] +Mh = 0.938 # Proton's mass in GeV/c^2 +MW = 80.398 # W boson mass in GeV/c^2 + +F2P_exps = ['SLAC_NC_NOTFIXED_P_EM-F2', 'BCDMS_NC_NOTFIXED_P_EM-F2'] +F2D_exps = ['SLAC_NC_NOTFIXED_D_EM-F2', 'BCDMS_NC_NOTFIXED_D_EM-F2'] +NC_SIGMARED_P_EM = ['NMC_NC_NOTFIXED_P_EM-SIGMARED', 'HERA_NC_318GEV_EM-SIGMARED'] +NC_SIGMARED_P_EP = [ + 'HERA_NC_225GEV_EP-SIGMARED', + 'HERA_NC_251GEV_EP-SIGMARED', + 'HERA_NC_300GEV_EP-SIGMARED', + 'HERA_NC_318GEV_EP-SIGMARED', +] +NC_SIGMARED_P_EAVG = ['HERA_NC_318GEV_EAVG_CHARM-SIGMARED', 'HERA_NC_318GEV_EAVG_BOTTOM-SIGMARED'] + + +# TODO This function will be deleted in the future +def step_function(a: npt.ArrayLike, y_shift: npt.ArrayLike, bin_edges: npt.ArrayLike) -> np.ndarray: + """ + This function defines the step function used to construct the prior. The bins of the step + function are constructed using pairs of consecutive points. For instance, given the set of + points [0.0, 0.1, 0.3, 0.5], there will be three bins with edges [[0.0, 0.1], [0.1, 0.3], + 0.3, 0.5]]. Each bin is coupled with a shift, which correspond to the y-value of the bin. + + Parameters + ---------- + a: ArrayLike of float + A one-dimensional array of points at which the function is evaluated. + y_shift: ArrayLike of float + A one-dimensional array whose elements represent the y-value of each bin + bin_edges: ArrayLike of float + A one-dimensional array containing the edges of the bins. The bins are + constructed using pairs of consecutive points. + + Return + ------ + A one-dimensional array containing the function values evaluated at the points + specified in `a`. + """ + res = np.zeros_like(a) + for shift_pos, shift in enumerate(y_shift): + bin_low = bin_edges[shift_pos] + bin_high = bin_edges[shift_pos + 1] + condition = np.multiply( + a >= bin_low, a < bin_high if shift_pos != len(y_shift) - 1 else a <= bin_high + ) + res = np.add(res, [shift if cond else 0.0 for cond in condition]) + return res + + +# TODO This function will be deleted in the future +def cubic_spline_function( + a: npt.ArrayLike, y_shift: npt.ArrayLike, nodes: npt.ArrayLike +) -> np.ndarray: + """ + This function defines the cubic spline function used to construct the prior. The spline + is constructed using the nodes specified in `nodes` and the y-values in `y_shift`. The + spline is evaluated at the points specified in `a`. + + Parameters + ---------- + a: ArrayLike of float + A one-dimensional array of points at which the function is evaluated. + y_shift: ArrayLike of float + A one-dimensional array whose elements represent the y-value of each bin + nodes: ArrayLike of float + A one-dimensional array containing the nodes used to construct the spline. + + Return + ------ + A one-dimensional array containing the function values evaluated at the points + specified in `a`. + """ + from scipy.interpolate import CubicSpline + + cs = CubicSpline(nodes, y_shift) + return cs(a) + + +def linear_bin_function( + a: npt.ArrayLike, y_shift: npt.ArrayLike, bin_edges: npt.ArrayLike +) -> np.ndarray: + """ + This function defines the linear bin function used to construct the prior. 
Specifically, + the prior is constructed using a triangular function whose value at the peak of the node + is linked to the right and left nodes using a straight line. + + Parameters + ---------- + a: ArrayLike of float + A one-dimensional array of points at which the function is evaluated. + y_shift: ArrayLike of float + A one-dimensional array whose elements represent the y-value of each bin + bin_nodes: ArrayLike of float + A one-dimensional array containing the edges of the bins. The bins are + constructed using pairs of consecutive points. + + Return + ------ + A one-dimensional array containing the function values evaluated at the points + specified in `a`. + """ + res = np.zeros_like(a) + for shift_pos, shift in enumerate(y_shift): + if shift_pos > 0 and shift_pos < len(y_shift) - 1: + bin_low = bin_edges[shift_pos - 1] + bin_high = bin_edges[shift_pos + 1] + bin_mid = bin_edges[shift_pos] + m1 = shift / (bin_mid - bin_low) + m2 = shift / (bin_high - bin_mid) + elif shift_pos == 0: # Left-most bin + bin_high = bin_edges[shift_pos + 1] + bin_mid = bin_edges[shift_pos] + bin_low = bin_mid + m1 = 0.0 + m2 = shift / (bin_high - bin_mid) + else: # Right-most bin + bin_low = bin_edges[shift_pos - 1] + bin_mid = bin_edges[shift_pos] + bin_high = bin_mid + m1 = shift / (bin_mid - bin_low) + m2 = 0.0 + cond_low = np.multiply( + a >= bin_low, a < bin_mid if shift_pos != len(y_shift) - 1 else a <= bin_mid + ) + cond_high = np.multiply( + a >= bin_mid, a < bin_high if shift_pos != len(y_shift) - 1 else a <= bin_high + ) + res = np.add(res, [m1 * (val - bin_low) if cond else 0.0 for val, cond in zip(a, cond_low)]) + res = np.add( + res, [-m2 * (val - bin_high) if cond else 0.0 for val, cond in zip(a, cond_high)] + ) + return res + + +def dis_pc_func( + delta_h: npt.ArrayLike, + nodes: npt.ArrayLike, + x: npt.ArrayLike, + Q2: npt.ArrayLike, + pc_func_type: str = "step", +) -> npt.ArrayLike: + """ + This function builds the functional form of the power corrections for DIS-like processes. + Power corrections are modelled using a step-function. The edges of the bins used in the + step-function are specified by the list of nodes. The y-values for each bin are given + by the array `delta_h`. The power corrections will be computed for the pairs (xb, Q2), + where `xb` is the Bjorken x. The power correction for DIS processes are rescaled by Q2. + + Parameters + ---------- + delta_h: ArrayLike + One-dimensional array containing the shifts for each bin. + nodes: ArrayLike + One-dimensional array containing the edges of the bins in x-Bjorken. + x: ArrayLike + List of x-Bjorken points at which the power correction function is evaluated. + Q2: ArrayLike + List of scales where the power correction function is evaluated. + + Returns + ------- + A one-dimensional array of power corrections for DIS-like processes where each point is + evaluated at the kinematic pair (x,Q2). + """ + if pc_func_type == "step": + PC = step_function(x, delta_h, nodes) / Q2 + elif pc_func_type == "linear": + PC = linear_bin_function(x, delta_h, nodes) / Q2 + elif pc_func_type == "cubic": + PC = cubic_spline_function(x, delta_h, nodes) / Q2 + else: + raise ValueError(f"Invalid function type: {pc_func_type} is not supported.") + + return PC + + +def jets_pc_func( + delta_h: npt.ArrayLike, + nodes: npt.ArrayLike, + pT: npt.ArrayLike, + rap: npt.ArrayLike, + pc_func_type: str = "step", +) -> npt.ArrayLike: + """ + Same as `dis_pc_func`, but for jet data. 
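A note on the shapes used by `dis_pc_func` and the prior conventions: with the step form the number of y-shifts is one less than the number of nodes, while the linear (triangular) and cubic forms use one shift per node, which is the relation enforced by `check_pc_parameters` and the convention used in the example runcard above. Below is a minimal sketch of the step case, using `np.digitize` as a simplified stand-in for `step_function` (nodes, shifts and kinematics are hypothetical).

```python
import numpy as np

# Hypothetical prior: nodes in x-Bjorken and one y-shift per bin (step form,
# so len(yshift) == len(nodes) - 1)
nodes = np.array([0.0, 0.1, 0.3, 0.5, 0.7, 0.9, 1.0])
yshift = np.array([0.2, 0.2, 0.2, 0.2, 0.2, 0.0])

x = np.array([0.05, 0.2, 0.85])
Q2 = np.array([4.0, 10.0, 2.5])

# Simplified equivalent of the step evaluation: pick the bin of each x point
bins = np.clip(np.digitize(x, nodes) - 1, 0, len(yshift) - 1)
PC = yshift[bins] / Q2  # DIS power corrections are rescaled by 1/Q^2

print(PC)  # [0.05 0.02 0.08]
```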
Here, the kinematic pair consists of the rapidity + `rap` and the transverse momentum `pT`. + + Parameters + ---------- + delta_h: ArrayLike + One-dimensional array containing the shifts for each bin. + nodes: ArrayLike + One-dimensional array containing the edges of the bins in rapidity. + rap: ArrayLike + List of rapidity points at which the power correction is evaluated. + pT: ArrayLike + List of pT points at which the power correction is evaluated. + + Returns + ------- + A one-dimensional array of power corrections for jet processes where each point is + evaluated at the kinematic pair (y, pT). + """ + if pc_func_type == "step": + PC = step_function(rap, delta_h, nodes) / pT + elif pc_func_type == "linear": + PC = linear_bin_function(rap, delta_h, nodes) / pT + elif pc_func_type == "cubic": + PC = cubic_spline_function(rap, delta_h, nodes) / pT + else: + raise ValueError(f"Invalid function type: {pc_func_type} is not supported.") + return PC + + +def jet_single_par(delta_h: float, pT: npt.ArrayLike, rap: npt.ArrayLike) -> npt.ArrayLike: + ret = [delta_h for _ in range(rap.size)] + return np.array(ret) / pT + + +def JET_pc(dataset_sp, pdf, pc_nodes, pT, rap, pc_func_type: str = "step"): + """ + Returns the function that computes the shift for the ratio for single + jet cross sections. In particular, the shift is computed such that + + xsec -> xsec * ( 1 + PC ), + + and the shift is defined as + + Delta(xsec) = (xsec + xsec) - xsec = PC. + + The power correction is a function of the rapidity. + """ + cuts = dataset_sp.cuts + (fkspec,) = dataset_sp.fkspecs + fk = fkspec.load_with_cuts(cuts) + xsec = central_fk_predictions(fk, pdf) + + def func(y_values): + result = jets_pc_func(y_values, pc_nodes, pT, rap, pc_func_type) + return np.multiply(result, xsec.to_numpy()[:, 0]) + + return func + + +def JET_pc_single_par(dataset_sp, pdf, pc_nodes, pT, rap, pc_func_type: str = "step"): + """ + As JET_pc, but with one single shift for all rapidity bins. + + This function is meant to be for development purposes only. It will either substitute + JET_pc or be deleted in the future.""" + cuts = dataset_sp.cuts + (fkspec,) = dataset_sp.fkspecs + fk = fkspec.load_with_cuts(cuts) + xsec = central_fk_predictions(fk, pdf) + + def func(y_values): + assert y_values.size == 1 + ret = [y_values[0] for _ in range(rap.size)] + ret = np.array(ret) / pT + return np.multiply(ret, xsec.to_numpy()[:, 0]) + + return func + + +# TODO Maybe we want to treat the function that parametrizes the PC +# as argument? +def DIS_F2_pc(pc2_nodes, x, q2, pc_func_type: str = "step"): + """ + Returns the function that computes the shift for the ratio of structure + functions F2_d / F2_p. For this observable, power corrections are defined + such that + + F2 -> F2 + PC2, + + and the shift is defined as + + Delta(F2) = (F2 + PC2) - F2 = PC2. + + Note that, as in the case of `DIS_F2R_ht`, the shift still depends on the set + of parameters needed to define the parameterization of PC2. Also, this function + can be used to compute the shift for both proton and deuteron, provided that the + correct list of parameters is passed to the **curried** function. + + The function used to parametrize the the power correction is `dis_pc_func` and + it is hard coded. 
+ + Parameters + ---------- + + """ + + def PC_2(y_values): + result = dis_pc_func(y_values, pc2_nodes, x, q2, pc_func_type) + return result + + return PC_2 + + +def DIS_F2R_pc(experiment, pdf, pc_2_p_nodes, pc_2_d_nodes, x, q2, pc_func_type: str = "step"): + """ + Returns the function that computes the shift for the ratio of structure + functions F2_d / F2_p. For this observable, power corrections are defined + such that + + F2_d / F2_p -> (F2_d + PC2_d) / (F2_p + PC2_p) , + + and the shift is the defined as + + Delta(F2 ratio) = (F2_d + PC2_d) / (F2_p + PC2_p) - F2_d / F2_p . + + The shift is computed for a given set of kinematic variables specified + by the paris (x,Q2), but it still depends on the set of parameters need by + the power correction terms PC2_d and PC2_p. + + Note that this function does **not** return the power corrections for the + given kinematic points, but rather it returns another function where the + kinematic dependence has been **curried** (i.e. hard coded). This new function + takes as arguments the y-values of the nodes used to compute PC2_d and PC2_p + (see `delta_h` in `dis_pc_func`). Note that these y-values are not necessarily + the values listed in the runcard, as we can apply different point prescription. + For instance, we may want to pass in a set of y-values where the nodes are shifted + one at the time, leaving the others zero. The prescription is thus handled separately. + The returning function allows thus to compute Delta(F2 ratio)({...}_d, {...}_p), where + `{...}_d` and `{...}_p` are the sets of y-values for the parametrisation for the proton + and deuteron terms respectively. + + The function used to parametrize the the power correction is `dis_pc_func` and + it is hard coded. + + Parameters + ---------- + experiment: DataSetSpec + An instance of DataSetSpec used to extract information such as cuts + and fk tables. + pdf: PDF + An instance of the class PDF. This specifies the PDF to bo convoluted + with the FK tables. + pc_2_p_nodes: list[float] + The list of nodes in x-Bjorken used to define the parametrization of the + power correction for the proton (see `dis_pc_func`). + pc_2_d_nodes: list[float] + The list of nodes in x-Bjorken used to define the parametrization of the + power correction for the deuteron (see `dis_pc_func`). + x: list[float] + Set of points in x-Bjorken where the power corrections will be evaluated. + q2: list[float] + Set of points in Q2 where the power corrections will be evaluated. + + Returns + ------- + The function the computes the shift for this observable. It depends on the + y-values for the parameterization of P2_d and P2_p. + """ + cuts = experiment.cuts + fkspec_F2D, fkspec_F2P = experiment.fkspecs + fk_F2D = fkspec_F2D.load_with_cuts(cuts) + fk_F2P = fkspec_F2P.load_with_cuts(cuts) + F2D = central_fk_predictions(fk_F2D, pdf) + F2P = central_fk_predictions(fk_F2P, pdf) + + F2D = np.concatenate(F2D.values) + F2P = np.concatenate(F2P.values) + F2_ratio = operator.truediv(F2D, F2P) + + def func(y_values_d, y_values_p): + PC_d = dis_pc_func(y_values_d, pc_2_d_nodes, x, q2, pc_func_type) + PC_p = dis_pc_func(y_values_p, pc_2_p_nodes, x, q2, pc_func_type) + num = np.sum([F2D, PC_d], axis=0) + denom = np.sum([F2P, PC_p], axis=0) + result = np.array(operator.truediv(num, denom) - F2_ratio) + return result + + return func + + +def DIS_F2C_pc(pc2_p_nodes, pc2_d_nodes, x, q2, pc_func_type: str = "step"): + """ + Builds the function used to compute the shifts for the charm structure + function measured by EMC. 
The process involved is + + mu^+ + Fe -> mu+^ + c cbar + X . + + This function works exactly as the previous functions used to compute + nuisance shifts. In this case, the constructed function (`func` below) + requires two lists of parameters for the proton and the deuteron + contribution. The reason being that in this process the muon scatters off an + iron target, and the power correction contribution is a mixture of proton + and deuteron nucleons. Hence, proton and deuteron contribution are weighted + by the appropriate atomic factor. + + Note that we are parametrising power corrections as proton and deuteron + targets. If we were to parametrize such contributions using, say, proton and + nucleon, than the weights would change. + + + Nuclear target + -------------- + The power corrections for nuclear observables, like in this case, are + affected by the pc contribution of the protons and that of the neutrons. If + we allow for the non-isoscalarity of the target, and combine the two + contributions in accordance with the atomic and mass number (A and Z + respectively), the power correction for the nuclear target can be written as + (see eq.(4.2.5) in + https://nnpdf.mi.infn.it/wp-content/uploads/2021/09/thesis_master_RP.pdf) + + PC_N = 1/A (Z * PC_p + (A-Z) * PC_n) . + + The deuteron is obtained using the isoscalarity, namely + + PC_d = 1/2 (PC_p + PC_n) . + + Since we parametrise the power corrections of the proton and the deuteron, + we can combine the above equations and write + + PC_N = 1/A * ( PC_p * (2Z - A) + 2 * PC_d * (A - Z) ) + + Parameters + ---------- + pc2_p_nodes: list[float] + The list of nodes in x-Bjorken used to define the parametrization of the + power correction for the proton (see `dis_pc_func`). + pc2_d_nodes: list[float] + The list of nodes in x-Bjorken used to define the parametrization of the + power correction for the deuteron (see `dis_pc_func`). + x: list[float] + Set of points in x-Bjorken where the power corrections will be evaluated. + q2: list[float] + Set of points in Q2 where the power corrections will be evaluated. + + Returns + ------- + The function the computes the shift for this observable. It depends on the + y-values for the parameterization of P2_d and P2_p. + """ + # Iron target + Z = 23.403 + A = 49.618 + + def func(y_values_d, y_values_p): + PC2_d = dis_pc_func(y_values_d, pc2_d_nodes, x, q2, pc_func_type) + PC2_p = dis_pc_func(y_values_p, pc2_p_nodes, x, q2, pc_func_type) + result = (2 * Z - A) / A * PC2_p + 2 * (A - Z) / A * PC2_d + return result + + return func + + +def DIS_NC_XSEC_pc(pc2_nodes, pcL_nodes, pc3_nodes, lepton, x, q2, y, pc_func_type: str = "step"): + """ + Builds the function used to compute the shifts for the DIS NC x-secs + delivered by HERA and NMC. The x-sec is reconstructed as calculated + in Yadism (see https://yadism.readthedocs.io/en/latest/theory/intro.html). + In particular, the x-sec is a linear combination of the structure functions + F_2, F_L, and F_3. The coefficients are also computed appropriately (see + link). The contribution of the power corrections is then + + Delta(x-sec) = x-sec_w_pc - x-sec_wo_pc = PC_2 + N_L * PC_L + N_3 * PC_3 + + where PC_i are the power corrections relative to the respective structure + functions and the N_i the respective coefficients (as defined in Yadism). + + This function works exactly as the previous functions used to + compute nuisance shifts. In addition, it requires the kinematic + invariant `y` to build the shift-function. 
+ + Note that this function can be used for both proton and deuteron targets, + provided that the appropriate lists of nodes is given. + + Parameters + ---------- + pc2_nodes: list[float] + The list of nodes in x-Bjorken used to define the parametrization of the + power correction for F_2. + pcL_nodes: list[float] + The list of nodes in x-Bjorken used to define the parametrization of the + power correction for F_L. + pc3_nodes: list[float] + The list of nodes in x-Bjorken used to define the parametrization of the + power correction for F_3. + lepton: int + Whether the scattering particle is a lepton (0) or an anti-lepton(1). + x: list[float] + Set of points in x-Bjorken where the power corrections will be evaluated. + q2: list[float] + Set of points in Q2 where the power corrections will be evaluated. + y: list[float] + Set of points in y where the power corrections will be evaluated. + + Returns + ------- + The function the computes the shift for this observable. It depends on the + y-values for the parameterization of P2 and PL and P3. + """ + yp = 1 + np.power(1 - y, 2) + ym = 1 - np.power(1 - y, 2) + yL = np.power(y, 2) + N_L = -yL / yp # Coefficient for F_L + N_3 = np.power(-1, lepton) * ym / yp # Coefficient for F_3 + + def func(y_values_pc2, y_values_pcL, y_values_pc3): + PC_2 = dis_pc_func(y_values_pc2, pc2_nodes, x, q2, pc_func_type) + PC_L = dis_pc_func(y_values_pcL, pcL_nodes, x, q2, pc_func_type) + PC_3 = dis_pc_func(y_values_pc3, pc3_nodes, x, q2, pc_func_type) + result = PC_2 + N_L * PC_L + N_3 * PC_3 + return result + + return func + + +def DIS_CC_HERA_XSEC_pc( + pc2_p_nodes, pcL_p_nodes, pc3_p_nodes, lepton, x, q2, y, pc_func_type: str = "step" +): + """ + Builds the function used to compute the shifts for the DIS CC x-secs + delivered by HERA. The x-sec is reconstructed as calculated + in Yadism (see https://yadism.readthedocs.io/en/latest/theory/intro.html). + In particular, the x-sec is a linear combination of the structure functions + F_2, F_L, and F_3. The coefficients are also computed appropriately (see + link). The contribution of the power corrections is then + + Delta(x-sec) = x-sec_w_pc - x-sec_wo_pc = N * (PC_2 + N_L * PC_L + N_3 * PC_3) + + where PC_i are the power corrections relative to the respective structure + functions and the N_i the respective coefficients (as defined in Yadism). + N is the overall normalization factor. + + For the HERA_CC_318GEV dataset, the target is always a proton. However, the + lepton may be either the electron (0) or the positron (1). This information + is needed in order to compute the coefficient N_3. + + Parameters + ---------- + pc2_p_nodes: list[float] + The list of nodes in x-Bjorken used to define the parametrization of the + power correction for F_2. + pcL_p_nodes: list[float] + The list of nodes in x-Bjorken used to define the parametrization of the + power correction for F_L. + pc3_p_nodes: list[float] + The list of nodes in x-Bjorken used to define the parametrization of the + power correction for F_3. + lepton: int + Whether the scattering particle is a lepton (0) or an anti-lepton(1). + x: list[float] + Set of points in x-Bjorken where the power corrections will be evaluated. + q2: list[float] + Set of points in Q2 where the power corrections will be evaluated. + y: list[float] + Set of points in y where the power corrections will be evaluated. + + Returns + ------- + The function the computes the shift for this observable. It depends on the + y-values for the parameterization of P2 and PL and P3. 
+ """ + yp = 1 + np.power(1 - y, 2) + ym = 1 - np.power(1 - y, 2) + yL = np.power(y, 2) + N = 1 / 4 * yp # Overall normalization + N_L = -yL / yp # Coefficient for F_L + N_3 = np.power(-1, lepton) * ym / yp # Coefficient for F_3 + + def func(y_values_pc2_p, y_values_pcL_p, y_values_pc3_p): + # Initialize power corrections for each structure function + PC2_p = dis_pc_func(y_values_pc2_p, pc2_p_nodes, x, q2, pc_func_type) + PCL_p = dis_pc_func(y_values_pcL_p, pcL_p_nodes, x, q2, pc_func_type) + PC3_p = dis_pc_func(y_values_pc3_p, pc3_p_nodes, x, q2, pc_func_type) + + # Build the contribution to the x-sec of the power corrections + result = N * (PC2_p + N_L * PCL_p + N_3 * PC3_p) + return result + + return func + + +def DIS_CC_NUTEV_pc( + pc2_p_nodes, + pcL_p_nodes, + pc3_p_nodes, + pc2_d_nodes, + pcL_d_nodes, + pc3_d_nodes, + lepton, + x, + q2, + y, + pc_func_type: str = "step", +): + """ + Builds the function used to compute the shifts for the DIS CC x-secs + delivered by NuTeV. The x-sec is reconstructed as calculated + in Yadism (see https://yadism.readthedocs.io/en/latest/theory/intro.html). + In particular, the x-sec is a linear combination of the structure functions + F_2, F_L, and F_3. The coefficients are also computed appropriately (see + link). Note that this experiment uses iron targets, and thus the coefficients + must take into account the nuclear mixture of porton and deuteron. The contribution + of the power corrections is then + + Delta(x-sec) = x-sec_w_pc - x-sec_wo_pc = N * (PC_2 + N_L * PC_L + N_3 * PC_3) + + where PC_i are the power corrections relative to the respective structure + functions (nuclear mixture implicit) and the N_i the respective coefficients (as defined in Yadism). + N is the overall normalization factor. + + For the NuTeV CC dataset, the target is always iron. However, the + lepton may be either the electron (0) or the positron (1). This + information is needed in order to compute the coefficient N_3. + + Nuclear target + -------------- + See `DIS_F2C_pc`. + + Parameters + ---------- + pc2_p_nodes: list[float] + The list of nodes in x-Bjorken used to define the parametrization of the + power correction for F_2 of the proton. + pcL_p_nodes: list[float] + The list of nodes in x-Bjorken used to define the parametrization of the + power correction for F_L of the proton. + pc3_p_nodes: list[float] + The list of nodes in x-Bjorken used to define the parametrization of the + power correction for F_3 of the proton. + pc2_p_nodes: list[float] + The list of nodes in x-Bjorken used to define the parametrization of the + power correction for F_2 of the deuteron. + pcL_p_nodes: list[float] + The list of nodes in x-Bjorken used to define the parametrization of the + power correction for F_L of the deuteron. + pc3_p_nodes: list[float] + The list of nodes in x-Bjorken used to define the parametrization of the + power correction for F_3 of the deuteron. + lepton: int + Whether the scattering particle is a lepton (0) or an anti-lepton(1). + x: list[float] + Set of points in x-Bjorken where the power corrections will be evaluated. + q2: list[float] + Set of points in Q2 where the power corrections will be evaluated. + y: list[float] + Set of points in y where the power corrections will be evaluated. + + Returns + ------- + The function the computes the shift for this observable. It depends on the + y-values for the parameterization of P2 and PL and P3 for proton and deuteron. 
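+
+    As a rough illustration of the nuclear mixing described in `DIS_F2C_pc`
+    (the Z and A values are the effective iron numbers used in this module;
+    the power corrections themselves are made up):
+    ```
+    Z, A = 23.403, 49.618        # effective iron values used in this module
+    PC2_p, PC2_d = 0.02, 0.015   # hypothetical proton and deuteron corrections
+
+    w_p = (2 * Z - A) / A        # proton weight, about -0.057
+    w_d = 2 * (A - Z) / A        # deuteron weight, about 1.057
+    PC2_Fe = w_p * PC2_p + w_d * PC2_d
+    ```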
+ """ + # Iron target + Z = 23.403 + A = 49.618 + yp = 1 + np.power(1 - y, 2) - 2 * np.power(x * y * Mh, 2) / q2 + ym = 1 - np.power(1 - y, 2) + yL = np.power(y, 2) + N_L = -yL / yp # Coefficient for F_L + N_3 = np.power(-1, lepton) * ym / yp # Coefficient for F_3 + + MW2 = np.power(MW, 2) + # Overall coefficient + # TODO: cross-check + N = 100 / 2 / np.power(1 + q2 / MW2, 2) * yp + + def func( + y_values_pc2_p, + y_values_pcL_p, + y_values_pc3_p, + y_values_pc2_d, + y_values_pcL_d, + y_values_pc3_d, + ): + PC2_p = dis_pc_func(y_values_pc2_p, pc2_p_nodes, x, q2, pc_func_type) + PCL_p = dis_pc_func(y_values_pcL_p, pcL_p_nodes, x, q2, pc_func_type) + PC3_p = dis_pc_func(y_values_pc3_p, pc3_p_nodes, x, q2, pc_func_type) + PC2_d = dis_pc_func(y_values_pc2_d, pc2_d_nodes, x, q2, pc_func_type) + PCL_d = dis_pc_func(y_values_pcL_d, pcL_d_nodes, x, q2, pc_func_type) + PC3_d = dis_pc_func(y_values_pc3_d, pc3_d_nodes, x, q2, pc_func_type) + tmp_2 = (2 * Z - A) / A * PC2_p + 2 * (A - Z) / A * PC2_d + tmp_L = (2 * Z - A) / A * PCL_p + 2 * (A - Z) / A * PCL_d + tmp_3 = (2 * Z - A) / A * PC3_p + 2 * (A - Z) / A * PC3_d + result = N * (tmp_2 + N_L * tmp_L + N_3 * tmp_3) + return result + + return func + + +# TODO This is function is really similar to the one +# defined for NUTEV CC. Can we reduce code repetitions? +def DIS_CC_CHORUS_pc( + pc2_p_nodes, + pcL_p_nodes, + pc3_p_nodes, + pc2_d_nodes, + pcL_d_nodes, + pc3_d_nodes, + lepton, + x, + q2, + y, + pc_func_type: str = "step", +): + """ + Same as DIS_CC_NUTEV_pc, but for CHORUS CC. + + Note that the difference here is in the definition of the overall + normalization N. + + Nuclear target + -------------- + See `DIS_F2C_pc`. + + Parameters + ---------- + pc2_p_nodes: list[float] + The list of nodes in x-Bjorken used to define the parametrization of the + power correction for F_2 of the proton. + pcL_p_nodes: list[float] + The list of nodes in x-Bjorken used to define the parametrization of the + power correction for F_L of the proton. + pc3_p_nodes: list[float] + The list of nodes in x-Bjorken used to define the parametrization of the + power correction for F_3 of the proton. + pc2_p_nodes: list[float] + The list of nodes in x-Bjorken used to define the parametrization of the + power correction for F_2 of the deuteron. + pcL_p_nodes: list[float] + The list of nodes in x-Bjorken used to define the parametrization of the + power correction for F_L of the deuteron. + pc3_p_nodes: list[float] + The list of nodes in x-Bjorken used to define the parametrization of the + power correction for F_3 of the deuteron. + lepton: int + Whether the scattering particle is a lepton (0) or an anti-lepton(1). + x: list[float] + Set of points in x-Bjorken where the power corrections will be evaluated. + q2: list[float] + Set of points in Q2 where the power corrections will be evaluated. + y: list[float] + Set of points in y where the power corrections will be evaluated. + + Returns + ------- + The function the computes the shift for this observable. It depends on the + y-values for the parameterization of P2 and PL and P3 for proton and deuteron. 
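+
+    For orientation, the overall factor N defined in the body below can be
+    evaluated on its own (the kinematics are hypothetical; the constants are
+    the ones defined at the top of this module):
+    ```
+    import numpy as np
+
+    GEV_CM2_CONV = 3.893793e10
+    GF, Mh, MW = 1.1663787e-05, 0.938, 80.398
+
+    x, q2, y = 0.1, 10.0, 0.5
+    yp = 1 + (1 - y) ** 2 - 2 * (x * y * Mh) ** 2 / q2
+    N = GEV_CM2_CONV * GF**2 * Mh / (2 * np.pi * (1 + q2 / MW**2) ** 2) * yp
+    ```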
+    """
+    # Lead target
+    A = 208.0
+    Z = 82
+    yp = 1 + np.power(1 - y, 2) - 2 * np.power(x * y * Mh, 2) / q2
+    ym = 1 - np.power(1 - y, 2)
+    yL = np.power(y, 2)
+    N_L = -yL / yp  # Coefficient for F_L
+    N_3 = np.power(-1, lepton) * ym / yp  # Coefficient for F_3
+
+    MW2 = np.power(MW, 2)
+    # Overall coefficient
+    # TODO: cross-check
+    N = GEV_CM2_CONV * (GF**2) * Mh / (2 * np.pi * np.power(1 + q2 / MW2, 2)) * yp
+
+    def func(
+        y_values_pc2_p,
+        y_values_pcL_p,
+        y_values_pc3_p,
+        y_values_pc2_d,
+        y_values_pcL_d,
+        y_values_pc3_d,
+    ):
+        PC2_p = dis_pc_func(y_values_pc2_p, pc2_p_nodes, x, q2, pc_func_type)
+        PCL_p = dis_pc_func(y_values_pcL_p, pcL_p_nodes, x, q2, pc_func_type)
+        PC3_p = dis_pc_func(y_values_pc3_p, pc3_p_nodes, x, q2, pc_func_type)
+        PC2_d = dis_pc_func(y_values_pc2_d, pc2_d_nodes, x, q2, pc_func_type)
+        PCL_d = dis_pc_func(y_values_pcL_d, pcL_d_nodes, x, q2, pc_func_type)
+        PC3_d = dis_pc_func(y_values_pc3_d, pc3_d_nodes, x, q2, pc_func_type)
+        tmp_2 = (2 * Z - A) / A * PC2_p + 2 * (A - Z) / A * PC2_d
+        tmp_L = (2 * Z - A) / A * PCL_p + 2 * (A - Z) / A * PCL_d
+        tmp_3 = (2 * Z - A) / A * PC3_p + 2 * (A - Z) / A * PC3_d
+        result = N * (tmp_2 + N_L * tmp_L + N_3 * tmp_3)
+        return result
+
+    return func
+
+
+def construct_pars_combs(parameters_dict):
+    """Construct the combination of parameters (the ones that parametrize the power
+    corrections) used to compute the shifts.
+
+    Example
+    -------
+    Given the following dictionary that specifies the power corrections
+    ```
+    pc_dict = {
+        'H1': {'yshift': [1,2], 'nodes': [0,1]},
+        'H2': {'yshift': [3,4,5], 'nodes': [0,1,2]},
+    }
+    ```
+    then this function constructs a list as follows
+    ```
+    pars_combs = [
+        {'label': 'H1(0)', 'comb': {'H1': [1,0], 'H2': [0,0,0]}},
+        {'label': 'H1(1)', 'comb': {'H1': [0,2], 'H2': [0,0,0]}},
+        {'label': 'H2(0)', 'comb': {'H1': [0,0], 'H2': [3,0,0]}},
+        {'label': 'H2(1)', 'comb': {'H1': [0,0], 'H2': [0,4,0]}},
+        {'label': 'H2(2)', 'comb': {'H1': [0,0], 'H2': [0,0,5]}},
+    ]
+    ```
+    that is, one combination per parameter, in which only that parameter is
+    shifted and all the others are set to zero.
+
+    Parameters
+    ----------
+    parameters_dict: dict
+        Dictionary mapping each power-correction label to a dictionary that
+        contains (at least) the `yshift` values of its nodes.
+    """
+    combinations = []
+    for key, values in parameters_dict.items():
+        for i in range(len(values['yshift'])):
+            # Create a new dictionary with all keys and zeroed-out values
+            new_dict = {k: np.zeros_like(v['yshift']) for k, v in parameters_dict.items()}
+            # Set the specific value for the current index
+            label = key + f'({i})'
+            new_dict[key][i] = values['yshift'][i]
+            new_dict = {'label': label, 'comb': new_dict}
+            combinations.append(new_dict)
+
+    return combinations
+
+
+# TODO `pc_func_type` will be removed
+def compute_deltas_pc(dataset_sp: DataSetSpec, pdf: PDF, pc_dict: dict, pc_func_type: str):
+    """
+    Computes the shifts due to power corrections for a single dataset given
+    the set of parameters that model the power corrections. The result is
+    a dictionary containing as many arrays of shifts as the number of
+    combinations of the parameters. For instance, the final dictionary
+    may look like:
+    ```
+    deltas1 = {comb1_label: array_of_shifts1,
+               comb2_label: array_of_shifts2,
+               comb3_label: array_of_shifts3,
+               ...}
+    ```
+    Note that, as of now, we don't need to specify different prescriptions.
+    For that reason, the prescription adopted to construct the shifts is hard
+    coded in the function `construct_pars_combs`, and the prescription used to
+    compute the sub-matrix is hard-coded in `covmat_power_corrections`. 
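+
+    As a small sketch (the key and the numbers below are hypothetical and
+    trimmed to a single parameter; the real runcard provides one entry per
+    power-correction term used by the dataset):
+    ```
+    pc_dict = {'H2p': {'yshift': [0.1, 0.2], 'nodes': [0.1, 0.5]}}
+    # construct_pars_combs(pc_dict) produces one combination per node, so the
+    # returned dictionary contains one array of shifts per combination:
+    # deltas = {'H2p(0)': shifts_for_first_node, 'H2p(1)': shifts_for_second_node}
+    ```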
+ """ + + exp_name = dataset_sp.name + process_type = dataset_sp.commondata.metadata.process_type.name + cuts = dataset_sp.cuts.load() + + pars_combs = construct_pars_combs(pc_dict) + deltas = defaultdict(list) + + pc_func = None + if process_type.startswith('DIS'): + pc2_p_nodes = pc_dict["H2p"]['nodes'] + pcL_p_nodes = pc_dict["HLp"]['nodes'] + pc3_p_nodes = pc_dict["H3p"]['nodes'] + pc2_d_nodes = pc_dict["H2d"]['nodes'] + pcL_d_nodes = pc_dict["HLd"]['nodes'] + pc3_d_nodes = pc_dict["H3d"]['nodes'] + x = dataset_sp.commondata.metadata.load_kinematics()['x'].to_numpy().reshape(-1)[cuts] + q2 = dataset_sp.commondata.metadata.load_kinematics()['Q2'].to_numpy().reshape(-1)[cuts] + y = dataset_sp.commondata.metadata.load_kinematics()['y'].to_numpy().reshape(-1)[cuts] + + # F2 ratio + if exp_name == "NMC_NC_NOTFIXED_EM-F2": + pc_func = DIS_F2R_pc(dataset_sp, pdf, pc2_p_nodes, pc2_d_nodes, x, q2, pc_func_type) + for pars_pc in pars_combs: + deltas[pars_pc['label']] = pc_func(pars_pc['comb']['H2p'], pars_pc['comb']['H2d']) + + # F2 proton traget + elif exp_name in F2P_exps: + pc_func = DIS_F2_pc(pc2_p_nodes, x, q2, pc_func_type) + for pars_pc in pars_combs: + deltas[pars_pc['label']] = pc_func(pars_pc['comb']['H2p']) + + # F2 deuteron traget + elif exp_name in F2D_exps: + pc_func = DIS_F2_pc(pc2_d_nodes, x, q2, pc_func_type) + for pars_pc in pars_combs: + deltas[pars_pc['label']] = pc_func(pars_pc['comb']['H2d']) + + # EMC + elif exp_name.startswith('EMC_NC_250GEV'): + pc_func = DIS_F2C_pc(pc2_p_nodes, pc2_d_nodes, x, q2, pc_func_type) + for pars_pc in pars_combs: + deltas[pars_pc['label']] = pc_func(pars_pc['comb']['H2p'], pars_pc['comb']['H2d']) + + # HERA and NMC SIGMARED NC + elif exp_name in np.concatenate([NC_SIGMARED_P_EM, NC_SIGMARED_P_EP, NC_SIGMARED_P_EAVG]): + # Electron + if exp_name in NC_SIGMARED_P_EM: + pc_func = DIS_NC_XSEC_pc( + pc2_p_nodes, pcL_p_nodes, pc3_p_nodes, 0, x, q2, y, pc_func_type + ) + # Positron + elif exp_name in NC_SIGMARED_P_EP: + pc_func = DIS_NC_XSEC_pc( + pc2_p_nodes, pcL_p_nodes, pc3_p_nodes, 1, x, q2, y, pc_func_type + ) + # Average positron and electron + # TODO + # Check if this is correct (ach) + elif NC_SIGMARED_P_EAVG: + + def average(y_values_pc2_p, y_values_pcL_p, y_values_pc3_p): + electron = DIS_NC_XSEC_pc( + pc2_p_nodes, pcL_p_nodes, pc3_p_nodes, 0, x, q2, y, pc_func_type + ) + positron = DIS_NC_XSEC_pc( + pc2_p_nodes, pcL_p_nodes, pc3_p_nodes, 1, x, q2, y, pc_func_type + ) + result = electron(y_values_pc2_p, y_values_pcL_p, y_values_pc3_p) + positron( + y_values_pc2_p, y_values_pcL_p, y_values_pc3_p + ) + return result / 2 + + pc_func = average + else: + raise ValueError(f"{exp_name} not implemented.") + + for pars_pc in pars_combs: + deltas[pars_pc['label']] = pc_func( + pars_pc['comb']['H2p'], pars_pc['comb']['HLp'], pars_pc['comb']['H3p'] + ) + + # CHORUS + elif exp_name.startswith('CHORUS_CC'): + # Nu + if exp_name == 'CHORUS_CC_NOTFIXED_PB_NU-SIGMARED': + pc_func = DIS_CC_CHORUS_pc( + pc2_p_nodes, + pcL_p_nodes, + pc3_p_nodes, + pc2_d_nodes, + pcL_d_nodes, + pc3_d_nodes, + 0, + x, + q2, + y, + pc_func_type, + ) + # Nu bar + elif exp_name == 'CHORUS_CC_NOTFIXED_PB_NB-SIGMARED': + pc_func = DIS_CC_CHORUS_pc( + pc2_p_nodes, + pcL_p_nodes, + pc3_p_nodes, + pc2_d_nodes, + pcL_d_nodes, + pc3_d_nodes, + 1, + x, + q2, + y, + pc_func_type, + ) + else: + raise ValueError(f"{exp_name} not implemented.") + for pars_pc in pars_combs: + deltas[pars_pc['label']] = pc_func( + pars_pc['comb']['H2p'], + pars_pc['comb']['HLp'], + pars_pc['comb']['H3p'], + 
pars_pc['comb']['H2d'], + pars_pc['comb']['HLd'], + pars_pc['comb']['H3d'], + ) + + # NuTeV + elif exp_name.startswith('NUTEV_CC'): + # Nu + if exp_name == 'NUTEV_CC_NOTFIXED_FE_NU-SIGMARED': + pc_func = DIS_CC_NUTEV_pc( + pc2_p_nodes, + pcL_p_nodes, + pc3_p_nodes, + pc2_d_nodes, + pcL_d_nodes, + pc3_d_nodes, + 0, + x, + q2, + y, + pc_func_type, + ) + # Nu bar + elif exp_name == 'NUTEV_CC_NOTFIXED_FE_NB-SIGMARED': + pc_func = DIS_CC_NUTEV_pc( + pc2_p_nodes, + pcL_p_nodes, + pc3_p_nodes, + pc2_d_nodes, + pcL_d_nodes, + pc3_d_nodes, + 1, + x, + q2, + y, + pc_func_type, + ) + else: + raise ValueError(f"{exp_name} not implemented.") + for pars_pc in pars_combs: + deltas[pars_pc['label']] = pc_func( + pars_pc['comb']['H2p'], + pars_pc['comb']['HLp'], + pars_pc['comb']['H3p'], + pars_pc['comb']['H2d'], + pars_pc['comb']['HLd'], + pars_pc['comb']['H3d'], + ) + + # HERA_CC + elif exp_name.startswith('HERA_CC'): + # electron + if exp_name == 'HERA_CC_318GEV_EM-SIGMARED': + pc_func = DIS_CC_HERA_XSEC_pc( + pc2_p_nodes, pcL_p_nodes, pc3_p_nodes, 0, x, q2, y, pc_func_type + ) + # positron + elif exp_name == 'HERA_CC_318GEV_EP-SIGMARED': + pc_func = DIS_CC_HERA_XSEC_pc( + pc2_p_nodes, pcL_p_nodes, pc3_p_nodes, 1, x, q2, y, pc_func_type + ) + else: + raise ValueError(f"{exp_name} not implemented.") + + for pars_pc in pars_combs: + deltas[pars_pc['label']] = pc_func( + pars_pc['comb']['H2p'], pars_pc['comb']['HLp'], pars_pc['comb']['H3p'] + ) + + else: + raise ValueError( + f"The {process_type} observable for {exp_name} " "has not been implemented." + ) + + elif process_type == 'JET': + pc_jet_nodes = pc_dict["Hj"]['nodes'] + eta = dataset_sp.commondata.metadata.load_kinematics()['y'].to_numpy().reshape(-1)[cuts] + pT = dataset_sp.commondata.metadata.load_kinematics()['pT'].to_numpy().reshape(-1)[cuts] + + pc_func = JET_pc(dataset_sp, pdf, pc_jet_nodes, pT, eta, pc_func_type) + for pars_pc in pars_combs: + deltas[pars_pc['label']] = pc_func(pars_pc['comb']['Hj']) + + elif process_type == 'DIJET': + + if dataset_sp.commondata.metadata.experiment == 'ATLAS': + pc_jet_nodes = pc_dict["H2j_ATLAS"]['nodes'] + eta_star = ( + dataset_sp.commondata.metadata.load_kinematics()['ystar'] + .to_numpy() + .reshape(-1)[cuts] + ) + m_jj = ( + dataset_sp.commondata.metadata.load_kinematics()['m_jj'] + .to_numpy() + .reshape(-1)[cuts] + ) + pc_func = JET_pc(dataset_sp, pdf, pc_jet_nodes, m_jj, eta_star, pc_func_type) + for pars_pc in pars_combs: + deltas[pars_pc['label']] = pc_func(pars_pc['comb']['H2j_ATLAS']) + + elif dataset_sp.commondata.metadata.experiment == 'CMS': + pc_jet_nodes = pc_dict["H2j_CMS"]['nodes'] + eta_diff = ( + dataset_sp.commondata.metadata.load_kinematics()['ydiff'] + .to_numpy() + .reshape(-1)[cuts] + ) + m_jj = ( + dataset_sp.commondata.metadata.load_kinematics()['m_jj'] + .to_numpy() + .reshape(-1)[cuts] + ) + pc_func = JET_pc(dataset_sp, pdf, pc_jet_nodes, m_jj, eta_diff, pc_func_type) + for pars_pc in pars_combs: + deltas[pars_pc['label']] = pc_func(pars_pc['comb']['H2j_CMS']) + + else: + raise ValueError( + f"{dataset_sp.commondata.metadata.experiment} is not implemented for DIJET." 
+ ) + + else: + raise RuntimeError(f"{process_type} has not been implemented.") + + return deltas From e9ba53ab37aa76b379baba51691a520dec450635 Mon Sep 17 00:00:00 2001 From: achiefa Date: Tue, 10 Jun 2025 09:22:21 +0100 Subject: [PATCH 16/23] Implementation of multiplicative shifts --- .../higher_twist_functions.py | 743 +++--------------- 1 file changed, 93 insertions(+), 650 deletions(-) diff --git a/validphys2/src/validphys/theorycovariance/higher_twist_functions.py b/validphys2/src/validphys/theorycovariance/higher_twist_functions.py index 972576f10d..fee68c6c02 100644 --- a/validphys2/src/validphys/theorycovariance/higher_twist_functions.py +++ b/validphys2/src/validphys/theorycovariance/higher_twist_functions.py @@ -12,7 +12,7 @@ computed using a 5-point prescription extended to every parameter used to define the power correction. -This module comprehends a bunch of ``factory`` functions such as `DIS_F2_pc`. Each +This module comprehends a bunch of ``factory`` functions such as `mult_dis_pc`. Each of these functions returns another function that computes the shifts taking as arguments the values of the parameters used to parametrise the power corrections. In other words, these factory functions hard-code the dependence on the kinematic @@ -30,11 +30,6 @@ from validphys.convolution import central_fk_predictions from validphys.core import PDF, DataSetSpec -GEV_CM2_CONV = 3.893793e10 -GF = 1.1663787e-05 # Fermi's constant [GeV^-2] -Mh = 0.938 # Proton's mass in GeV/c^2 -MW = 80.398 # W boson mass in GeV/c^2 - F2P_exps = ['SLAC_NC_NOTFIXED_P_EM-F2', 'BCDMS_NC_NOTFIXED_P_EM-F2'] F2D_exps = ['SLAC_NC_NOTFIXED_D_EM-F2', 'BCDMS_NC_NOTFIXED_D_EM-F2'] NC_SIGMARED_P_EM = ['NMC_NC_NOTFIXED_P_EM-SIGMARED', 'HERA_NC_318GEV_EM-SIGMARED'] @@ -246,147 +241,99 @@ def jets_pc_func( return PC -def jet_single_par(delta_h: float, pT: npt.ArrayLike, rap: npt.ArrayLike) -> npt.ArrayLike: - ret = [delta_h for _ in range(rap.size)] - return np.array(ret) / pT +# def jet_single_par(delta_h: float, pT: npt.ArrayLike, rap: npt.ArrayLike) -> npt.ArrayLike: +# ret = [delta_h for _ in range(rap.size)] +# return np.array(ret) / pT +# def mult_jet_pc_single_par(dataset_sp, pdf, pc_nodes, pT, rap, pc_func_type: str = "step"): +# """ +# As mult_jet_pc, but with one single shift for all rapidity bins. -def JET_pc(dataset_sp, pdf, pc_nodes, pT, rap, pc_func_type: str = "step"): - """ - Returns the function that computes the shift for the ratio for single - jet cross sections. In particular, the shift is computed such that +# This function is meant to be for development purposes only. It will either substitute +# mult_jet_pc or be deleted in the future.""" +# cuts = dataset_sp.cuts +# (fkspec,) = dataset_sp.fkspecs +# fk = fkspec.load_with_cuts(cuts) +# xsec = central_fk_predictions(fk, pdf) - xsec -> xsec * ( 1 + PC ), +# def func(y_values): +# assert y_values.size == 1 +# ret = [y_values[0] for _ in range(rap.size)] +# ret = np.array(ret) / pT +# return np.multiply(ret, xsec.to_numpy()[:, 0]) - and the shift is defined as +# return func - Delta(xsec) = (xsec + xsec) - xsec = PC. - The power correction is a function of the rapidity. +def mult_dis_pc(nodes, x, q2, dataset_sp, pdf, pc_func_type: str = "step"): """ - cuts = dataset_sp.cuts - (fkspec,) = dataset_sp.fkspecs - fk = fkspec.load_with_cuts(cuts) - xsec = central_fk_predictions(fk, pdf) + Returns the function that computes the shift to observables due to + power corrections. Power corrections are treated as multiplicative + shifts. 
Hence if `O` is the observable, the prediction is shifted as
+
+        O -> O * (1 + PC),
+
+    and the shift is defined as
+
+        Delta(O) = O * (1 + PC) - O = O * PC.
+
+    This function returns a function that computes the shift
+    given the y-values of the nodes used to define the power corrections.
+    The interpolation between the nodes is specified by `pc_func_type`.
     """
-    As JET_pc, but with one single shift for all rapidity bins.
-
-    This function is meant to be for development purposes only. It will either substitute
-    JET_pc or be deleted in the future."""
     cuts = dataset_sp.cuts
     (fkspec,) = dataset_sp.fkspecs
     fk = fkspec.load_with_cuts(cuts)
-    xsec = central_fk_predictions(fk, pdf)
+    th_preds = central_fk_predictions(fk, pdf)
 
     def func(y_values):
-        assert y_values.size == 1
-        ret = [y_values[0] for _ in range(rap.size)]
-        ret = np.array(ret) / pT
-        return np.multiply(ret, xsec.to_numpy()[:, 0])
+        result = dis_pc_func(y_values, nodes, x, q2, pc_func_type)
+        return np.multiply(result, th_preds.to_numpy()[:, 0])
 
     return func
 
 
-# TODO Maybe we want to treat the function that parametrizes the PC
-# as argument?
-def DIS_F2_pc(pc2_nodes, x, q2, pc_func_type: str = "step"):
+def mult_dis_ratio_pc(p_nodes, d_nodes, x, q2, dataset_sp, pdf, pc_func_type: str = "step"):
     """
     Returns the function that computes the shift for the ratio of structure
     functions F2_d / F2_p. For this observable, power corrections are defined
     such that
 
-    F2 -> F2 + PC2,
-
-    and the shift is defined as
-
-    Delta(F2) = (F2 + PC2) - F2 = PC2.
-
-    Note that, as in the case of `DIS_F2R_ht`, the shift still depends on the set
-    of parameters needed to define the parameterization of PC2. Also, this function
-    can be used to compute the shift for both proton and deuteron, provided that the
-    correct list of parameters is passed to the **curried** function.
-
-    The function used to parametrize the the power correction is `dis_pc_func` and
-    it is hard coded.
-
-    Parameters
-    ----------
-
-    """
-
-    def PC_2(y_values):
-        result = dis_pc_func(y_values, pc2_nodes, x, q2, pc_func_type)
-        return result
-
-    return PC_2
-
-
-def DIS_F2R_pc(experiment, pdf, pc_2_p_nodes, pc_2_d_nodes, x, q2, pc_func_type: str = "step"):
-    """
-    Returns the function that computes the shift for the ratio of structure
-    functions F2_d / F2_p. For this observable, power corrections are defined
-    such that
-
-    F2_d / F2_p -> (F2_d + PC2_d) / (F2_p + PC2_p) ,
+        F2_d / F2_p -> F2_d * (1 + PC2_d) / (F2_p * (1 + PC2_p)) ,
 
     and the shift is then defined as
 
-    Delta(F2 ratio) = (F2_d + PC2_d) / (F2_p + PC2_p) - F2_d / F2_p .
-
-    The shift is computed for a given set of kinematic variables specified
-    by the paris (x,Q2), but it still depends on the set of parameters need by
-    the power correction terms PC2_d and PC2_p.
+        Delta(F2 ratio) = F2_d / F2_p * (PC2_d - PC2_p) / (1 + PC2_p).
 
-    Note that this function does **not** return the power corrections for the
-    given kinematic points, but rather it returns another function where the
-    kinematic dependence has been **curried** (i.e. hard coded). This new function
-    takes as arguments the y-values of the nodes used to compute PC2_d and PC2_p
-    (see `delta_h` in `dis_pc_func`). Note that these y-values are not necessarily
-    the values listed in the runcard, as we can apply different point prescription. 
- For instance, we may want to pass in a set of y-values where the nodes are shifted - one at the time, leaving the others zero. The prescription is thus handled separately. - The returning function allows thus to compute Delta(F2 ratio)({...}_d, {...}_p), where - `{...}_d` and `{...}_p` are the sets of y-values for the parametrisation for the proton - and deuteron terms respectively. - - The function used to parametrize the the power correction is `dis_pc_func` and - it is hard coded. + As for `mult_dis_pc`, this function returns a function that computes the shift + for the ratio of structure functions F2_d / F2_p given a set of y-values. Parameters ---------- - experiment: DataSetSpec - An instance of DataSetSpec used to extract information such as cuts - and fk tables. - pdf: PDF - An instance of the class PDF. This specifies the PDF to bo convoluted - with the FK tables. - pc_2_p_nodes: list[float] + p_nodes: list[float] The list of nodes in x-Bjorken used to define the parametrization of the power correction for the proton (see `dis_pc_func`). - pc_2_d_nodes: list[float] + d_nodes: list[float] The list of nodes in x-Bjorken used to define the parametrization of the power correction for the deuteron (see `dis_pc_func`). x: list[float] Set of points in x-Bjorken where the power corrections will be evaluated. q2: list[float] Set of points in Q2 where the power corrections will be evaluated. + dataset_sp: DataSetSpec + An instance of DataSetSpec used to extract information such as cuts + and fk tables. + pdf: PDF + An instance of the class PDF. This specifies the PDF to bo convoluted + with the FK tables. Returns ------- The function the computes the shift for this observable. It depends on the y-values for the parameterization of P2_d and P2_p. """ - cuts = experiment.cuts - fkspec_F2D, fkspec_F2P = experiment.fkspecs + cuts = dataset_sp.cuts + fkspec_F2D, fkspec_F2P = dataset_sp.fkspecs fk_F2D = fkspec_F2D.load_with_cuts(cuts) fk_F2P = fkspec_F2P.load_with_cuts(cuts) F2D = central_fk_predictions(fk_F2D, pdf) @@ -396,421 +343,39 @@ def DIS_F2R_pc(experiment, pdf, pc_2_p_nodes, pc_2_d_nodes, x, q2, pc_func_type: F2P = np.concatenate(F2P.values) F2_ratio = operator.truediv(F2D, F2P) - def func(y_values_d, y_values_p): - PC_d = dis_pc_func(y_values_d, pc_2_d_nodes, x, q2, pc_func_type) - PC_p = dis_pc_func(y_values_p, pc_2_p_nodes, x, q2, pc_func_type) - num = np.sum([F2D, PC_d], axis=0) - denom = np.sum([F2P, PC_p], axis=0) - result = np.array(operator.truediv(num, denom) - F2_ratio) - return result - - return func - - -def DIS_F2C_pc(pc2_p_nodes, pc2_d_nodes, x, q2, pc_func_type: str = "step"): - """ - Builds the function used to compute the shifts for the charm structure - function measured by EMC. The process involved is - - mu^+ + Fe -> mu+^ + c cbar + X . - - This function works exactly as the previous functions used to compute - nuisance shifts. In this case, the constructed function (`func` below) - requires two lists of parameters for the proton and the deuteron - contribution. The reason being that in this process the muon scatters off an - iron target, and the power correction contribution is a mixture of proton - and deuteron nucleons. Hence, proton and deuteron contribution are weighted - by the appropriate atomic factor. - - Note that we are parametrising power corrections as proton and deuteron - targets. If we were to parametrize such contributions using, say, proton and - nucleon, than the weights would change. 
- - - Nuclear target - -------------- - The power corrections for nuclear observables, like in this case, are - affected by the pc contribution of the protons and that of the neutrons. If - we allow for the non-isoscalarity of the target, and combine the two - contributions in accordance with the atomic and mass number (A and Z - respectively), the power correction for the nuclear target can be written as - (see eq.(4.2.5) in - https://nnpdf.mi.infn.it/wp-content/uploads/2021/09/thesis_master_RP.pdf) - - PC_N = 1/A (Z * PC_p + (A-Z) * PC_n) . - - The deuteron is obtained using the isoscalarity, namely - - PC_d = 1/2 (PC_p + PC_n) . - - Since we parametrise the power corrections of the proton and the deuteron, - we can combine the above equations and write - - PC_N = 1/A * ( PC_p * (2Z - A) + 2 * PC_d * (A - Z) ) - - Parameters - ---------- - pc2_p_nodes: list[float] - The list of nodes in x-Bjorken used to define the parametrization of the - power correction for the proton (see `dis_pc_func`). - pc2_d_nodes: list[float] - The list of nodes in x-Bjorken used to define the parametrization of the - power correction for the deuteron (see `dis_pc_func`). - x: list[float] - Set of points in x-Bjorken where the power corrections will be evaluated. - q2: list[float] - Set of points in Q2 where the power corrections will be evaluated. - - Returns - ------- - The function the computes the shift for this observable. It depends on the - y-values for the parameterization of P2_d and P2_p. - """ - # Iron target - Z = 23.403 - A = 49.618 - - def func(y_values_d, y_values_p): - PC2_d = dis_pc_func(y_values_d, pc2_d_nodes, x, q2, pc_func_type) - PC2_p = dis_pc_func(y_values_p, pc2_p_nodes, x, q2, pc_func_type) - result = (2 * Z - A) / A * PC2_p + 2 * (A - Z) / A * PC2_d - return result - - return func - - -def DIS_NC_XSEC_pc(pc2_nodes, pcL_nodes, pc3_nodes, lepton, x, q2, y, pc_func_type: str = "step"): - """ - Builds the function used to compute the shifts for the DIS NC x-secs - delivered by HERA and NMC. The x-sec is reconstructed as calculated - in Yadism (see https://yadism.readthedocs.io/en/latest/theory/intro.html). - In particular, the x-sec is a linear combination of the structure functions - F_2, F_L, and F_3. The coefficients are also computed appropriately (see - link). The contribution of the power corrections is then - - Delta(x-sec) = x-sec_w_pc - x-sec_wo_pc = PC_2 + N_L * PC_L + N_3 * PC_3 - - where PC_i are the power corrections relative to the respective structure - functions and the N_i the respective coefficients (as defined in Yadism). - - This function works exactly as the previous functions used to - compute nuisance shifts. In addition, it requires the kinematic - invariant `y` to build the shift-function. - - Note that this function can be used for both proton and deuteron targets, - provided that the appropriate lists of nodes is given. - - Parameters - ---------- - pc2_nodes: list[float] - The list of nodes in x-Bjorken used to define the parametrization of the - power correction for F_2. - pcL_nodes: list[float] - The list of nodes in x-Bjorken used to define the parametrization of the - power correction for F_L. - pc3_nodes: list[float] - The list of nodes in x-Bjorken used to define the parametrization of the - power correction for F_3. - lepton: int - Whether the scattering particle is a lepton (0) or an anti-lepton(1). - x: list[float] - Set of points in x-Bjorken where the power corrections will be evaluated. 
- q2: list[float] - Set of points in Q2 where the power corrections will be evaluated. - y: list[float] - Set of points in y where the power corrections will be evaluated. - - Returns - ------- - The function the computes the shift for this observable. It depends on the - y-values for the parameterization of P2 and PL and P3. - """ - yp = 1 + np.power(1 - y, 2) - ym = 1 - np.power(1 - y, 2) - yL = np.power(y, 2) - N_L = -yL / yp # Coefficient for F_L - N_3 = np.power(-1, lepton) * ym / yp # Coefficient for F_3 - - def func(y_values_pc2, y_values_pcL, y_values_pc3): - PC_2 = dis_pc_func(y_values_pc2, pc2_nodes, x, q2, pc_func_type) - PC_L = dis_pc_func(y_values_pcL, pcL_nodes, x, q2, pc_func_type) - PC_3 = dis_pc_func(y_values_pc3, pc3_nodes, x, q2, pc_func_type) - result = PC_2 + N_L * PC_L + N_3 * PC_3 + def func(y_values_p, y_values_d): + h2d = dis_pc_func(y_values_d, d_nodes, x, q2, pc_func_type) + h2p = dis_pc_func(y_values_p, p_nodes, x, q2, pc_func_type) + num = np.sum([h2d, -h2p], axis=0) + denom = np.sum([np.ones_like(h2p), h2p], axis=0) + result = np.multiply(operator.truediv(num, denom), F2_ratio) return result return func - -def DIS_CC_HERA_XSEC_pc( - pc2_p_nodes, pcL_p_nodes, pc3_p_nodes, lepton, x, q2, y, pc_func_type: str = "step" -): +def mult_jet_pc(nodes, pT, rap, dataset_sp, pdf, pc_func_type: str = "step"): """ - Builds the function used to compute the shifts for the DIS CC x-secs - delivered by HERA. The x-sec is reconstructed as calculated - in Yadism (see https://yadism.readthedocs.io/en/latest/theory/intro.html). - In particular, the x-sec is a linear combination of the structure functions - F_2, F_L, and F_3. The coefficients are also computed appropriately (see - link). The contribution of the power corrections is then - - Delta(x-sec) = x-sec_w_pc - x-sec_wo_pc = N * (PC_2 + N_L * PC_L + N_3 * PC_3) - - where PC_i are the power corrections relative to the respective structure - functions and the N_i the respective coefficients (as defined in Yadism). - N is the overall normalization factor. + As `mult_dis_pc`, but for jet data. The power corrections are defined as - For the HERA_CC_318GEV dataset, the target is always a proton. However, the - lepton may be either the electron (0) or the positron (1). This information - is needed in order to compute the coefficient N_3. - - Parameters - ---------- - pc2_p_nodes: list[float] - The list of nodes in x-Bjorken used to define the parametrization of the - power correction for F_2. - pcL_p_nodes: list[float] - The list of nodes in x-Bjorken used to define the parametrization of the - power correction for F_L. - pc3_p_nodes: list[float] - The list of nodes in x-Bjorken used to define the parametrization of the - power correction for F_3. - lepton: int - Whether the scattering particle is a lepton (0) or an anti-lepton(1). - x: list[float] - Set of points in x-Bjorken where the power corrections will be evaluated. - q2: list[float] - Set of points in Q2 where the power corrections will be evaluated. - y: list[float] - Set of points in y where the power corrections will be evaluated. - - Returns - ------- - The function the computes the shift for this observable. It depends on the - y-values for the parameterization of P2 and PL and P3. 
- """ - yp = 1 + np.power(1 - y, 2) - ym = 1 - np.power(1 - y, 2) - yL = np.power(y, 2) - N = 1 / 4 * yp # Overall normalization - N_L = -yL / yp # Coefficient for F_L - N_3 = np.power(-1, lepton) * ym / yp # Coefficient for F_3 - - def func(y_values_pc2_p, y_values_pcL_p, y_values_pc3_p): - # Initialize power corrections for each structure function - PC2_p = dis_pc_func(y_values_pc2_p, pc2_p_nodes, x, q2, pc_func_type) - PCL_p = dis_pc_func(y_values_pcL_p, pcL_p_nodes, x, q2, pc_func_type) - PC3_p = dis_pc_func(y_values_pc3_p, pc3_p_nodes, x, q2, pc_func_type) - - # Build the contribution to the x-sec of the power corrections - result = N * (PC2_p + N_L * PCL_p + N_3 * PC3_p) - return result - - return func - - -def DIS_CC_NUTEV_pc( - pc2_p_nodes, - pcL_p_nodes, - pc3_p_nodes, - pc2_d_nodes, - pcL_d_nodes, - pc3_d_nodes, - lepton, - x, - q2, - y, - pc_func_type: str = "step", -): - """ - Builds the function used to compute the shifts for the DIS CC x-secs - delivered by NuTeV. The x-sec is reconstructed as calculated - in Yadism (see https://yadism.readthedocs.io/en/latest/theory/intro.html). - In particular, the x-sec is a linear combination of the structure functions - F_2, F_L, and F_3. The coefficients are also computed appropriately (see - link). Note that this experiment uses iron targets, and thus the coefficients - must take into account the nuclear mixture of porton and deuteron. The contribution - of the power corrections is then - - Delta(x-sec) = x-sec_w_pc - x-sec_wo_pc = N * (PC_2 + N_L * PC_L + N_3 * PC_3) - - where PC_i are the power corrections relative to the respective structure - functions (nuclear mixture implicit) and the N_i the respective coefficients (as defined in Yadism). - N is the overall normalization factor. - - For the NuTeV CC dataset, the target is always iron. However, the - lepton may be either the electron (0) or the positron (1). This - information is needed in order to compute the coefficient N_3. - - Nuclear target - -------------- - See `DIS_F2C_pc`. - - Parameters - ---------- - pc2_p_nodes: list[float] - The list of nodes in x-Bjorken used to define the parametrization of the - power correction for F_2 of the proton. - pcL_p_nodes: list[float] - The list of nodes in x-Bjorken used to define the parametrization of the - power correction for F_L of the proton. - pc3_p_nodes: list[float] - The list of nodes in x-Bjorken used to define the parametrization of the - power correction for F_3 of the proton. - pc2_p_nodes: list[float] - The list of nodes in x-Bjorken used to define the parametrization of the - power correction for F_2 of the deuteron. - pcL_p_nodes: list[float] - The list of nodes in x-Bjorken used to define the parametrization of the - power correction for F_L of the deuteron. - pc3_p_nodes: list[float] - The list of nodes in x-Bjorken used to define the parametrization of the - power correction for F_3 of the deuteron. - lepton: int - Whether the scattering particle is a lepton (0) or an anti-lepton(1). - x: list[float] - Set of points in x-Bjorken where the power corrections will be evaluated. - q2: list[float] - Set of points in Q2 where the power corrections will be evaluated. - y: list[float] - Set of points in y where the power corrections will be evaluated. - - Returns - ------- - The function the computes the shift for this observable. It depends on the - y-values for the parameterization of P2 and PL and P3 for proton and deuteron. 
- """ - # Iron target - Z = 23.403 - A = 49.618 - yp = 1 + np.power(1 - y, 2) - 2 * np.power(x * y * Mh, 2) / q2 - ym = 1 - np.power(1 - y, 2) - yL = np.power(y, 2) - N_L = -yL / yp # Coefficient for F_L - N_3 = np.power(-1, lepton) * ym / yp # Coefficient for F_3 - - MW2 = np.power(MW, 2) - # Overall coefficient - # TODO: cross-check - N = 100 / 2 / np.power(1 + q2 / MW2, 2) * yp - - def func( - y_values_pc2_p, - y_values_pcL_p, - y_values_pc3_p, - y_values_pc2_d, - y_values_pcL_d, - y_values_pc3_d, - ): - PC2_p = dis_pc_func(y_values_pc2_p, pc2_p_nodes, x, q2, pc_func_type) - PCL_p = dis_pc_func(y_values_pcL_p, pcL_p_nodes, x, q2, pc_func_type) - PC3_p = dis_pc_func(y_values_pc3_p, pc3_p_nodes, x, q2, pc_func_type) - PC2_d = dis_pc_func(y_values_pc2_d, pc2_d_nodes, x, q2, pc_func_type) - PCL_d = dis_pc_func(y_values_pcL_d, pcL_d_nodes, x, q2, pc_func_type) - PC3_d = dis_pc_func(y_values_pc3_d, pc3_d_nodes, x, q2, pc_func_type) - tmp_2 = (2 * Z - A) / A * PC2_p + 2 * (A - Z) / A * PC2_d - tmp_L = (2 * Z - A) / A * PCL_p + 2 * (A - Z) / A * PCL_d - tmp_3 = (2 * Z - A) / A * PC3_p + 2 * (A - Z) / A * PC3_d - result = N * (tmp_2 + N_L * tmp_L + N_3 * tmp_3) - return result + xsec -> xsec * ( 1 + PC ), - return func + and the shift is defined as + Delta(xsec) = (xsec + xsec) - xsec = PC. -# TODO This is function is really similar to the one -# defined for NUTEV CC. Can we reduce code repetitions? -def DIS_CC_CHORUS_pc( - pc2_p_nodes, - pcL_p_nodes, - pc3_p_nodes, - pc2_d_nodes, - pcL_d_nodes, - pc3_d_nodes, - lepton, - x, - q2, - y, - pc_func_type: str = "step", -): + The power correction is a function of the rapidity. """ - Same as DIS_CC_NUTEV_pc, but for CHORUS CC. - - Note that the difference here is in the definition of the overall - normalization N. - - Nuclear target - -------------- - See `DIS_F2C_pc`. - - Parameters - ---------- - pc2_p_nodes: list[float] - The list of nodes in x-Bjorken used to define the parametrization of the - power correction for F_2 of the proton. - pcL_p_nodes: list[float] - The list of nodes in x-Bjorken used to define the parametrization of the - power correction for F_L of the proton. - pc3_p_nodes: list[float] - The list of nodes in x-Bjorken used to define the parametrization of the - power correction for F_3 of the proton. - pc2_p_nodes: list[float] - The list of nodes in x-Bjorken used to define the parametrization of the - power correction for F_2 of the deuteron. - pcL_p_nodes: list[float] - The list of nodes in x-Bjorken used to define the parametrization of the - power correction for F_L of the deuteron. - pc3_p_nodes: list[float] - The list of nodes in x-Bjorken used to define the parametrization of the - power correction for F_3 of the deuteron. - lepton: int - Whether the scattering particle is a lepton (0) or an anti-lepton(1). - x: list[float] - Set of points in x-Bjorken where the power corrections will be evaluated. - q2: list[float] - Set of points in Q2 where the power corrections will be evaluated. - y: list[float] - Set of points in y where the power corrections will be evaluated. + cuts = dataset_sp.cuts + (fkspec,) = dataset_sp.fkspecs + fk = fkspec.load_with_cuts(cuts) + xsec = central_fk_predictions(fk, pdf) - Returns - ------- - The function the computes the shift for this observable. It depends on the - y-values for the parameterization of P2 and PL and P3 for proton and deuteron. 
- """ - # Lead target - A = 208.0 - Z = 82 - yp = 1 + np.power(1 - y, 2) - 2 * np.power(x * y * Mh, 2) / q2 - ym = 1 - np.power(1 - y, 2) - yL = np.power(y, 2) - N_L = -yL / yp # Coefficient for F_L - N_3 = np.power(-1, lepton) * ym / yp # Coefficient for F_3 - - MW2 = np.power(MW, 2) - # Overall coefficient - # TODO: cross-check - N = GEV_CM2_CONV * (GF**2) * Mh / (2 * np.pi * np.power(1 + q2 / MW2, 2)) * yp - - def func( - y_values_pc2_p, - y_values_pcL_p, - y_values_pc3_p, - y_values_pc2_d, - y_values_pcL_d, - y_values_pc3_d, - ): - PC2_p = dis_pc_func(y_values_pc2_p, pc2_p_nodes, x, q2, pc_func_type) - PCL_p = dis_pc_func(y_values_pcL_p, pcL_p_nodes, x, q2, pc_func_type) - PC3_p = dis_pc_func(y_values_pc3_p, pc3_p_nodes, x, q2, pc_func_type) - PC2_d = dis_pc_func(y_values_pc2_d, pc2_d_nodes, x, q2, pc_func_type) - PCL_d = dis_pc_func(y_values_pcL_d, pcL_d_nodes, x, q2, pc_func_type) - PC3_d = dis_pc_func(y_values_pc3_d, pc3_d_nodes, x, q2, pc_func_type) - tmp_2 = (2 * Z - A) / A * PC2_p + 2 * (A - Z) / A * PC2_d - tmp_L = (2 * Z - A) / A * PCL_p + 2 * (A - Z) / A * PCL_d - tmp_3 = (2 * Z - A) / A * PC3_p + 2 * (A - Z) / A * PC3_d - result = N * (tmp_2 + N_L * tmp_L + N_3 * tmp_3) - return result + def func(y_values): + result = jets_pc_func(y_values, nodes, pT, rap, pc_func_type) + return np.multiply(result, xsec.to_numpy()[:, 0]) return func - def construct_pars_combs(parameters_dict): """Construct the combination of parameters (the ones that parametrize the power corrections) used to compute the shifts. @@ -851,7 +416,6 @@ def construct_pars_combs(parameters_dict): return combinations - # TODO `pc_func_type` will be removed def compute_deltas_pc(dataset_sp: DataSetSpec, pdf: PDF, pc_dict: dict, pc_func_type: str): """ @@ -881,185 +445,64 @@ def compute_deltas_pc(dataset_sp: DataSetSpec, pdf: PDF, pc_dict: dict, pc_func_ pc_func = None if process_type.startswith('DIS'): - pc2_p_nodes = pc_dict["H2p"]['nodes'] - pcL_p_nodes = pc_dict["HLp"]['nodes'] - pc3_p_nodes = pc_dict["H3p"]['nodes'] - pc2_d_nodes = pc_dict["H2d"]['nodes'] - pcL_d_nodes = pc_dict["HLd"]['nodes'] - pc3_d_nodes = pc_dict["H3d"]['nodes'] + f2_p_nodes = pc_dict["H2p"]['nodes'] + f2_d_nodes = pc_dict["H2p"]['nodes'] + hera_nc_xsec_nodes = pc_dict["xsec_nc"]['nodes'] + hera_cc_xsec_nodes = pc_dict["hera_cc"]['nodes'] + chorus_cc_xsec_nodes = pc_dict["chorus_cc"]['nodes'] + nutev_cc_xsec_nodes = pc_dict["nutev_cc"]['nodes'] + x = dataset_sp.commondata.metadata.load_kinematics()['x'].to_numpy().reshape(-1)[cuts] q2 = dataset_sp.commondata.metadata.load_kinematics()['Q2'].to_numpy().reshape(-1)[cuts] - y = dataset_sp.commondata.metadata.load_kinematics()['y'].to_numpy().reshape(-1)[cuts] # F2 ratio if exp_name == "NMC_NC_NOTFIXED_EM-F2": - pc_func = DIS_F2R_pc(dataset_sp, pdf, pc2_p_nodes, pc2_d_nodes, x, q2, pc_func_type) + pc_func_ratio = mult_dis_ratio_pc(f2_p_nodes, f2_d_nodes, x, q2, dataset_sp, pdf, pc_func_type) for pars_pc in pars_combs: - deltas[pars_pc['label']] = pc_func(pars_pc['comb']['H2p'], pars_pc['comb']['H2d']) + deltas[pars_pc['label']] = pc_func_ratio(pars_pc['comb']['H2p'], pars_pc['comb']['H2d']) # F2 proton traget elif exp_name in F2P_exps: - pc_func = DIS_F2_pc(pc2_p_nodes, x, q2, pc_func_type) + pc_func = mult_dis_pc(f2_p_nodes, x, q2, dataset_sp, pdf, pc_func_type) for pars_pc in pars_combs: deltas[pars_pc['label']] = pc_func(pars_pc['comb']['H2p']) # F2 deuteron traget elif exp_name in F2D_exps: - pc_func = DIS_F2_pc(pc2_d_nodes, x, q2, pc_func_type) + pc_func = mult_dis_pc(f2_d_nodes, x, q2, 
dataset_sp, pdf, pc_func_type) for pars_pc in pars_combs: deltas[pars_pc['label']] = pc_func(pars_pc['comb']['H2d']) # EMC elif exp_name.startswith('EMC_NC_250GEV'): - pc_func = DIS_F2C_pc(pc2_p_nodes, pc2_d_nodes, x, q2, pc_func_type) - for pars_pc in pars_combs: - deltas[pars_pc['label']] = pc_func(pars_pc['comb']['H2p'], pars_pc['comb']['H2d']) + raise NotImplementedError( + f"The {process_type} observable for {exp_name} " + "has not been implemented." + ) - # HERA and NMC SIGMARED NC + # HERA NC xsec elif exp_name in np.concatenate([NC_SIGMARED_P_EM, NC_SIGMARED_P_EP, NC_SIGMARED_P_EAVG]): - # Electron - if exp_name in NC_SIGMARED_P_EM: - pc_func = DIS_NC_XSEC_pc( - pc2_p_nodes, pcL_p_nodes, pc3_p_nodes, 0, x, q2, y, pc_func_type - ) - # Positron - elif exp_name in NC_SIGMARED_P_EP: - pc_func = DIS_NC_XSEC_pc( - pc2_p_nodes, pcL_p_nodes, pc3_p_nodes, 1, x, q2, y, pc_func_type - ) - # Average positron and electron - # TODO - # Check if this is correct (ach) - elif NC_SIGMARED_P_EAVG: - - def average(y_values_pc2_p, y_values_pcL_p, y_values_pc3_p): - electron = DIS_NC_XSEC_pc( - pc2_p_nodes, pcL_p_nodes, pc3_p_nodes, 0, x, q2, y, pc_func_type - ) - positron = DIS_NC_XSEC_pc( - pc2_p_nodes, pcL_p_nodes, pc3_p_nodes, 1, x, q2, y, pc_func_type - ) - result = electron(y_values_pc2_p, y_values_pcL_p, y_values_pc3_p) + positron( - y_values_pc2_p, y_values_pcL_p, y_values_pc3_p - ) - return result / 2 - - pc_func = average - else: - raise ValueError(f"{exp_name} not implemented.") - + pc_func = mult_dis_pc(hera_nc_xsec_nodes, x, q2, dataset_sp, pdf, pc_func_type) for pars_pc in pars_combs: - deltas[pars_pc['label']] = pc_func( - pars_pc['comb']['H2p'], pars_pc['comb']['HLp'], pars_pc['comb']['H3p'] - ) + deltas[pars_pc['label']] = pc_func(pars_pc['comb']['xsec_nc']) # CHORUS elif exp_name.startswith('CHORUS_CC'): - # Nu - if exp_name == 'CHORUS_CC_NOTFIXED_PB_NU-SIGMARED': - pc_func = DIS_CC_CHORUS_pc( - pc2_p_nodes, - pcL_p_nodes, - pc3_p_nodes, - pc2_d_nodes, - pcL_d_nodes, - pc3_d_nodes, - 0, - x, - q2, - y, - pc_func_type, - ) - # Nu bar - elif exp_name == 'CHORUS_CC_NOTFIXED_PB_NB-SIGMARED': - pc_func = DIS_CC_CHORUS_pc( - pc2_p_nodes, - pcL_p_nodes, - pc3_p_nodes, - pc2_d_nodes, - pcL_d_nodes, - pc3_d_nodes, - 1, - x, - q2, - y, - pc_func_type, - ) - else: - raise ValueError(f"{exp_name} not implemented.") + pc_func = mult_dis_pc(chorus_cc_xsec_nodes, x, q2, dataset_sp, pdf, pc_func_type) for pars_pc in pars_combs: - deltas[pars_pc['label']] = pc_func( - pars_pc['comb']['H2p'], - pars_pc['comb']['HLp'], - pars_pc['comb']['H3p'], - pars_pc['comb']['H2d'], - pars_pc['comb']['HLd'], - pars_pc['comb']['H3d'], - ) + deltas[pars_pc['label']] = pc_func(pars_pc['comb']['chorus_cc']) # NuTeV elif exp_name.startswith('NUTEV_CC'): - # Nu - if exp_name == 'NUTEV_CC_NOTFIXED_FE_NU-SIGMARED': - pc_func = DIS_CC_NUTEV_pc( - pc2_p_nodes, - pcL_p_nodes, - pc3_p_nodes, - pc2_d_nodes, - pcL_d_nodes, - pc3_d_nodes, - 0, - x, - q2, - y, - pc_func_type, - ) - # Nu bar - elif exp_name == 'NUTEV_CC_NOTFIXED_FE_NB-SIGMARED': - pc_func = DIS_CC_NUTEV_pc( - pc2_p_nodes, - pcL_p_nodes, - pc3_p_nodes, - pc2_d_nodes, - pcL_d_nodes, - pc3_d_nodes, - 1, - x, - q2, - y, - pc_func_type, - ) - else: - raise ValueError(f"{exp_name} not implemented.") + pc_func = mult_dis_pc(nutev_cc_xsec_nodes, x, q2, dataset_sp, pdf, pc_func_type) for pars_pc in pars_combs: - deltas[pars_pc['label']] = pc_func( - pars_pc['comb']['H2p'], - pars_pc['comb']['HLp'], - pars_pc['comb']['H3p'], - pars_pc['comb']['H2d'], - pars_pc['comb']['HLd'], 
- pars_pc['comb']['H3d'], - ) + deltas[pars_pc['label']] = pc_func(pars_pc['comb']['nutev_cc']) # HERA_CC elif exp_name.startswith('HERA_CC'): - # electron - if exp_name == 'HERA_CC_318GEV_EM-SIGMARED': - pc_func = DIS_CC_HERA_XSEC_pc( - pc2_p_nodes, pcL_p_nodes, pc3_p_nodes, 0, x, q2, y, pc_func_type - ) - # positron - elif exp_name == 'HERA_CC_318GEV_EP-SIGMARED': - pc_func = DIS_CC_HERA_XSEC_pc( - pc2_p_nodes, pcL_p_nodes, pc3_p_nodes, 1, x, q2, y, pc_func_type - ) - else: - raise ValueError(f"{exp_name} not implemented.") - + pc_func = mult_dis_pc(hera_cc_xsec_nodes, x, q2, dataset_sp, pdf, pc_func_type) for pars_pc in pars_combs: - deltas[pars_pc['label']] = pc_func( - pars_pc['comb']['H2p'], pars_pc['comb']['HLp'], pars_pc['comb']['H3p'] - ) + deltas[pars_pc['label']] = pc_func(pars_pc['comb']['hera_cc']) else: raise ValueError( @@ -1071,7 +514,7 @@ def average(y_values_pc2_p, y_values_pcL_p, y_values_pc3_p): eta = dataset_sp.commondata.metadata.load_kinematics()['y'].to_numpy().reshape(-1)[cuts] pT = dataset_sp.commondata.metadata.load_kinematics()['pT'].to_numpy().reshape(-1)[cuts] - pc_func = JET_pc(dataset_sp, pdf, pc_jet_nodes, pT, eta, pc_func_type) + pc_func = mult_jet_pc(pc_jet_nodes, pT, eta, dataset_sp, pdf, pc_func_type) for pars_pc in pars_combs: deltas[pars_pc['label']] = pc_func(pars_pc['comb']['Hj']) @@ -1089,7 +532,7 @@ def average(y_values_pc2_p, y_values_pcL_p, y_values_pc3_p): .to_numpy() .reshape(-1)[cuts] ) - pc_func = JET_pc(dataset_sp, pdf, pc_jet_nodes, m_jj, eta_star, pc_func_type) + pc_func = mult_jet_pc(pc_jet_nodes, m_jj, eta_star, dataset_sp, pdf, pc_func_type) for pars_pc in pars_combs: deltas[pars_pc['label']] = pc_func(pars_pc['comb']['H2j_ATLAS']) @@ -1105,7 +548,7 @@ def average(y_values_pc2_p, y_values_pcL_p, y_values_pc3_p): .to_numpy() .reshape(-1)[cuts] ) - pc_func = JET_pc(dataset_sp, pdf, pc_jet_nodes, m_jj, eta_diff, pc_func_type) + pc_func = mult_jet_pc(pc_jet_nodes, m_jj, eta_diff, dataset_sp, pdf, pc_func_type) for pars_pc in pars_combs: deltas[pars_pc['label']] = pc_func(pars_pc['comb']['H2j_CMS']) From 79e6c0177b47df10651896686a3333d785157b44 Mon Sep 17 00:00:00 2001 From: achiefa Date: Tue, 17 Jun 2025 09:16:48 +0100 Subject: [PATCH 17/23] Less functions for PCs --- .../higher_twist_functions.py | 31 +++++++++---------- 1 file changed, 14 insertions(+), 17 deletions(-) diff --git a/validphys2/src/validphys/theorycovariance/higher_twist_functions.py b/validphys2/src/validphys/theorycovariance/higher_twist_functions.py index fee68c6c02..b94590f3de 100644 --- a/validphys2/src/validphys/theorycovariance/higher_twist_functions.py +++ b/validphys2/src/validphys/theorycovariance/higher_twist_functions.py @@ -445,12 +445,9 @@ def compute_deltas_pc(dataset_sp: DataSetSpec, pdf: PDF, pc_dict: dict, pc_func_ pc_func = None if process_type.startswith('DIS'): - f2_p_nodes = pc_dict["H2p"]['nodes'] - f2_d_nodes = pc_dict["H2p"]['nodes'] - hera_nc_xsec_nodes = pc_dict["xsec_nc"]['nodes'] - hera_cc_xsec_nodes = pc_dict["hera_cc"]['nodes'] - chorus_cc_xsec_nodes = pc_dict["chorus_cc"]['nodes'] - nutev_cc_xsec_nodes = pc_dict["nutev_cc"]['nodes'] + f2_p_nodes = pc_dict["f2p"]['nodes'] + f2_d_nodes = pc_dict["f2d"]['nodes'] + dis_cc_nodes = pc_dict["dis_cc"]['nodes'] x = dataset_sp.commondata.metadata.load_kinematics()['x'].to_numpy().reshape(-1)[cuts] q2 = dataset_sp.commondata.metadata.load_kinematics()['Q2'].to_numpy().reshape(-1)[cuts] @@ -459,19 +456,19 @@ def compute_deltas_pc(dataset_sp: DataSetSpec, pdf: PDF, pc_dict: dict, pc_func_ if exp_name == 
"NMC_NC_NOTFIXED_EM-F2": pc_func_ratio = mult_dis_ratio_pc(f2_p_nodes, f2_d_nodes, x, q2, dataset_sp, pdf, pc_func_type) for pars_pc in pars_combs: - deltas[pars_pc['label']] = pc_func_ratio(pars_pc['comb']['H2p'], pars_pc['comb']['H2d']) + deltas[pars_pc['label']] = pc_func_ratio(pars_pc['comb']['f2p'], pars_pc['comb']['f2d']) # F2 proton traget elif exp_name in F2P_exps: pc_func = mult_dis_pc(f2_p_nodes, x, q2, dataset_sp, pdf, pc_func_type) for pars_pc in pars_combs: - deltas[pars_pc['label']] = pc_func(pars_pc['comb']['H2p']) + deltas[pars_pc['label']] = pc_func(pars_pc['comb']['f2p']) # F2 deuteron traget elif exp_name in F2D_exps: pc_func = mult_dis_pc(f2_d_nodes, x, q2, dataset_sp, pdf, pc_func_type) for pars_pc in pars_combs: - deltas[pars_pc['label']] = pc_func(pars_pc['comb']['H2d']) + deltas[pars_pc['label']] = pc_func(pars_pc['comb']['f2d']) # EMC elif exp_name.startswith('EMC_NC_250GEV'): @@ -482,27 +479,27 @@ def compute_deltas_pc(dataset_sp: DataSetSpec, pdf: PDF, pc_dict: dict, pc_func_ # HERA NC xsec elif exp_name in np.concatenate([NC_SIGMARED_P_EM, NC_SIGMARED_P_EP, NC_SIGMARED_P_EAVG]): - pc_func = mult_dis_pc(hera_nc_xsec_nodes, x, q2, dataset_sp, pdf, pc_func_type) + pc_func = mult_dis_pc(f2_p_nodes, x, q2, dataset_sp, pdf, pc_func_type) for pars_pc in pars_combs: - deltas[pars_pc['label']] = pc_func(pars_pc['comb']['xsec_nc']) + deltas[pars_pc['label']] = pc_func(pars_pc['comb']['f2p']) # CHORUS elif exp_name.startswith('CHORUS_CC'): - pc_func = mult_dis_pc(chorus_cc_xsec_nodes, x, q2, dataset_sp, pdf, pc_func_type) + pc_func = mult_dis_pc(dis_cc_nodes, x, q2, dataset_sp, pdf, pc_func_type) for pars_pc in pars_combs: - deltas[pars_pc['label']] = pc_func(pars_pc['comb']['chorus_cc']) + deltas[pars_pc['label']] = pc_func(pars_pc['comb']['dis_cc']) # NuTeV elif exp_name.startswith('NUTEV_CC'): - pc_func = mult_dis_pc(nutev_cc_xsec_nodes, x, q2, dataset_sp, pdf, pc_func_type) + pc_func = mult_dis_pc(dis_cc_nodes , x, q2, dataset_sp, pdf, pc_func_type) for pars_pc in pars_combs: - deltas[pars_pc['label']] = pc_func(pars_pc['comb']['nutev_cc']) + deltas[pars_pc['label']] = pc_func(pars_pc['comb']['dis_cc']) # HERA_CC elif exp_name.startswith('HERA_CC'): - pc_func = mult_dis_pc(hera_cc_xsec_nodes, x, q2, dataset_sp, pdf, pc_func_type) + pc_func = mult_dis_pc(dis_cc_nodes, x, q2, dataset_sp, pdf, pc_func_type) for pars_pc in pars_combs: - deltas[pars_pc['label']] = pc_func(pars_pc['comb']['hera_cc']) + deltas[pars_pc['label']] = pc_func(pars_pc['comb']['dis_cc']) else: raise ValueError( From 41f618fa01b08e2631e6c15e05f71c4b442bf812 Mon Sep 17 00:00:00 2001 From: achiefa Date: Tue, 1 Jul 2025 15:40:18 +0100 Subject: [PATCH 18/23] Combined di-jet --- .../validphys/theorycovariance/higher_twist_functions.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/validphys2/src/validphys/theorycovariance/higher_twist_functions.py b/validphys2/src/validphys/theorycovariance/higher_twist_functions.py index b94590f3de..8e164a5ad7 100644 --- a/validphys2/src/validphys/theorycovariance/higher_twist_functions.py +++ b/validphys2/src/validphys/theorycovariance/higher_twist_functions.py @@ -516,9 +516,10 @@ def compute_deltas_pc(dataset_sp: DataSetSpec, pdf: PDF, pc_dict: dict, pc_func_ deltas[pars_pc['label']] = pc_func(pars_pc['comb']['Hj']) elif process_type == 'DIJET': + if dataset_sp.commondata.metadata.experiment == 'ATLAS': - pc_jet_nodes = pc_dict["H2j_ATLAS"]['nodes'] + pc_jet_nodes = pc_dict["H2j_ATLAS"]['nodes'] if pc_dict.get("H2j_ATLAS") else 
pc_dict["Hj"]['nodes'] eta_star = ( dataset_sp.commondata.metadata.load_kinematics()['ystar'] .to_numpy() @@ -531,10 +532,10 @@ def compute_deltas_pc(dataset_sp: DataSetSpec, pdf: PDF, pc_dict: dict, pc_func_ ) pc_func = mult_jet_pc(pc_jet_nodes, m_jj, eta_star, dataset_sp, pdf, pc_func_type) for pars_pc in pars_combs: - deltas[pars_pc['label']] = pc_func(pars_pc['comb']['H2j_ATLAS']) + deltas[pars_pc['label']] = pc_func(pars_pc['comb']['H2j_ATLAS'] if pc_dict.get("H2j_ATLAS") else pars_pc['comb']['Hj']) elif dataset_sp.commondata.metadata.experiment == 'CMS': - pc_jet_nodes = pc_dict["H2j_CMS"]['nodes'] + pc_jet_nodes = pc_dict["H2j_CMS"]['nodes'] if pc_dict.get("H2j_CMS") else pc_dict["Hj"]['nodes'] eta_diff = ( dataset_sp.commondata.metadata.load_kinematics()['ydiff'] .to_numpy() @@ -547,7 +548,7 @@ def compute_deltas_pc(dataset_sp: DataSetSpec, pdf: PDF, pc_dict: dict, pc_func_ ) pc_func = mult_jet_pc(pc_jet_nodes, m_jj, eta_diff, dataset_sp, pdf, pc_func_type) for pars_pc in pars_combs: - deltas[pars_pc['label']] = pc_func(pars_pc['comb']['H2j_CMS']) + deltas[pars_pc['label']] = pc_func(pars_pc['comb']['H2j_CMS'] if pc_dict.get("H2j_CMS") else pars_pc['comb']['Hj']) else: raise ValueError( From ae47eb3e18175b81d014e82899d7717aba753f4e Mon Sep 17 00:00:00 2001 From: achiefa Date: Thu, 3 Jul 2025 09:23:19 +0100 Subject: [PATCH 19/23] Correct combined di-jet --- .../validphys/theorycovariance/higher_twist_functions.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/validphys2/src/validphys/theorycovariance/higher_twist_functions.py b/validphys2/src/validphys/theorycovariance/higher_twist_functions.py index 8e164a5ad7..67fb45b9a2 100644 --- a/validphys2/src/validphys/theorycovariance/higher_twist_functions.py +++ b/validphys2/src/validphys/theorycovariance/higher_twist_functions.py @@ -519,7 +519,7 @@ def compute_deltas_pc(dataset_sp: DataSetSpec, pdf: PDF, pc_dict: dict, pc_func_ if dataset_sp.commondata.metadata.experiment == 'ATLAS': - pc_jet_nodes = pc_dict["H2j_ATLAS"]['nodes'] if pc_dict.get("H2j_ATLAS") else pc_dict["Hj"]['nodes'] + pc_jet_nodes = pc_dict["H2j_ATLAS"]['nodes'] if pc_dict.get("H2j_ATLAS") else pc_dict["H2j"]['nodes'] eta_star = ( dataset_sp.commondata.metadata.load_kinematics()['ystar'] .to_numpy() @@ -532,10 +532,10 @@ def compute_deltas_pc(dataset_sp: DataSetSpec, pdf: PDF, pc_dict: dict, pc_func_ ) pc_func = mult_jet_pc(pc_jet_nodes, m_jj, eta_star, dataset_sp, pdf, pc_func_type) for pars_pc in pars_combs: - deltas[pars_pc['label']] = pc_func(pars_pc['comb']['H2j_ATLAS'] if pc_dict.get("H2j_ATLAS") else pars_pc['comb']['Hj']) + deltas[pars_pc['label']] = pc_func(pars_pc['comb']['H2j_ATLAS'] if pc_dict.get("H2j_ATLAS") else pars_pc['comb']['H2j']) elif dataset_sp.commondata.metadata.experiment == 'CMS': - pc_jet_nodes = pc_dict["H2j_CMS"]['nodes'] if pc_dict.get("H2j_CMS") else pc_dict["Hj"]['nodes'] + pc_jet_nodes = pc_dict["H2j_CMS"]['nodes'] if pc_dict.get("H2j_CMS") else pc_dict["H2j"]['nodes'] eta_diff = ( dataset_sp.commondata.metadata.load_kinematics()['ydiff'] .to_numpy() @@ -548,7 +548,7 @@ def compute_deltas_pc(dataset_sp: DataSetSpec, pdf: PDF, pc_dict: dict, pc_func_ ) pc_func = mult_jet_pc(pc_jet_nodes, m_jj, eta_diff, dataset_sp, pdf, pc_func_type) for pars_pc in pars_combs: - deltas[pars_pc['label']] = pc_func(pars_pc['comb']['H2j_CMS'] if pc_dict.get("H2j_CMS") else pars_pc['comb']['Hj']) + deltas[pars_pc['label']] = pc_func(pars_pc['comb']['H2j_CMS'] if pc_dict.get("H2j_CMS") else pars_pc['comb']['H2j']) else: raise 
ValueError( From 06e807a680c8b85bf3d37233464dcb39648c8198 Mon Sep 17 00:00:00 2001 From: achiefa Date: Wed, 13 Aug 2025 13:26:45 +0100 Subject: [PATCH 20/23] Remove func_type dependence - default is linear interpolation --- validphys2/src/validphys/checks.py | 14 +- validphys2/src/validphys/config.py | 7 - .../theorycovariance/construction.py | 6 +- .../higher_twist_functions.py | 128 ++++-------------- 4 files changed, 28 insertions(+), 127 deletions(-) diff --git a/validphys2/src/validphys/checks.py b/validphys2/src/validphys/checks.py index 81844c4160..d0d608c7b4 100644 --- a/validphys2/src/validphys/checks.py +++ b/validphys2/src/validphys/checks.py @@ -364,19 +364,11 @@ def check_darwin_single_process(NPROC): @make_argcheck -def check_pc_parameters(pc_parameters, pc_func_type): +def check_pc_parameters(pc_parameters): """Check that the parameters for the PC method are set correctly""" for par in pc_parameters.values(): - # Check that the length of shifts is one less than the length of nodes. - if (len(par['yshift']) != len(par['nodes']) - 1) and pc_func_type not in [ - 'cubic', - 'linear', - ]: - raise ValueError( - f"The length of nodes does not match that of the list in {par['ht']}." - f"Check the runcard. Got {len(par['yshift'])} != {len(par['nodes'])}" - ) - elif (len(par['yshift']) != len(par['nodes'])) and pc_func_type in ['cubic', 'linear']: + # Check that the length of shifts is the same as the length of nodes + if (len(par['yshift']) != len(par['nodes'])): raise ValueError( f"The length of nodes does not match that of the list in {par['ht']}." f"Check the runcard. Got {len(par['yshift'])} != {len(par['nodes'])}" diff --git a/validphys2/src/validphys/config.py b/validphys2/src/validphys/config.py index 8171d625e8..23f2b308b4 100644 --- a/validphys2/src/validphys/config.py +++ b/validphys2/src/validphys/config.py @@ -1969,13 +1969,6 @@ def produce_total_phi_data(self, fitthcovmat): return validphys.results.total_phi_data_from_experiments return validphys.results.dataset_inputs_phi_data - # TODO: to be removed once we are sure the the triangular - # function for the prior is the only one of interest - def produce_pc_func_type(self, theorycovmatconfig=None): - if theorycovmatconfig is None: - raise ValueError("theorycovmatconfig is defined in the runcard.") - return theorycovmatconfig.get('func_type', 'linear') - @configparser.explicit_node def produce_covs_pt_prescrip(self, point_prescription): if point_prescription != 'power corrections': diff --git a/validphys2/src/validphys/theorycovariance/construction.py b/validphys2/src/validphys/theorycovariance/construction.py index 05b4a70747..f1b3151a9c 100644 --- a/validphys2/src/validphys/theorycovariance/construction.py +++ b/validphys2/src/validphys/theorycovariance/construction.py @@ -365,7 +365,6 @@ def covs_pt_prescrip_mhou(combine_by_type, point_prescription): return covmats -# TODO `pc_func_type`will be removed in the future @check_pc_parameters def covs_pt_prescrip_pc( combine_by_type, @@ -374,7 +373,6 @@ def covs_pt_prescrip_pc( pc_parameters, pc_included_procs, pc_excluded_exps, - pc_func_type, ): """Produces the sub-matrices of the theory covariance matrix for power corrections. 
Sub-matrices correspond to applying power corrected shifts @@ -399,8 +397,8 @@ def covs_pt_prescrip_pc( proc not in pc_included_procs for proc in [process_type1, process_type2] ) if not (is_excluded_exp or is_included_proc): - deltas1 = compute_deltas_pc(data_spec1, pdf, pc_parameters, pc_func_type) - deltas2 = compute_deltas_pc(data_spec2, pdf, pc_parameters, pc_func_type) + deltas1 = compute_deltas_pc(data_spec1, pdf, pc_parameters) + deltas2 = compute_deltas_pc(data_spec2, pdf, pc_parameters) s = compute_covs_pt_prescrip( point_prescription, exp_name1, deltas1, exp_name2, deltas2 ) diff --git a/validphys2/src/validphys/theorycovariance/higher_twist_functions.py b/validphys2/src/validphys/theorycovariance/higher_twist_functions.py index 67fb45b9a2..11876c3cee 100644 --- a/validphys2/src/validphys/theorycovariance/higher_twist_functions.py +++ b/validphys2/src/validphys/theorycovariance/higher_twist_functions.py @@ -42,69 +42,6 @@ NC_SIGMARED_P_EAVG = ['HERA_NC_318GEV_EAVG_CHARM-SIGMARED', 'HERA_NC_318GEV_EAVG_BOTTOM-SIGMARED'] -# TODO This function will be deleted in the future -def step_function(a: npt.ArrayLike, y_shift: npt.ArrayLike, bin_edges: npt.ArrayLike) -> np.ndarray: - """ - This function defines the step function used to construct the prior. The bins of the step - function are constructed using pairs of consecutive points. For instance, given the set of - points [0.0, 0.1, 0.3, 0.5], there will be three bins with edges [[0.0, 0.1], [0.1, 0.3], - 0.3, 0.5]]. Each bin is coupled with a shift, which correspond to the y-value of the bin. - - Parameters - ---------- - a: ArrayLike of float - A one-dimensional array of points at which the function is evaluated. - y_shift: ArrayLike of float - A one-dimensional array whose elements represent the y-value of each bin - bin_edges: ArrayLike of float - A one-dimensional array containing the edges of the bins. The bins are - constructed using pairs of consecutive points. - - Return - ------ - A one-dimensional array containing the function values evaluated at the points - specified in `a`. - """ - res = np.zeros_like(a) - for shift_pos, shift in enumerate(y_shift): - bin_low = bin_edges[shift_pos] - bin_high = bin_edges[shift_pos + 1] - condition = np.multiply( - a >= bin_low, a < bin_high if shift_pos != len(y_shift) - 1 else a <= bin_high - ) - res = np.add(res, [shift if cond else 0.0 for cond in condition]) - return res - - -# TODO This function will be deleted in the future -def cubic_spline_function( - a: npt.ArrayLike, y_shift: npt.ArrayLike, nodes: npt.ArrayLike -) -> np.ndarray: - """ - This function defines the cubic spline function used to construct the prior. The spline - is constructed using the nodes specified in `nodes` and the y-values in `y_shift`. The - spline is evaluated at the points specified in `a`. - - Parameters - ---------- - a: ArrayLike of float - A one-dimensional array of points at which the function is evaluated. - y_shift: ArrayLike of float - A one-dimensional array whose elements represent the y-value of each bin - nodes: ArrayLike of float - A one-dimensional array containing the nodes used to construct the spline. - - Return - ------ - A one-dimensional array containing the function values evaluated at the points - specified in `a`. 
- """ - from scipy.interpolate import CubicSpline - - cs = CubicSpline(nodes, y_shift) - return cs(a) - - def linear_bin_function( a: npt.ArrayLike, y_shift: npt.ArrayLike, bin_edges: npt.ArrayLike ) -> np.ndarray: @@ -166,14 +103,13 @@ def dis_pc_func( nodes: npt.ArrayLike, x: npt.ArrayLike, Q2: npt.ArrayLike, - pc_func_type: str = "step", ) -> npt.ArrayLike: """ This function builds the functional form of the power corrections for DIS-like processes. - Power corrections are modelled using a step-function. The edges of the bins used in the - step-function are specified by the list of nodes. The y-values for each bin are given + Power corrections are modelled using a linear function, which interpolates between the nodes + of the parameterisation. The y-values for each node are given by the array `delta_h`. The power corrections will be computed for the pairs (xb, Q2), - where `xb` is the Bjorken x. The power correction for DIS processes are rescaled by Q2. + where `xb` is the Bjorken x. The power correction for DIS processes is divided by Q2. Parameters ---------- @@ -191,15 +127,7 @@ def dis_pc_func( A one-dimensional array of power corrections for DIS-like processes where each point is evaluated at the kinematic pair (x,Q2). """ - if pc_func_type == "step": - PC = step_function(x, delta_h, nodes) / Q2 - elif pc_func_type == "linear": - PC = linear_bin_function(x, delta_h, nodes) / Q2 - elif pc_func_type == "cubic": - PC = cubic_spline_function(x, delta_h, nodes) / Q2 - else: - raise ValueError(f"Invalid function type: {pc_func_type} is not supported.") - + PC = linear_bin_function(x, delta_h, nodes) / Q2 return PC @@ -208,7 +136,6 @@ def jets_pc_func( nodes: npt.ArrayLike, pT: npt.ArrayLike, rap: npt.ArrayLike, - pc_func_type: str = "step", ) -> npt.ArrayLike: """ Same as `dis_pc_func`, but for jet data. Here, the kinematic pair consists of the rapidity @@ -230,14 +157,7 @@ def jets_pc_func( A one-dimensional array of power corrections for jet processes where each point is evaluated at the kinematic pair (y, pT). """ - if pc_func_type == "step": - PC = step_function(rap, delta_h, nodes) / pT - elif pc_func_type == "linear": - PC = linear_bin_function(rap, delta_h, nodes) / pT - elif pc_func_type == "cubic": - PC = cubic_spline_function(rap, delta_h, nodes) / pT - else: - raise ValueError(f"Invalid function type: {pc_func_type} is not supported.") + PC = linear_bin_function(rap, delta_h, nodes) / pT return PC @@ -265,7 +185,7 @@ def jets_pc_func( # return func -def mult_dis_pc(nodes, x, q2, dataset_sp, pdf, pc_func_type: str = "step"): +def mult_dis_pc(nodes, x, q2, dataset_sp, pdf): """ Returns the function that computes the shift to observables due to power corrections. Power corrections are treated as multiplicative @@ -279,7 +199,6 @@ def mult_dis_pc(nodes, x, q2, dataset_sp, pdf, pc_func_type: str = "step"): This function returns a function that computes the shift given the y-values of the nodes used to define the power corrections. - The interpolation between the nodes is specified by `pc_func_type`. 
""" cuts = dataset_sp.cuts (fkspec,) = dataset_sp.fkspecs @@ -287,13 +206,13 @@ def mult_dis_pc(nodes, x, q2, dataset_sp, pdf, pc_func_type: str = "step"): th_preds = central_fk_predictions(fk, pdf) def func(y_values): - result = dis_pc_func(y_values, nodes, x, q2, pc_func_type) + result = dis_pc_func(y_values, nodes, x, q2) return np.multiply(result, th_preds.to_numpy()[:, 0]) return func -def mult_dis_ratio_pc(p_nodes, d_nodes, x, q2, dataset_sp, pdf, pc_func_type: str = "step"): +def mult_dis_ratio_pc(p_nodes, d_nodes, x, q2, dataset_sp, pdf): """ Returns the function that computes the shift for the ratio of structure functions F2_d / F2_p. For this observable, power corrections are defined @@ -344,8 +263,8 @@ def mult_dis_ratio_pc(p_nodes, d_nodes, x, q2, dataset_sp, pdf, pc_func_type: st F2_ratio = operator.truediv(F2D, F2P) def func(y_values_p, y_values_d): - h2d = dis_pc_func(y_values_d, d_nodes, x, q2, pc_func_type) - h2p = dis_pc_func(y_values_p, p_nodes, x, q2, pc_func_type) + h2d = dis_pc_func(y_values_d, d_nodes, x, q2) + h2p = dis_pc_func(y_values_p, p_nodes, x, q2) num = np.sum([h2d, -h2p], axis=0) denom = np.sum([np.ones_like(h2p), h2p], axis=0) result = np.multiply(operator.truediv(num, denom), F2_ratio) @@ -353,7 +272,7 @@ def func(y_values_p, y_values_d): return func -def mult_jet_pc(nodes, pT, rap, dataset_sp, pdf, pc_func_type: str = "step"): +def mult_jet_pc(nodes, pT, rap, dataset_sp, pdf): """ As `mult_dis_pc`, but for jet data. The power corrections are defined as @@ -371,7 +290,7 @@ def mult_jet_pc(nodes, pT, rap, dataset_sp, pdf, pc_func_type: str = "step"): xsec = central_fk_predictions(fk, pdf) def func(y_values): - result = jets_pc_func(y_values, nodes, pT, rap, pc_func_type) + result = jets_pc_func(y_values, nodes, pT, rap) return np.multiply(result, xsec.to_numpy()[:, 0]) return func @@ -416,8 +335,7 @@ def construct_pars_combs(parameters_dict): return combinations -# TODO `pc_func_type` will be removed -def compute_deltas_pc(dataset_sp: DataSetSpec, pdf: PDF, pc_dict: dict, pc_func_type: str): +def compute_deltas_pc(dataset_sp: DataSetSpec, pdf: PDF, pc_dict: dict): """ Computes the shifts due to power corrections for a single dataset given the set of parameters that model the power corrections. 
The result is @@ -454,19 +372,19 @@ def compute_deltas_pc(dataset_sp: DataSetSpec, pdf: PDF, pc_dict: dict, pc_func_ # F2 ratio if exp_name == "NMC_NC_NOTFIXED_EM-F2": - pc_func_ratio = mult_dis_ratio_pc(f2_p_nodes, f2_d_nodes, x, q2, dataset_sp, pdf, pc_func_type) + pc_func_ratio = mult_dis_ratio_pc(f2_p_nodes, f2_d_nodes, x, q2, dataset_sp, pdf) for pars_pc in pars_combs: deltas[pars_pc['label']] = pc_func_ratio(pars_pc['comb']['f2p'], pars_pc['comb']['f2d']) # F2 proton traget elif exp_name in F2P_exps: - pc_func = mult_dis_pc(f2_p_nodes, x, q2, dataset_sp, pdf, pc_func_type) + pc_func = mult_dis_pc(f2_p_nodes, x, q2, dataset_sp, pdf) for pars_pc in pars_combs: deltas[pars_pc['label']] = pc_func(pars_pc['comb']['f2p']) # F2 deuteron traget elif exp_name in F2D_exps: - pc_func = mult_dis_pc(f2_d_nodes, x, q2, dataset_sp, pdf, pc_func_type) + pc_func = mult_dis_pc(f2_d_nodes, x, q2, dataset_sp, pdf) for pars_pc in pars_combs: deltas[pars_pc['label']] = pc_func(pars_pc['comb']['f2d']) @@ -479,25 +397,25 @@ def compute_deltas_pc(dataset_sp: DataSetSpec, pdf: PDF, pc_dict: dict, pc_func_ # HERA NC xsec elif exp_name in np.concatenate([NC_SIGMARED_P_EM, NC_SIGMARED_P_EP, NC_SIGMARED_P_EAVG]): - pc_func = mult_dis_pc(f2_p_nodes, x, q2, dataset_sp, pdf, pc_func_type) + pc_func = mult_dis_pc(f2_p_nodes, x, q2, dataset_sp, pdf) for pars_pc in pars_combs: deltas[pars_pc['label']] = pc_func(pars_pc['comb']['f2p']) # CHORUS elif exp_name.startswith('CHORUS_CC'): - pc_func = mult_dis_pc(dis_cc_nodes, x, q2, dataset_sp, pdf, pc_func_type) + pc_func = mult_dis_pc(dis_cc_nodes, x, q2, dataset_sp, pdf) for pars_pc in pars_combs: deltas[pars_pc['label']] = pc_func(pars_pc['comb']['dis_cc']) # NuTeV elif exp_name.startswith('NUTEV_CC'): - pc_func = mult_dis_pc(dis_cc_nodes , x, q2, dataset_sp, pdf, pc_func_type) + pc_func = mult_dis_pc(dis_cc_nodes , x, q2, dataset_sp, pdf) for pars_pc in pars_combs: deltas[pars_pc['label']] = pc_func(pars_pc['comb']['dis_cc']) # HERA_CC elif exp_name.startswith('HERA_CC'): - pc_func = mult_dis_pc(dis_cc_nodes, x, q2, dataset_sp, pdf, pc_func_type) + pc_func = mult_dis_pc(dis_cc_nodes, x, q2, dataset_sp, pdf) for pars_pc in pars_combs: deltas[pars_pc['label']] = pc_func(pars_pc['comb']['dis_cc']) @@ -511,7 +429,7 @@ def compute_deltas_pc(dataset_sp: DataSetSpec, pdf: PDF, pc_dict: dict, pc_func_ eta = dataset_sp.commondata.metadata.load_kinematics()['y'].to_numpy().reshape(-1)[cuts] pT = dataset_sp.commondata.metadata.load_kinematics()['pT'].to_numpy().reshape(-1)[cuts] - pc_func = mult_jet_pc(pc_jet_nodes, pT, eta, dataset_sp, pdf, pc_func_type) + pc_func = mult_jet_pc(pc_jet_nodes, pT, eta, dataset_sp, pdf) for pars_pc in pars_combs: deltas[pars_pc['label']] = pc_func(pars_pc['comb']['Hj']) @@ -530,7 +448,7 @@ def compute_deltas_pc(dataset_sp: DataSetSpec, pdf: PDF, pc_dict: dict, pc_func_ .to_numpy() .reshape(-1)[cuts] ) - pc_func = mult_jet_pc(pc_jet_nodes, m_jj, eta_star, dataset_sp, pdf, pc_func_type) + pc_func = mult_jet_pc(pc_jet_nodes, m_jj, eta_star, dataset_sp, pdf) for pars_pc in pars_combs: deltas[pars_pc['label']] = pc_func(pars_pc['comb']['H2j_ATLAS'] if pc_dict.get("H2j_ATLAS") else pars_pc['comb']['H2j']) @@ -546,7 +464,7 @@ def compute_deltas_pc(dataset_sp: DataSetSpec, pdf: PDF, pc_dict: dict, pc_func_ .to_numpy() .reshape(-1)[cuts] ) - pc_func = mult_jet_pc(pc_jet_nodes, m_jj, eta_diff, dataset_sp, pdf, pc_func_type) + pc_func = mult_jet_pc(pc_jet_nodes, m_jj, eta_diff, dataset_sp, pdf) for pars_pc in pars_combs: deltas[pars_pc['label']] = 
pc_func(pars_pc['comb']['H2j_CMS'] if pc_dict.get("H2j_CMS") else pars_pc['comb']['H2j']) From 418f06b73b7c55afb210833efc5c593a488d31d9 Mon Sep 17 00:00:00 2001 From: achiefa Date: Thu, 25 Sep 2025 18:24:42 +0100 Subject: [PATCH 21/23] Allow multiplicative factor for user covmat --- validphys2/src/validphys/config.py | 16 ++++++++++++++++ .../validphys/theorycovariance/construction.py | 4 ++-- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/validphys2/src/validphys/config.py b/validphys2/src/validphys/config.py index 23f2b308b4..007b7f24fb 100644 --- a/validphys2/src/validphys/config.py +++ b/validphys2/src/validphys/config.py @@ -878,6 +878,7 @@ def produce_loaded_theory_covmat( # change ordering according to exp_covmat (so according to runcard order) tmp = theory_covmat.droplevel(0, axis=0).droplevel(0, axis=1) bb = [str(i) for i in data_input] + import ipdb; ipdb.set_trace() return tmp.reindex(index=bb, columns=bb, level=0).values @configparser.explicit_node @@ -1323,6 +1324,21 @@ def produce_nnfit_theory_covmat( f = user_covmat_fitting return f + + def produce_mult_factor_user_covmat(self, mult_factor: float = None, user_covmat_path: str = None): + """ + Multiplicative factors for the user covmat provided by mult_factors_user_covmat in the runcard. + If no factors are provided, returns None. + For use in theorycovariance.construction.user_covmat. + """ + # Check that if mult_factors is provided, user_covmat_paths is also provided + if mult_factor is not None and user_covmat_path is None: + raise ConfigError("If mult_factors is provided, user_covmat_paths must also be provided.") + + if mult_factor is None: + return 1.0 if user_covmat_path is not None else None + else: + return mult_factor def produce_fitthcovmat( self, use_thcovmat_if_present: bool = False, fit: (str, type(None)) = None diff --git a/validphys2/src/validphys/theorycovariance/construction.py b/validphys2/src/validphys/theorycovariance/construction.py index f1b3151a9c..a7f3e22254 100644 --- a/validphys2/src/validphys/theorycovariance/construction.py +++ b/validphys2/src/validphys/theorycovariance/construction.py @@ -531,7 +531,7 @@ def fromfile_covmat(covmatpath, groups_data_by_process, procs_index): @table -def user_covmat(groups_data_by_process, procs_index, loaded_user_covmat_path): +def user_covmat(groups_data_by_process, procs_index, loaded_user_covmat_path, mult_factor_user_covmat): """ General theory covariance matrix provided by the user. Useful for testing the impact of externally produced @@ -541,7 +541,7 @@ def user_covmat(groups_data_by_process, procs_index, loaded_user_covmat_path): ``user_covmat_path`` in ``theorycovmatconfig`` in the runcard. For more information see documentation. 
""" - return fromfile_covmat(loaded_user_covmat_path, groups_data_by_process, procs_index) + return mult_factor_user_covmat * fromfile_covmat(loaded_user_covmat_path, groups_data_by_process, procs_index) @table From c10ba3e3c1530371de160a8c783dd285c8e256cf Mon Sep 17 00:00:00 2001 From: achiefa Date: Thu, 25 Sep 2025 22:34:03 +0100 Subject: [PATCH 22/23] Remove debug trace --- validphys2/src/validphys/config.py | 1 - 1 file changed, 1 deletion(-) diff --git a/validphys2/src/validphys/config.py b/validphys2/src/validphys/config.py index 007b7f24fb..526edb6765 100644 --- a/validphys2/src/validphys/config.py +++ b/validphys2/src/validphys/config.py @@ -878,7 +878,6 @@ def produce_loaded_theory_covmat( # change ordering according to exp_covmat (so according to runcard order) tmp = theory_covmat.droplevel(0, axis=0).droplevel(0, axis=1) bb = [str(i) for i in data_input] - import ipdb; ipdb.set_trace() return tmp.reindex(index=bb, columns=bb, level=0).values @configparser.explicit_node From aa7c97bc22e06ea2eb69c2c9bbba2ebdcb9d72b8 Mon Sep 17 00:00:00 2001 From: achiefa Date: Fri, 9 Jan 2026 11:06:14 +0000 Subject: [PATCH 23/23] Removing deprecated functions + clean up --- .../theorycovariance/construction.py | 10 ++- .../higher_twist_functions.py | 69 ++++++++----------- 2 files changed, 34 insertions(+), 45 deletions(-) diff --git a/validphys2/src/validphys/theorycovariance/construction.py b/validphys2/src/validphys/theorycovariance/construction.py index a7f3e22254..b85f0f7aaf 100644 --- a/validphys2/src/validphys/theorycovariance/construction.py +++ b/validphys2/src/validphys/theorycovariance/construction.py @@ -3,7 +3,7 @@ Tools for constructing theory covariance matrices and computing their chi2s. """ -from collections import defaultdict, namedtuple +from collections import defaultdict import dataclasses import logging @@ -531,7 +531,9 @@ def fromfile_covmat(covmatpath, groups_data_by_process, procs_index): @table -def user_covmat(groups_data_by_process, procs_index, loaded_user_covmat_path, mult_factor_user_covmat): +def user_covmat( + groups_data_by_process, procs_index, loaded_user_covmat_path, mult_factor_user_covmat +): """ General theory covariance matrix provided by the user. Useful for testing the impact of externally produced @@ -541,7 +543,9 @@ def user_covmat(groups_data_by_process, procs_index, loaded_user_covmat_path, mu ``user_covmat_path`` in ``theorycovmatconfig`` in the runcard. For more information see documentation. """ - return mult_factor_user_covmat * fromfile_covmat(loaded_user_covmat_path, groups_data_by_process, procs_index) + return mult_factor_user_covmat * fromfile_covmat( + loaded_user_covmat_path, groups_data_by_process, procs_index + ) @table diff --git a/validphys2/src/validphys/theorycovariance/higher_twist_functions.py b/validphys2/src/validphys/theorycovariance/higher_twist_functions.py index 11876c3cee..69dd91e0bd 100644 --- a/validphys2/src/validphys/theorycovariance/higher_twist_functions.py +++ b/validphys2/src/validphys/theorycovariance/higher_twist_functions.py @@ -99,10 +99,7 @@ def linear_bin_function( def dis_pc_func( - delta_h: npt.ArrayLike, - nodes: npt.ArrayLike, - x: npt.ArrayLike, - Q2: npt.ArrayLike, + delta_h: npt.ArrayLike, nodes: npt.ArrayLike, x: npt.ArrayLike, Q2: npt.ArrayLike ) -> npt.ArrayLike: """ This function builds the functional form of the power corrections for DIS-like processes. 
@@ -132,10 +129,7 @@ def dis_pc_func( def jets_pc_func( - delta_h: npt.ArrayLike, - nodes: npt.ArrayLike, - pT: npt.ArrayLike, - rap: npt.ArrayLike, + delta_h: npt.ArrayLike, nodes: npt.ArrayLike, pT: npt.ArrayLike, rap: npt.ArrayLike ) -> npt.ArrayLike: """ Same as `dis_pc_func`, but for jet data. Here, the kinematic pair consists of the rapidity @@ -161,30 +155,6 @@ def jets_pc_func( return PC -# def jet_single_par(delta_h: float, pT: npt.ArrayLike, rap: npt.ArrayLike) -> npt.ArrayLike: -# ret = [delta_h for _ in range(rap.size)] -# return np.array(ret) / pT - -# def mult_jet_pc_single_par(dataset_sp, pdf, pc_nodes, pT, rap, pc_func_type: str = "step"): -# """ -# As mult_jet_pc, but with one single shift for all rapidity bins. - -# This function is meant to be for development purposes only. It will either substitute -# mult_jet_pc or be deleted in the future.""" -# cuts = dataset_sp.cuts -# (fkspec,) = dataset_sp.fkspecs -# fk = fkspec.load_with_cuts(cuts) -# xsec = central_fk_predictions(fk, pdf) - -# def func(y_values): -# assert y_values.size == 1 -# ret = [y_values[0] for _ in range(rap.size)] -# ret = np.array(ret) / pT -# return np.multiply(ret, xsec.to_numpy()[:, 0]) - -# return func - - def mult_dis_pc(nodes, x, q2, dataset_sp, pdf): """ Returns the function that computes the shift to observables due to @@ -272,6 +242,7 @@ def func(y_values_p, y_values_d): return func + def mult_jet_pc(nodes, pT, rap, dataset_sp, pdf): """ As `mult_dis_pc`, but for jet data. The power corrections are defined as @@ -295,6 +266,7 @@ def func(y_values): return func + def construct_pars_combs(parameters_dict): """Construct the combination of parameters (the ones that parametrize the power corrections) used to compute the shifts. @@ -335,6 +307,7 @@ def construct_pars_combs(parameters_dict): return combinations + def compute_deltas_pc(dataset_sp: DataSetSpec, pdf: PDF, pc_dict: dict): """ Computes the shifts due to power corrections for a single dataset given @@ -374,7 +347,9 @@ def compute_deltas_pc(dataset_sp: DataSetSpec, pdf: PDF, pc_dict: dict): if exp_name == "NMC_NC_NOTFIXED_EM-F2": pc_func_ratio = mult_dis_ratio_pc(f2_p_nodes, f2_d_nodes, x, q2, dataset_sp, pdf) for pars_pc in pars_combs: - deltas[pars_pc['label']] = pc_func_ratio(pars_pc['comb']['f2p'], pars_pc['comb']['f2d']) + deltas[pars_pc['label']] = pc_func_ratio( + pars_pc['comb']['f2p'], pars_pc['comb']['f2d'] + ) # F2 proton traget elif exp_name in F2P_exps: @@ -390,9 +365,8 @@ def compute_deltas_pc(dataset_sp: DataSetSpec, pdf: PDF, pc_dict: dict): # EMC elif exp_name.startswith('EMC_NC_250GEV'): - raise NotImplementedError( - f"The {process_type} observable for {exp_name} " - "has not been implemented." + raise NotImplementedError( + f"The {process_type} observable for {exp_name} " "has not been implemented." 
) # HERA NC xsec @@ -409,7 +383,7 @@ def compute_deltas_pc(dataset_sp: DataSetSpec, pdf: PDF, pc_dict: dict): # NuTeV elif exp_name.startswith('NUTEV_CC'): - pc_func = mult_dis_pc(dis_cc_nodes , x, q2, dataset_sp, pdf) + pc_func = mult_dis_pc(dis_cc_nodes, x, q2, dataset_sp, pdf) for pars_pc in pars_combs: deltas[pars_pc['label']] = pc_func(pars_pc['comb']['dis_cc']) @@ -434,10 +408,13 @@ def compute_deltas_pc(dataset_sp: DataSetSpec, pdf: PDF, pc_dict: dict): deltas[pars_pc['label']] = pc_func(pars_pc['comb']['Hj']) elif process_type == 'DIJET': - if dataset_sp.commondata.metadata.experiment == 'ATLAS': - pc_jet_nodes = pc_dict["H2j_ATLAS"]['nodes'] if pc_dict.get("H2j_ATLAS") else pc_dict["H2j"]['nodes'] + pc_jet_nodes = ( + pc_dict["H2j_ATLAS"]['nodes'] + if pc_dict.get("H2j_ATLAS") + else pc_dict["H2j"]['nodes'] + ) eta_star = ( dataset_sp.commondata.metadata.load_kinematics()['ystar'] .to_numpy() @@ -450,10 +427,16 @@ def compute_deltas_pc(dataset_sp: DataSetSpec, pdf: PDF, pc_dict: dict): ) pc_func = mult_jet_pc(pc_jet_nodes, m_jj, eta_star, dataset_sp, pdf) for pars_pc in pars_combs: - deltas[pars_pc['label']] = pc_func(pars_pc['comb']['H2j_ATLAS'] if pc_dict.get("H2j_ATLAS") else pars_pc['comb']['H2j']) + deltas[pars_pc['label']] = pc_func( + pars_pc['comb']['H2j_ATLAS'] + if pc_dict.get("H2j_ATLAS") + else pars_pc['comb']['H2j'] + ) elif dataset_sp.commondata.metadata.experiment == 'CMS': - pc_jet_nodes = pc_dict["H2j_CMS"]['nodes'] if pc_dict.get("H2j_CMS") else pc_dict["H2j"]['nodes'] + pc_jet_nodes = ( + pc_dict["H2j_CMS"]['nodes'] if pc_dict.get("H2j_CMS") else pc_dict["H2j"]['nodes'] + ) eta_diff = ( dataset_sp.commondata.metadata.load_kinematics()['ydiff'] .to_numpy() @@ -466,7 +449,9 @@ def compute_deltas_pc(dataset_sp: DataSetSpec, pdf: PDF, pc_dict: dict): ) pc_func = mult_jet_pc(pc_jet_nodes, m_jj, eta_diff, dataset_sp, pdf) for pars_pc in pars_combs: - deltas[pars_pc['label']] = pc_func(pars_pc['comb']['H2j_CMS'] if pc_dict.get("H2j_CMS") else pars_pc['comb']['H2j']) + deltas[pars_pc['label']] = pc_func( + pars_pc['comb']['H2j_CMS'] if pc_dict.get("H2j_CMS") else pars_pc['comb']['H2j'] + ) else: raise ValueError(