Skip to content

Variable vs Variable

puma.var_vs_var.VarVsVar #

Bases: puma.plot_base.PlotLineObject

VarVsVar class that stores the information about a curve and allows calculating the ratio w.r.t. another VarVsVar curve.

Initialise properties of VarVsVar curve object.

Parameters:

Name Type Description Default
x_var numpy.ndarray

Values for x-axis variable, e.g. bin midpoints for binned data

required
y_var_mean numpy.ndarray

Mean value for y-axis variable

required
y_var_std numpy.ndarray

Std value for y-axis variable

required
x_var_widths numpy.ndarray

Widths for x-axis variable, e.g. bin widths for binned data

None
key str

Identifier for the curve e.g. tagger, by default None

None
fill bool

Whether to fill the box around each point, by default True

True
plot_y_std bool

Whether to plot y_var_std as the y error, by default True

True
**kwargs kwargs

Keyword arguments passed to PlotLineObject

{}

Raises:

Type Description
ValueError

If provided options are not compatible with each other

Source code in puma/var_vs_var.py
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
def __init__(
    self,
    x_var: np.ndarray,
    y_var_mean: np.ndarray,
    y_var_std: np.ndarray,
    x_var_widths: np.ndarray = None,
    key: str | None = None,
    fill: bool = True,
    plot_y_std: bool = True,
    **kwargs,
) -> None:
    """Set up a curve of mean y-values (with their spread) versus an x-variable.

    Parameters
    ----------
    x_var : np.ndarray
        Values for x-axis variable, e.g. bin midpoints for binned data
    y_var_mean : np.ndarray
        Mean value of the y-axis variable, one entry per `x_var` entry
    y_var_std : np.ndarray
        Std value of the y-axis variable, one entry per `x_var` entry
    x_var_widths : np.ndarray, optional
        Widths for x-axis variable, e.g. bin widths for binned data,
        by default None
    key : str, optional
        Identifier for the curve e.g. tagger, by default None
    fill : bool, optional
        Stored as `self.fill`; whether the box around each point should be
        filled, by default True
    plot_y_std : bool, optional
        Stored as `self.plot_y_std`; whether `y_var_std` should be plotted,
        by default True
    **kwargs : kwargs
        Keyword arguments passed to the parent constructor (`PlotLineObject`)

    Raises
    ------
    ValueError
        If `y_var_mean`, `y_var_std` or (when given) `x_var_widths` does not
        have the same length as `x_var`
    """
    super().__init__(**kwargs)

    # Every per-point input has to line up with `x_var`. The widths are optional
    # and therefore only validated when they were actually provided.
    n_points = len(x_var)
    per_point_inputs = [("y_var_mean", y_var_mean), ("y_var_std", y_var_std)]
    if x_var_widths is not None:
        per_point_inputs.append(("x_var_widths", x_var_widths))
    for arg_name, values in per_point_inputs:
        if len(values) != n_points:
            raise ValueError(
                f"Length of `x_var` ({n_points}) and `{arg_name}` "
                f"({len(values)}) have to be identical."
            )

    # Convert to numpy arrays so that list inputs support array arithmetic later.
    self.x_var = np.array(x_var)
    self.y_var_mean = np.array(y_var_mean)
    self.y_var_std = np.array(y_var_std)
    if x_var_widths is None:
        self.x_var_widths = None
    else:
        self.x_var_widths = np.array(x_var_widths)

    self.key = key
    self.fill = fill
    self.plot_y_std = plot_y_std

divide #

Calculate ratio between two class objects.

Parameters:

Name Type Description Default
other VarVsVar class

Second VarVsVar object to calculate ratio with

required
inverse bool

If False the ratio is calculated this / other, if True the inverse is calculated

False

Returns:

Type Description
numpy.ndarray

Ratio

numpy.ndarray

Ratio error

Raises:

Type Description
ValueError

If binning is not identical between 2 objects

Source code in puma/var_vs_var.py
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
def divide(self, other, inverse: bool = False):
    """Compute the point-wise ratio of this curve to another curve.

    Parameters
    ----------
    other : VarVsVar class
        Second VarVsVar object to calculate ratio with
    inverse : bool
        If False the ratio is calculated `this / other`,
        if True the inverse (`other / this`) is calculated

    Returns
    -------
    np.ndarray
        Ratio
    np.ndarray
        Ratio error

    Raises
    ------
    ValueError
        If the x variables of the two objects are not identical
    """
    if not np.array_equal(self.x_var, other.x_var):
        raise ValueError("The x variables of the two given objects do not match.")

    own = (self.y_var_mean, self.y_var_std)
    theirs = (other.y_var_mean, other.y_var_std)
    # Pick which curve ends up on top of the fraction. Only the numerator's
    # uncertainty is handed to `hist_ratio`; the denominator's is not used.
    (top, top_unc), (bottom, _) = (theirs, own) if inverse else (own, theirs)

    ratio, ratio_err = hist_ratio(
        numerator=top,
        denominator=bottom,
        numerator_unc=top_unc,
        step=False,
    )
    return (ratio, ratio_err)

puma.var_vs_var.VarVsVarPlot #

Bases: puma.plot_base.PlotBase

var_vs_var plot class.

var_vs_var plot properties.

Parameters:

Name Type Description Default
grid bool

Set the grid for the plots.

False
**kwargs kwargs

Keyword arguments from puma.PlotObject

{}

Raises:

Type Description
ValueError

If more than 1 ratio panel is requested

Source code in puma/var_vs_var.py
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
def __init__(self, grid: bool = False, **kwargs) -> None:
    """Create an empty variable-vs-variable figure.

    Parameters
    ----------
    grid : bool, optional
        Set the grid for the plots, by default False
    **kwargs : kwargs
        Keyword arguments forwarded to the parent constructor
        (documented upstream as `puma.PlotObject`)

    Raises
    ------
    ValueError
        If more than 1 ratio panel is requested
    """
    super().__init__(grid=grid, **kwargs)

    # Curve bookkeeping: curves by key, the order they were added in, and the
    # key of the curve used as denominator in the ratio panel.
    self.plot_objects = {}
    self.add_order = []
    self.ratios_objects = {}
    self.reference_object = None
    self.inverse_cut = False

    # The x-range starts "inverted" so that the first added curve always
    # tightens both bounds.
    self.x_var_min, self.x_var_max = np.inf, -np.inf

    if self.n_ratio_panels > 1:
        raise ValueError("Not more than one ratio panel supported.")
    self.initialise_figure()

add #

Adding VarVsVar object to figure.

Parameters:

Name Type Description Default
curve VarVsVar class

VarVsVar curve

required
key str

Unique identifier for VarVsVar curve, by default None

None
reference bool

If VarVsVar is used as reference for ratio calculation, by default False

False

Raises:

Type Description
KeyError

If unique identifier key is used twice

Source code in puma/var_vs_var.py
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
def add(self, curve: VarVsVar, key: str | None = None, reference: bool = False):
    """Register a VarVsVar curve with the figure and fill in style defaults.

    Parameters
    ----------
    curve : VarVsVar class
        VarVsVar curve
    key : str, optional
        Unique identifier for VarVsVar curve. If None, the running number of
        the curve (starting at 1) is used, by default None
    reference : bool, optional
        If VarVsVar is used as reference for ratio calculation, by default False

    Raises
    ------
    KeyError
        If unique identifier key is used twice
    """
    if key is None:
        key = len(self.plot_objects) + 1
    if key in self.plot_objects:
        raise KeyError(f"Duplicated key {key} already used for unique identifier.")

    self.plot_objects[key] = curve
    self.add_order.append(key)
    # Number of curves including the one just registered.
    n_curves = len(self.plot_objects)

    # Fill in every style attribute the caller left unset.
    if curve.linestyle is None:
        curve.linestyle = "-"
    if curve.colour is None:
        palette = get_good_colours()
        curve.colour = palette[n_curves - 1]
    if curve.alpha is None:
        curve.alpha = 0.8
    if curve.linewidth is None:
        curve.linewidth = 1.6

    if curve.is_marker is True:
        if curve.marker is None:
            # NOTE(review): markers are indexed with `n_curves` while colours use
            # `n_curves - 1`, so the first marker of the list is never picked.
            # Confirm whether this offset is intended.
            marker_cycle = get_good_markers()
            curve.marker = marker_cycle[n_curves]
        if curve.markersize is None:
            curve.markersize = 8
        if curve.markeredgewidth is None:
            curve.markeredgewidth = 2

    # Widen the figure's x-range so it covers this curve (including the
    # half-widths of its points when widths are available).
    if curve.x_var_widths is None:
        lower_edges = upper_edges = curve.x_var
    else:
        half_widths = curve.x_var_widths / 2
        lower_edges = curve.x_var - half_widths
        upper_edges = curve.x_var + half_widths
    self.x_var_min = min(self.x_var_min, np.sort(lower_edges)[0])
    self.x_var_max = max(self.x_var_max, np.sort(upper_edges)[-1])

    if reference:
        logger.debug("Setting roc %s as reference.", key)
        self.set_reference(key)

draw #

Draw figure.

Parameters:

Name Type Description Default
labelpad int

Spacing in points from the axes bounding box including ticks and tick labels, by default None

None
Source code in puma/var_vs_var.py
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
def draw(
    self,
    labelpad: int | None = None,
):
    """Draw the complete figure: curves, optional ratio panel, labels and legend.

    Parameters
    ----------
    labelpad : int, optional
        Spacing in points from the axes bounding box including
        ticks and tick labels; forwarded to `set_ylabel` for the ratio
        panel label, by default None
    """
    # Explicit user limits win over the range collected from the added curves.
    x_low = self.x_var_min if self.xmin is None else self.xmin
    x_high = self.x_var_max if self.xmax is None else self.xmax
    self.set_xlim(x_low, x_high)

    legend_handles = self.plot()
    if self.n_ratio_panels == 1:
        self.plot_ratios()

    # Cosmetics that take no arguments, applied in a fixed order.
    for apply_setting in (
        self.set_title,
        self.set_log,
        self.set_y_lim,
        self.set_xlabel,
        self.set_tick_params,
    ):
        apply_setting()
    self.set_ylabel(self.axis_top)

    if self.n_ratio_panels > 0:
        ratio_axis = self.ratio_axes[0]
        ratio_label = self.ylabel_ratio[0]
        self.set_ylabel(
            ratio_axis,
            ratio_label,
            align_right=False,
            labelpad=labelpad,
        )

    self.make_legend(legend_handles, ax_mpl=self.axis_top)
    self.plotting_done = True
    if self.apply_atlas_style is True:
        self.atlasify()

draw_hline #

Draw hline in top plot panel.

Parameters:

Name Type Description Default
y_val float

y value of the horizontal line

required
Source code in puma/var_vs_var.py
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
def draw_hline(self, y_val: float):
    """Draw a dotted horizontal line across the top plot panel.

    The line spans the x-range collected from the added curves
    (`x_var_min` to `x_var_max`).

    Parameters
    ----------
    y_val : float
        y value of the horizontal line
    """
    line_style = {"colors": "black", "linestyle": "dotted", "alpha": 0.5}
    self.axis_top.hlines(
        y=y_val,
        xmin=self.x_var_min,
        xmax=self.x_var_max,
        **line_style,
    )

plot #

Plotting curves.

Parameters:

Name Type Description Default
**kwargs

Keyword arguments passed to plt.axis.errorbar

{}

Returns:

Type Description
puma.line_plot_2d.Line2D

matplotlib Line2D object

Source code in puma/var_vs_var.py
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
def plot(self, **kwargs):
    """Draw all registered curves on the top panel.

    Parameters
    ----------
    **kwargs: kwargs
        Keyword arguments passed to plt.axis.errorbar

    Returns
    -------
    list
        One matplotlib Line2D proxy handle per curve, in the order the
        curves were added (intended for building the legend)
    """
    logger.debug("Plotting curves")
    top_axis = self.axis_top
    legend_handles = []
    for curve in (self.plot_objects[name] for name in self.add_order):
        has_widths = curve.x_var_widths is not None
        x_errors = curve.x_var_widths / 2 if has_widths else None
        # Zero-sized y errors are drawn when the std should not be shown.
        if curve.plot_y_std:
            y_errors = curve.y_var_std
        else:
            y_errors = np.zeros_like(curve.x_var)

        bars = top_axis.errorbar(
            curve.x_var,
            curve.y_var_mean,
            xerr=x_errors,
            yerr=y_errors,
            color=curve.colour,
            fmt="none",
            label=curve.label,
            alpha=curve.alpha,
            linewidth=curve.linewidth,
            ms=curve.markersize,
            **kwargs,
        )
        # Apply the curve's linestyle to the first element of the last entry
        # returned by `errorbar` (presumably the error-bar line collection of
        # matplotlib's ErrorbarContainer - confirm).
        bars[-1][0].set_linestyle(curve.linestyle)

        if curve.is_marker is True:
            top_axis.scatter(
                x=curve.x_var,
                y=curve.y_var_mean,
                marker=curve.marker,
                s=curve.markersize**2,
                color=curve.colour,
            )

        if has_widths and curve.fill:
            # One translucent box per point: as wide as the point's x-width and
            # extending one std above and below the mean.
            boxes = zip(
                curve.x_var,
                curve.y_var_mean,
                curve.x_var_widths,
                2 * curve.y_var_std,
            )
            for centre_x, centre_y, box_width, box_height in boxes:
                lower_left = (
                    centre_x - box_width / 2,
                    centre_y - box_height / 2,
                )
                top_axis.add_patch(
                    Rectangle(
                        xy=lower_left,
                        width=box_width,
                        height=box_height,
                        linewidth=0,
                        color=curve.colour,
                        alpha=0.3,
                        zorder=1,
                    )
                )

        # Empty proxy line carrying the curve's style.
        legend_handles.append(
            mpl.lines.Line2D(
                [],
                [],
                color=curve.colour,
                label=curve.label,
                linestyle=curve.linestyle,
                marker=curve.marker,
                markersize=curve.markersize,
            )
        )
    return legend_handles

plot_ratios #

Plotting ratio curves.

Raises:

Type Description
ValueError

If no reference curve is defined

Source code in puma/var_vs_var.py
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
def plot_ratios(self):
    """Draw the ratio of every curve to the reference curve in the ratio panel.

    Raises
    ------
    ValueError
        If no reference curve is defined
    """
    if self.reference_object is None:
        raise ValueError("Please specify a reference curve.")

    for name in self.add_order:
        curve = self.plot_objects[name]
        reference_curve = self.plot_objects[self.reference_object]
        ratio, ratio_err = curve.divide(reference_curve)

        ratio_axis = self.ratio_axes[0]
        has_widths = curve.x_var_widths is not None
        x_errors = curve.x_var_widths / 2 if has_widths else None
        # Zero-sized y errors are drawn when the std should not be shown.
        if curve.plot_y_std:
            y_errors = ratio_err
        else:
            y_errors = np.zeros_like(curve.x_var)

        bars = ratio_axis.errorbar(
            curve.x_var,
            ratio,
            xerr=x_errors,
            yerr=y_errors,
            color=curve.colour,
            fmt="none",
            alpha=curve.alpha,
            linewidth=curve.linewidth,
            ms=curve.markersize,
        )
        # Apply the curve's linestyle to the first element of the last entry
        # returned by `errorbar` (presumably the error-bar line collection of
        # matplotlib's ErrorbarContainer - confirm).
        bars[-1][0].set_linestyle(curve.linestyle)

        if curve.is_marker is True:
            ratio_axis.scatter(
                x=curve.x_var,
                y=ratio,
                marker=curve.marker,
                color=curve.colour,
                s=curve.markersize**2,
            )

        if has_widths and curve.fill:
            # One translucent box per point, extending one ratio error above
            # and below the ratio value.
            boxes = zip(curve.x_var, ratio, curve.x_var_widths, 2 * ratio_err)
            for centre_x, centre_y, box_width, box_height in boxes:
                lower_left = (
                    centre_x - box_width / 2,
                    centre_y - box_height / 2,
                )
                ratio_axis.add_patch(
                    Rectangle(
                        xy=lower_left,
                        width=box_width,
                        height=box_height,
                        linewidth=0,
                        color=curve.colour,
                        alpha=0.3,
                        zorder=1,
                    )
                )

set_reference #

Set the reference curve used in the ratios.

Parameters:

Name Type Description Default
key str

Unique identifier of the VarVsVar curve

required
Source code in puma/var_vs_var.py
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
def set_reference(self, key: str):
    """Select the curve that serves as denominator in the ratio panel.

    If a reference was already set, a warning is logged and the new key
    replaces the old one.

    Parameters
    ----------
    key : str
        Unique identifier of the curve to use as reference
    """
    if self.reference_object is not None:
        logger.warning(
            "You specified a second curve %s as reference for ratio. "
            "Using it as new reference instead of %s.",
            key,
            self.reference_object,
        )
    self.reference_object = key

puma.var_vs_eff.VarVsEff #

Bases: puma.var_vs_var.VarVsVar

Class for efficiency vs. variable plot.

Initialise properties of the efficiency-vs-variable curve object.

Parameters:

Name Type Description Default
x_var_sig numpy.ndarray

Values for x-axis variable for signal

required
disc_sig numpy.ndarray

Discriminant values for signal

required
x_var_bkg numpy.ndarray

Values for x-axis variable for background, by default None

None
disc_bkg numpy.ndarray

Discriminant values for background, by default None

None
bins int or sequence of scalars

If bins is an int, it defines the number of equal-width bins in the given range (10, by default). If bins is a sequence, it defines a monotonically increasing array of bin edges, including the rightmost edge, allowing for non-uniform bin widths, by default 10

10
working_point float

Working point, by default None

None
disc_cut float or sequence of floats

Cut value for discriminant, if it is a sequence it has to have the same length as number of bins, by default None

None
flat_per_bin bool

If True and no disc_cut is given the signal efficiency is held constant in each bin, by default False

False
key str

Identifier for the curve e.g. tagger, by default None

None
**kwargs kwargs

Keyword arguments passed to PlotLineObject

{}

Raises:

Type Description
ValueError

If provided options are not compatible with each other

Source code in puma/var_vs_eff.py
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
def __init__(
    self,
    x_var_sig: np.ndarray,
    disc_sig: np.ndarray,
    x_var_bkg: np.ndarray = None,
    disc_bkg: np.ndarray = None,
    bins=10,
    working_point: float | None = None,
    disc_cut=None,
    flat_per_bin: bool = False,
    key: str | None = None,
    **kwargs,
) -> None:
    """Initialise properties of the efficiency-vs-variable curve object.

    The inputs are validated, binned in the x variable (via `_set_bin_edges`
    and `_apply_binning`), the discriminant cuts are determined (via
    `_get_disc_cuts`) and finally the `VarVsVar` base class is initialised
    with the bin centres as x values and zero-filled y values.

    Parameters
    ----------
    x_var_sig : np.ndarray
        Values for x-axis variable for signal
    disc_sig : np.ndarray
        Discriminant values for signal
    x_var_bkg : np.ndarray, optional
        Values for x-axis variable for background, by default None
    disc_bkg : np.ndarray, optional
        Discriminant values for background, by default None
    bins : int or sequence of scalars, optional
        If bins is an int, it defines the number of equal-width bins in the
        given range (10, by default). If bins is a sequence, it defines a
        monotonically increasing array of bin edges, including the
        rightmost edge, allowing for non-uniform bin widths, by default 10
    working_point : float, optional
        Working point, by default None
    disc_cut : float or sequence of floats, optional
        Cut value for discriminant, if it is a sequence it has to have the same
        length as number of bins, by default None
    flat_per_bin : bool, optional
        If True and no `disc_cut` is given the signal efficiency is held constant
        in each bin, by default False
    key : str, optional
        Identifier for the curve e.g. tagger, by default None
    **kwargs : kwargs
        Keyword arguments passed to `PlotLineObject`

    Raises
    ------
    ValueError
        If the signal (or background) arrays differ in length, if neither or
        both of `working_point` and `disc_cut` are given, if `flat_per_bin` is
        combined with `disc_cut` or used without `working_point`, or if a
        sequence `disc_cut` does not have one entry per bin
    """
    if len(x_var_sig) != len(disc_sig):
        raise ValueError(
            f"Length of `x_var_sig` ({len(x_var_sig)}) and `disc_sig` "
            f"({len(disc_sig)}) have to be identical."
        )
    # NOTE(review): if `x_var_bkg` is given but `disc_bkg` is left as None,
    # `len(disc_bkg)` raises a TypeError instead of a ValueError.
    if x_var_bkg is not None and len(x_var_bkg) != len(disc_bkg):
        raise ValueError(
            f"Length of `x_var_bkg` ({len(x_var_bkg)}) and `disc_bkg` "
            f"({len(disc_bkg)}) have to be identical."
        )
    # Check that the given options are compatible with each other.
    # One could also think about porting this to a class method instead of
    # passing the arguments to init, e.g. `set_method`.
    # (`wp` in the messages below refers to the `working_point` argument.)
    if working_point is None and disc_cut is None:
        raise ValueError("Either `wp` or `disc_cut` needs to be specified.")
    if flat_per_bin:
        if disc_cut is not None:
            raise ValueError(
                "You cannot specify `disc_cut` when `flat_per_bin` is set to True."
            )
        if working_point is None:
            raise ValueError(
                "You need to specify a working point `wp`, when `flat_per_bin` is"
                " set to True."
            )
    # Store the raw inputs as numpy arrays (background is optional).
    self.x_var_sig = np.array(x_var_sig)
    self.disc_sig = np.array(disc_sig)
    self.x_var_bkg = None if x_var_bkg is None else np.array(x_var_bkg)
    self.disc_bkg = None if disc_bkg is None else np.array(disc_bkg)
    self.working_point = working_point
    self.disc_cut = disc_cut
    self.flat_per_bin = flat_per_bin
    # Binning related variables, presumably filled by `_set_bin_edges` below.
    # NOTE(review): `bn_edges` looks like a typo for `bin_edges`, and `n_bins`
    # is initialised twice - confirm against `_set_bin_edges`.
    self.n_bins = None
    self.bn_edges = None
    self.x_bin_centres = None
    self.bin_widths = None
    self.n_bins = None
    # Binned distributions, presumably filled by `_apply_binning` below.
    self.bin_indices_sig = None
    self.disc_binned_sig = None
    self.bin_indices_bkg = None
    self.disc_binned_bkg = None

    self._set_bin_edges(bins)

    # These checks need `self.n_bins` and therefore have to run after the bin
    # edges were set.
    if disc_cut is not None:
        if working_point is not None:
            raise ValueError("You cannot specify `disc_cut` when providing `wp`.")
        if isinstance(disc_cut, (list, np.ndarray)) and self.n_bins != len(disc_cut):
            raise ValueError(
                "`disc_cut` has to be a float or has to have the same length as"
                " number of bins."
            )
    self._apply_binning()
    self._get_disc_cuts()

    # The y values are zero placeholders at construction time and the y error
    # is not plotted. `bin_widths` is doubled for the x widths, presumably
    # because it stores half-widths (confirm against `_set_bin_edges`).
    VarVsVar.__init__(
        self,
        x_var=self.x_bin_centres,
        y_var_mean=np.zeros_like(self.x_bin_centres),
        y_var_std=np.zeros_like(self.x_bin_centres),
        x_var_widths=2 * self.bin_widths,
        key=key,
        fill=True,
        plot_y_std=False,
        **kwargs,
    )
    # Direction of the discriminant cut used by `efficiency` and `rejection`.
    self.inverse_cut = False

bkg_eff property #

Calculate background efficiency per bin.

Returns:

Type Description
numpy.ndarray

Efficiency

numpy.ndarray

Efficiency_error

bkg_eff_sig_err property #

Calculate signal efficiency per bin, assuming a flat background per bin. This results in returning the signal efficiency per bin, but the background error per bin.

bkg_rej property #

Calculate background rejection per bin.

Returns:

Type Description
numpy.ndarray

Rejection

numpy.ndarray

Rejection_error

sig_eff property #

Calculate signal efficiency per bin.

Returns:

Type Description
numpy.ndarray

Efficiency

numpy.ndarray

Efficiency_error

sig_rej property #

Calculate signal rejection per bin.

Returns:

Type Description
numpy.ndarray

Rejection

numpy.ndarray

Rejection_error

efficiency #

Calculate efficiency and the associated error.

Parameters:

Name Type Description Default
arr numpy.ndarray

Array with discriminants

required
cut float

Cut value

required

Returns:

Type Description
float

Efficiency

float

Efficiency error

Source code in puma/var_vs_eff.py
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
def efficiency(self, arr: np.ndarray, cut: float):
    """Calculate efficiency and the associated error.

    The efficiency is the fraction of entries in `arr` strictly above `cut`
    (or strictly below `cut` when `self.inverse_cut` is set).

    Parameters
    ----------
    arr : np.ndarray
        Array with discriminants
    cut : float
        Cut value

    Returns
    -------
    float
        Efficiency (0 for an empty array)
    float
        Efficiency error (0 for an empty array)
    """
    n_total = len(arr)
    if n_total == 0:
        return 0, 0
    passing = arr < cut if self.inverse_cut else arr > cut
    # np.sum counts the passing entries in native code; the builtin `sum`
    # would iterate over the mask element by element in Python.
    eff = np.sum(passing) / n_total
    eff_error = eff_err(eff, n_total)
    return eff, eff_error

get #

Wrapper around rejection and efficiency functions.

Parameters:

Name Type Description Default
mode str

Can be "sig_eff", "bkg_eff", "sig_rej", "bkg_rej", or "bkg_eff_sig_err"

required
inverse_cut bool

Inverts the discriminant cut, which will yield the efficiency or rejection of the jets not passing the working point, by default False

False

Returns:

Type Description
numpy.ndarray

Rejection or efficiency depending on mode value

numpy.ndarray

Rejection or efficiency error depending on mode value

Raises:

Type Description
ValueError

If mode not supported

Source code in puma/var_vs_eff.py
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
def get(self, mode: str, inverse_cut: bool = False):
    """Wrapper around rejection and efficiency functions.

    `self.inverse_cut` is switched to `inverse_cut` only while the requested
    quantity is evaluated and is restored to its previous value afterwards,
    so the setting of one call does not leak into later property accesses.

    Parameters
    ----------
    mode : str
        Can be "sig_eff", "bkg_eff", "sig_rej", "bkg_rej", or
        "bkg_eff_sig_err"
    inverse_cut : bool, optional
        Inverts the discriminant cut, which will yield the efficiency or rejection
        of the jets not passing the working point, by default False

    Returns
    -------
    np.ndarray
        Rejection or efficiency depending on `mode` value
    np.ndarray
        Rejection or efficiency error depending on `mode` value

    Raises
    ------
    ValueError
        If mode not supported
    """
    supported_modes = (
        "sig_eff",
        "bkg_eff",
        "sig_rej",
        "bkg_rej",
        "bkg_eff_sig_err",
    )
    if mode not in supported_modes:
        raise ValueError(
            f"The selected mode {mode} is not supported. Use one of the following:"
            f" {VarVsEffPlot.mode_options}."
        )

    previous_inverse_cut = self.inverse_cut
    self.inverse_cut = inverse_cut
    try:
        # Each supported mode is the name of a property of this object.
        return getattr(self, mode)
    finally:
        # Runs on success and on errors raised while evaluating the property.
        self.inverse_cut = previous_inverse_cut

rejection #

Calculate rejection and the associated error.

Parameters:

Name Type Description Default
arr numpy.ndarray

Array with discriminants

required
cut float

Cut value

required

Returns:

Type Description
float

Rejection

float

Rejection error

Source code in puma/var_vs_eff.py
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
def rejection(self, arr: np.ndarray, cut: float):
    """Calculate rejection and the associated error.

    The rejection is the total number of entries divided by the number of
    entries strictly above `cut` (or strictly below `cut` when
    `self.inverse_cut` is set).

    Parameters
    ----------
    arr : np.ndarray
        Array with discriminants
    cut : float
        Cut value

    Returns
    -------
    float
        Rejection (np.nan if no entry passes the cut)
    float
        Rejection error (np.nan if no entry passes the cut)
    """
    n_total = len(arr)
    passing = arr < cut if self.inverse_cut else arr > cut
    # np.sum counts the passing entries in native code; the builtin `sum`
    # would iterate over the mask element by element in Python.
    rej = save_divide(n_total, np.sum(passing), default=np.inf)
    if rej == np.inf:
        logger.warning("Your rejection is infinity -> setting it to np.nan.")
        return np.nan, np.nan
    rej_error = rej_err(rej, n_total)
    return rej, rej_error

puma.var_vs_eff.VarVsEffPlot #

Bases: puma.var_vs_var.VarVsVarPlot

var_vs_eff plot class.

var_vs_eff plot properties.

Parameters:

Name Type Description Default
mode str

Defines which quantity is plotted. The following options are available: sig_eff - plots signal efficiency vs. variable, with statistical error on N signal per bin; bkg_eff - plots background efficiency vs. variable, with statistical error on N background per bin; sig_rej - plots signal rejection vs. variable, with statistical error on N signal per bin; bkg_rej - plots background rejection vs. variable, with statistical error on N background per bin; bkg_eff_sig_err - plots background efficiency vs. variable, with statistical error on N signal per bin.

required
grid bool

Set the grid for the plots.

False
**kwargs kwargs

Keyword arguments from puma.PlotObject

{}

Raises:

Type Description
ValueError

If incompatible mode given or more than 1 ratio panel requested

Source code in puma/var_vs_eff.py
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
def __init__(self, mode, grid: bool = False, **kwargs) -> None:
    """Create an efficiency/rejection-vs-variable figure for one plotting mode.

    Parameters
    ----------
    mode : str
        Defines which quantity is plotted, the following options are available:
            sig_eff - Plots signal efficiency vs. variable, with statistical error
                on N signal per bin
            bkg_eff - Plots background efficiency vs. variable, with statistical
                error on N background per bin
            sig_rej - Plots signal rejection vs. variable, with statistical error
                on N signal per bin
            bkg_rej - Plots background rejection vs. variable, with statistical
                error on N background per bin
            bkg_eff_sig_err - Plots background efficiency vs. variable, with
                statistical error on N signal per bin.
    grid : bool, optional
        Set the grid for the plots, by default False
    **kwargs : kwargs
        Keyword arguments forwarded to the parent constructor
        (documented upstream as `puma.PlotObject`)

    Raises
    ------
    ValueError
        If `mode` is not one of `self.mode_options` (the parent constructor
        may raise as well)
    """
    super().__init__(grid=grid, **kwargs)
    # The mode is validated after the parent set-up, against the options
    # declared on the class.
    if mode in self.mode_options:
        self.mode = mode
        return
    raise ValueError(
        f"The selected mode {mode} is not supported. Use one of the following: "
        f"{self.mode_options}."
    )

apply_modified_atlas_second_tag #

Modifies the atlas_second_tag to include info on the type of p-eff plot being displayed.

Source code in puma/var_vs_eff.py
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
def apply_modified_atlas_second_tag(
    self,
    signal,
    working_point=None,
    disc_cut=None,
    flat_per_bin=False,
):
    """Extend `atlas_second_tag` with a description of the applied selection.

    The description is appended on a new line if `atlas_second_tag` is
    already set, otherwise it becomes the tag.

    Parameters
    ----------
    signal : object
        Signal flavour; its `eff_str` attribute is used for the working point
        text and its `name` attribute for the discriminant cut text
    working_point : float, optional
        Working point as a fraction (shown as a percentage). Takes precedence
        over `disc_cut`, by default None
    disc_cut : float or sequence of floats, optional
        Discriminant cut value(s) to show, by default None
    flat_per_bin : bool, optional
        If True the description is wrapped as "Flat ... per bin",
        by default False

    Raises
    ------
    ValueError
        If neither `working_point` nor `disc_cut` is given
    """
    # Compare against None instead of relying on truthiness: a cut value of 0
    # is valid, and an array-valued `disc_cut` has no unambiguous truth value.
    if working_point is not None:
        mid_str = f"{round(working_point * 100, 3)}% " + signal.eff_str
    elif disc_cut is not None:
        flavour = signal.name
        # Drop a literal trailing "jets" only. `str.rstrip("jets")` would strip
        # any trailing run of the characters j/e/t/s (e.g. "electrons" would
        # lose its final "s").
        if flavour.endswith("jets"):
            flavour = flavour[: -len("jets")]
        mid_str = rf"$D_{{{flavour}}}$ > {disc_cut}"
    else:
        raise ValueError("Either `working_point` or `disc_cut` needs to be specified.")

    tag = f"Flat {mid_str} per bin" if flat_per_bin else f"{mid_str}"
    if self.atlas_second_tag:
        self.atlas_second_tag = f"{self.atlas_second_tag}\n{tag}"
    else:
        self.atlas_second_tag = tag

plot #

Plotting curves.

Parameters:

Name Type Description Default
**kwargs

Keyword arguments passed to plt.axis.errorbar

{}

Returns:

Type Description
puma.line_plot_2d.Line2D

matplotlib Line2D object

Source code in puma/var_vs_eff.py
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
def plot(self, **kwargs):
    """Set up the curves for the configured mode and draw them.

    Parameters
    ----------
    **kwargs : kwargs
        Keyword arguments forwarded unchanged to the parent class `plot`
        (documented there as being passed to plt.axis.errorbar)

    Returns
    -------
    Line2D
        matplotlib Line2D object, as returned by the parent class `plot`
    """
    logger.debug("Plotting curves with mode %s", self.mode)
    # The curves have to be prepared before the parent class draws them.
    self._setup_curves()
    drawn = super().plot(**kwargs)
    return drawn

puma.var_vs_vtx.VarVsVtx #

Bases: puma.var_vs_var.VarVsVar

var_vs_vtx class storing info about vertexing performance.

Initialise properties of the VarVsVtx curve object.

Parameters:

Name Type Description Default
x_var numpy.ndarray

Values for x-axis variable for signal

required
n_match numpy.ndarray

Values for number of correctly identified objects (where truth and reco match)

required
n_true numpy.ndarray

Values for true number of objects

required
n_reco numpy.ndarray

Values for reconstructed number of objects

required
bins int or sequence of scalars

If bins is an int, it defines the number of equal-width bins in the given range (10, by default). If bins is a sequence, it defines a monotonically increasing array of bin edges, including the rightmost edge, allowing for non-uniform bin widths, by default 10

10
key str

Identifier for the curve e.g. tagger, by default None

None
**kwargs kwargs

Keyword arguments passed to PlotLineObject

{}

Raises:

Type Description
ValueError

If provided options are not compatible with each other

Source code in puma/var_vs_vtx.py
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
def __init__(
    self,
    x_var: np.ndarray,
    n_match: np.ndarray,
    n_true: np.ndarray,
    n_reco: np.ndarray,
    bins=10,
    key: str | None = None,
    **kwargs,
) -> None:
    """Initialise properties of the vertexing performance curve object.

    Parameters
    ----------
    x_var : np.ndarray
        Values for x-axis variable for signal
    n_match : np.ndarray
        Values for number of correctly identified objects (where truth and
        reco match)
    n_true : np.ndarray
        Values for true number of objects
    n_reco : np.ndarray
        Values for reconstructed number of objects
    bins : int or sequence of scalars, optional
        If bins is an int, it defines the number of equal-width bins in the
        given range (10, by default). If bins is a sequence, it defines a
        monotonically increasing array of bin edges, including the
        rightmost edge, allowing for non-uniform bin widths, by default 10
    key : str, optional
        Identifier for the curve e.g. tagger, by default None
    **kwargs : kwargs
        Keyword arguments passed to `VarVsVar.__init__`

    Raises
    ------
    ValueError
        If `n_match`, `n_true` or `n_reco` differ in length from `x_var`
    """
    # Every count array has to provide exactly one entry per x_var entry.
    n_entries = len(x_var)
    for label, counts in (
        ("n_match", n_match),
        ("n_true", n_true),
        ("n_reco", n_reco),
    ):
        if len(counts) != n_entries:
            raise ValueError(
                f"Length of `x_var` ({n_entries}) and `{label}` "
                f"({len(counts)}) have to be identical."
            )

    self.x_var = np.array(x_var)
    self.n_match = np.array(n_match)
    self.n_true = np.array(n_true)
    self.n_reco = np.array(n_reco)

    # Placeholders for the binning related variables and the binned
    # distributions, set before the two binning helpers below are called.
    for attr_name in (
        "n_bins",
        "bin_edges",
        "x_bin_centres",
        "bin_widths",
        "bin_indices",
        "metric_binned",
    ):
        setattr(self, attr_name, None)

    self._set_bin_edges(bins)
    self._apply_binning()

    centres = self.x_bin_centres
    VarVsVar.__init__(
        self,
        x_var=centres,
        # The y values start as zeros with the same shape as the bin centres.
        y_var_mean=np.zeros_like(centres),
        y_var_std=np.zeros_like(centres),
        # NOTE(review): presumably `bin_widths` holds half-widths, hence the
        # factor of 2 - confirm against `_set_bin_edges`.
        x_var_widths=2 * self.bin_widths,
        key=key,
        fill=True,
        plot_y_std=False,
        **kwargs,
    )

efficiency property #

Calculate vertexing efficiency per bin. Defined as number of reconstructed vertices matched to truth divided by number of total true vertices.

Returns:

Type Description
numpy.ndarray

Efficiency

numpy.ndarray

Efficiency error

fakes property #

Calculate vertexing fake rate per bin. Defined as total number of events with reconstructed vertices where vertices are not expected.

Returns:

Type Description
numpy.ndarray

Fake rate

numpy.ndarray

Fake rate error

purity property #

Calculate vertexing purity per bin. Defined as number of reconstructed vertices matched to truth divided by number of total reconstructed vertices.

Returns:

Type Description
numpy.ndarray

Purity

numpy.ndarray

Purity error

get #

Wrapper around the efficiency, purity and fake rate properties.

Parameters:

Name Type Description Default
mode str

Can be "efficiency", "purity" or "fakes"

required

Returns:

Type Description
numpy.ndarray

Efficiency, purity or fake rate depending on mode value

numpy.ndarray

Efficiency, purity or fake rate error depending on mode value

Raises:

Type Description
ValueError

If mode not supported

Source code in puma/var_vs_vtx.py
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
def get(self, mode: str):
    """Return the efficiency, purity or fakes property selected by `mode`.

    Parameters
    ----------
    mode : str
        Can be "efficiency", "purity" or "fakes"

    Returns
    -------
    np.ndarray
        Efficiency, purity or fake rate depending on `mode` value
    np.ndarray
        Efficiency, purity or fake rate error depending on `mode` value

    Raises
    ------
    ValueError
        If mode not supported
    """
    # Each supported mode is named after the property that provides it, so
    # only the requested property is evaluated.
    if mode in ("efficiency", "purity", "fakes"):
        return getattr(self, mode)
    raise ValueError(
        f"The selected mode {mode} is not supported. Use one of the following:"
        f" {VarVsVtxPlot.mode_options}."
    )

get_performance_ratio #

Calculate performance ratio for vertexing task. Either n_matched/n_true (efficiency) or n_matched/n_reco (purity).

Parameters:

Name Type Description Default
num numpy.ndarray

Numerator counts, e.g. n_matched

required
denom numpy.ndarray

Denominator counts, e.g. n_true (efficiency) or n_reco (purity)

required

Returns:

Type Description
float

Performance ratio

float

Performance ratio error

Source code in puma/var_vs_vtx.py
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
def get_performance_ratio(self, num: np.ndarray, denom: np.ndarray):
    """Calculate performance ratio for vertexing task. Either n_matched/n_true
    (efficiency) or n_matched/n_reco (purity).

    The ratio is the sum of `num` divided by the sum of `denom`.

    Parameters
    ----------
    num : np.ndarray
        Numerator counts, e.g. n_matched
    denom : np.ndarray
        Denominator counts, e.g. n_true or n_reco

    Returns
    -------
    float
        Performance ratio; np.nan if the ratio is infinite
    float
        Performance ratio error; np.nan if the ratio is infinite, 0.0 if the
        ratio is zero
    """
    total_num = np.sum(num)
    total_denom = np.sum(denom)
    # NOTE(review): presumably `save_divide` returns `default` when the
    # denominator is zero - confirm against its definition.
    ratio = save_divide(total_num, total_denom, default=np.inf)
    if ratio == np.inf:
        logger.warning("Your vertexing performance ratio is infinity -> setting it to np.nan.")
        return np.nan, np.nan
    if ratio == 0:
        logger.warning("Your vertexing performance ratio is zero -> setting error to zero.")
        return 0.0, 0.0
    # The error is evaluated with the number of entries in `num` as sample size.
    ratio_error = eff_err(ratio, len(num))
    return ratio, ratio_error

puma.var_vs_vtx.VarVsVtxPlot #

Bases: puma.var_vs_var.VarVsVarPlot

var_vs_vtx plot properties.

Parameters:

Name Type Description Default
mode str

Defines which quantity is plotted, the following options are available: efficiency - Plots efficiency vs. variable for jets where vertices are expected; purity - Plots purity vs. variable for jets where vertices are expected; fakes - Plots fake rate vs. variable for jets where vertices are not expected

required
grid bool

Set the grid for the plots.

False
**kwargs kwargs

Keyword arguments from puma.PlotObject

{}

Raises:

Type Description
ValueError

If incompatible mode given or more than 1 ratio panel requested

Source code in puma/var_vs_vtx.py
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
def __init__(self, mode, grid: bool = False, **kwargs) -> None:
    """var_vs_vtx plot properties.

    Parameters
    ----------
    mode : str
        Defines which quantity is plotted, the following options are available:
            efficiency - Plots efficiency vs. variable for jets where vertices are
            expected
            purity - Plots purity vs. variable for jets where vertices are expected
            fakes - Plots fake rate vs. variable for jets where vertices are not
            expected
    grid : bool, optional
        Set the grid for the plots, by default False
    **kwargs : kwargs
        Keyword arguments forwarded to the parent class `__init__`

    Raises
    ------
    ValueError
        If `mode` is not one of `self.mode_options`
    """
    # The parent class is initialised first, before the mode is validated.
    super().__init__(grid=grid, **kwargs)
    if mode in self.mode_options:
        self.mode = mode
        return
    raise ValueError(
        f"The selected mode {mode} is not supported. Use one of the following: "
        f"{self.mode_options}."
    )

plot #

Plotting curves.

Parameters:

Name Type Description Default
**kwargs

Keyword arguments passed to plt.axis.errorbar

{}

Returns:

Type Description
puma.line_plot_2d.Line2D

matplotlib Line2D object

Source code in puma/var_vs_vtx.py
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
def plot(self, **kwargs):
    """Set up the curves for the configured mode and draw them.

    Parameters
    ----------
    **kwargs : kwargs
        Keyword arguments forwarded unchanged to the parent class `plot`
        (documented there as being passed to plt.axis.errorbar)

    Returns
    -------
    Line2D
        matplotlib Line2D object, as returned by the parent class `plot`
    """
    logger.debug("Plotting curves with mode %s", self.mode)
    # The curves have to be prepared before the parent class draws them.
    self._setup_curves()
    drawn = super().plot(**kwargs)
    return drawn