Skip to content

ROC

puma.roc.Roc #

Bases: puma.plot_base.PlotLineObject

Represents a single ROC curve and allows calculating the ratio w.r.t. other ROCs.

Initialise properties of roc curve object.

Parameters:

Name Type Description Default
sig_eff numpy.array

Array of signal efficiencies

required
bkg_rej numpy.array

Array of background rejection

required
n_test int

Number of events used to calculate the background efficiencies, by default None

None
signal_class str

Signal class, e.g. for b-tagging "bjets", by default None

None
rej_class str or ftag.Label

Rejection class, e.g. for b-tagging and charm rejection "cjets", by default None

None
key str

Identifier for roc curve e.g. tagger, by default None

None
ratio_group str

Identifies the reference ROC group for ratio calculation, by default None

None
**kwargs kwargs

Keyword arguments passed to puma.PlotLineObject

{}

Raises:

Type Description
ValueError

If sig_eff and bkg_rej have a different shape

Source code in puma/roc.py
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
def __init__(
    self,
    sig_eff: np.ndarray,
    bkg_rej: np.ndarray,
    n_test: int | None = None,
    rej_class: str | Label | None = None,
    signal_class: str | None = None,
    key: str | None = None,
    ratio_group: str | None = None,
    **kwargs,
) -> None:
    """Initialise properties of roc curve object.

    Parameters
    ----------
    sig_eff : np.array
        Array of signal efficiencies
    bkg_rej : np.array
        Array of background rejection
    n_test : int
        Number of events used to calculate the background efficiencies,
        by default None
    rej_class : str or Label
        Rejection class, e.g. for b-tagging and charm rejection "cjets",
        by default None. A string is looked up in `Flavours`.
    signal_class : str
        Signal class, e.g. for b-tagging "bjets", by default None
    key : str
        Identifier for roc curve e.g. tagger, by default None
    ratio_group : str, optional
        Identifies the reference ROC group for ratio calculation, by default
        None, in which case `str(rej_class)` is used
    **kwargs : kwargs
        Keyword arguments passed to `puma.PlotLineObject`

    Raises
    ------
    ValueError
        If `sig_eff` and `bkg_rej` have a different shape
    """
    super().__init__(**kwargs)
    # Compare the complete shapes (not just the leading dimension) so that the
    # check matches the documented contract and the error message below.
    if np.shape(sig_eff) != np.shape(bkg_rej):
        raise ValueError(
            f"The shape of `sig_eff` ({np.shape(sig_eff)}) and `bkg_rej` "
            f"({np.shape(bkg_rej)}) have to be identical."
        )
    self.sig_eff = sig_eff
    self.bkg_rej = bkg_rej
    self.n_test = None if n_test is None else int(n_test)
    self.signal_class = signal_class
    # Strings are resolved to their `Flavours` entry, anything else is kept as-is.
    self.rej_class = Flavours[rej_class] if isinstance(rej_class, str) else rej_class
    self.key = key
    # Fall back to the rejection class as passed by the caller when no explicit
    # ratio group was given.
    self.ratio_group = ratio_group if ratio_group else str(rej_class)

non_zero property #

Abstraction of non_zero_mask.

Returns:

Type Description
numpy.array

Masked signal efficiency

numpy.array

Masked background rejection

non_zero_mask property #

Masking points where rejection is 0 and no signal efficiency change present.

Returns:

Type Description
numpy.array

Masked indices

binomial_error #

Calculate binomial error of roc curve.

Parameters:

Name Type Description Default
norm bool

If True calculate relative error, by default False

False
n_test int

Number of events used to calculate the background efficiencies, by default None

None

Returns:

Type Description
numpy.array

Binomial error

Raises:

Type Description
ValueError

If no n_test was provided

Source code in puma/roc.py
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
def binomial_error(self, norm: bool = False, n_test: int | None = None) -> np.ndarray:
    """Calculate binomial error of roc curve.

    Parameters
    ----------
    norm : bool
        If True calulate relative error, by default False
    n_test : int
        Number of events used to calculate the background efficiencies,
        by default None

    Returns
    -------
    numpy.array
        Binomial error

    Raises
    ------
    ValueError
        If no `n_test` was provided
    """
    if n_test is None:
        n_test = self.n_test
    if n_test is None:
        raise ValueError("No `n_test` provided, cannot calculate binomial error!")
    return rej_err(self.bkg_rej[self.non_zero_mask], n_test, norm=norm)

divide #

Calculate ratio between the roc curve and another roc.

Parameters:

Name Type Description Default
roc_comp roc class

Second roc curve to calculate ratio with

required
inverse bool

If False the ratio is calculated this_roc / roc_comp, if True the inverse is calculated

False

Returns:

Type Description
numpy.array

Signal efficiency used for the ratio calculation which is the overlapping interval of the two roc curves

numpy.array

Ratio

numpy.array or None

Ratio_err if n_test was provided to class

Source code in puma/roc.py
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
def divide(self, roc_comp, inverse: bool = False):
    """Compute the rejection ratio of this roc curve with respect to another one.

    Parameters
    ----------
    roc_comp : roc class
        Second roc curve to calculate ratio with
    inverse : bool
        If False the ratio is calculated `this_roc / roc_comp`,
        if True the inverse is calculated

    Returns
    -------
    np.array
        Signal efficiency of this roc curve (identical to the one of `roc_comp`)
    np.array
        Ratio
    np.array or None
        Ratio_err if `n_test` was provided to class, None otherwise

    Raises
    ------
    ValueError
        If the signal efficiencies of the two roc curves are not equal
    """
    same_efficiencies = np.array_equal(self.sig_eff, roc_comp.sig_eff)
    if not same_efficiencies:
        raise ValueError("Signal efficiencies of the two ROCs do not match.")

    rej_ratio = self.bkg_rej / roc_comp.bkg_rej
    if inverse:
        rej_ratio = 1 / rej_ratio

    # NOTE(review): `binomial_error` works on the `non_zero_mask` selection while
    # the ratio uses all points - confirm both always have the same length.
    if self.n_test:
        rej_ratio_err = self.binomial_error(norm=True) * rej_ratio
    else:
        rej_ratio_err = None
    return self.sig_eff, rej_ratio, rej_ratio_err

puma.roc.RocPlot #

Bases: puma.plot_base.PlotBase

ROC plot class.

ROC plot properties.

Parameters:

Name Type Description Default
grid bool

Set the grid for the plots.

True
**kwargs kwargs

Keyword arguments from puma.PlotObject

{}
Source code in puma/roc.py
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
def __init__(self, grid: bool = True, **kwargs) -> None:
    """ROC plot properties.

    Parameters
    ----------
    grid : bool, optional
        Set the grid for the plots.
    **kwargs : kwargs
        Keyword arguments from `puma.PlotObject`
    """
    super().__init__(grid=grid, **kwargs)
    self.test = ""
    # Bookkeeping containers, filled by `add_roc`, `set_ratio_class`,
    # `set_roc_reference` and `plot_ratios`.
    self.rocs = {}
    self.roc_ratios = {}
    self.rej_axes = {}
    self.rej_class_ls = {}
    self.label_colours = {}
    self.leg_rej_labels = {}
    self.reference_roc = None
    self.initialise_figure()
    self.fig.get_layout_engine().set(h_pad=0, hspace=0)
    # Start with an inverted range so that the first added curve defines it.
    self.eff_min, self.eff_max = (1, 0)
    self.default_linestyles = get_good_linestyles()
    self.legend_flavs = None
    # Read the panel count from the instance instead of `kwargs[...]`, which
    # raised a KeyError whenever `n_ratio_panels` was not passed explicitly.
    # NOTE(review): assumes the base-class initialiser sets `n_ratio_panels`
    # (the other methods of this class read it from `self`) - confirm.
    self.rej_leg_loc = "ratio" if self.n_ratio_panels > 0 else "lower left"

add_ratios #

Calculating ratios.

Raises:

Type Description
ValueError

If number of reference rocs and ratio panels don't match

ValueError

If no ratio classes are set

Source code in puma/roc.py
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
def add_ratios(self):
    """Calculate and draw the ratio curves for every configured ratio panel.

    Raises
    ------
    ValueError
        If number of reference rocs and ratio panels don't match (this includes
        the case that no reference roc was set at all)
    ValueError
        If no ratio classes are set
    """
    # `reference_roc` stays None until the first reference is registered; count
    # that as zero references so a missing reference yields the documented
    # ValueError instead of failing later when the reference is looked up.
    n_references = len(self.reference_roc) if self.reference_roc else 0
    if n_references != self.n_ratio_panels:
        raise ValueError(
            f"{n_references} reference rocs defined but requested "
            f"{self.n_ratio_panels} ratio panels."
        )
    if len(self.rej_axes) != self.n_ratio_panels:
        raise ValueError("Ratio classes not set, set them first with `set_ratio_class`.")

    for rej_class, axis in self.rej_axes.items():
        self.plot_ratios(axis=axis, rej_class=rej_class)

add_roc #

Adding puma.Roc object to figure.

Parameters:

Name Type Description Default
roc_curve puma.Roc

ROC curve

required
key str

Unique identifier for roc_curve, by default None

None
reference bool

If roc is used as reference for ratio calculation, by default False

False

Raises:

Type Description
KeyError

If unique identifier key is used twice

Source code in puma/roc.py
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
def add_roc(
    self,
    roc_curve: object,
    key: str | None = None,
    reference: bool = False,
):
    """Adding puma.Roc object to figure.

    Parameters
    ----------
    roc_curve : puma.Roc
        ROC curve
    key : str, optional
        Unique identifier for roc_curve, by default None
    reference : bool, optional
        If roc is used as reference for ratio calculation, by default False

    Raises
    ------
    KeyError
        If unique identifier key is used twice
    """
    if key is None:
        key = len(self.rocs) + 1
    if key in self.rocs:
        raise KeyError(f"Duplicated key {key} already used for roc unique identifier.")

    self.rocs[key] = roc_curve
    # set linestyle
    if roc_curve.rej_class not in self.rej_class_ls:
        self.rej_class_ls[roc_curve.rej_class] = (
            self.default_linestyles[len(self.rej_class_ls)]
            if roc_curve.linestyle is None
            else roc_curve.linestyle
        )
    elif (
        roc_curve.linestyle != self.rej_class_ls[roc_curve.rej_class]
        and roc_curve.linestyle is not None
    ):
        logger.warning(
            "You specified a different linestyle for the same rejection class "
            "%s. Will keep the linestyle defined first.",
            roc_curve.rej_class,
        )
    if roc_curve.linestyle is None:
        roc_curve.linestyle = self.rej_class_ls[roc_curve.rej_class]

    # set colours
    if roc_curve.label not in self.label_colours:
        self.label_colours[roc_curve.label] = (
            get_good_colours()[len(self.label_colours)]
            if roc_curve.colour is None
            else roc_curve.colour
        )
    elif (
        roc_curve.colour != self.label_colours[roc_curve.label] and roc_curve.colour is not None
    ):
        logger.warning(
            "You specified a different colour for the same label"
            " %s. This will lead to a mismatch in the line colours"
            " and the legend.",
            roc_curve.label,
        )
    if roc_curve.colour is None:
        roc_curve.colour = self.label_colours[roc_curve.label]

    if reference:
        logger.debug("Setting roc %s as reference for %s.", key, roc_curve.rej_class)
        self.set_roc_reference(key, roc_curve.rej_class, roc_curve.ratio_group)
        self.reference_label = roc_curve.label

draw #

Draw plotting.

Parameters:

Name Type Description Default
labelpad int

Spacing in points from the axes bounding box including ticks and tick labels, by default None

None
Source code in puma/roc.py
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
def draw(
    self,
    labelpad: int | None = None,
):
    """Draw the roc curves, the ratio panels, the axis labels and the legends.

    Parameters
    ----------
    labelpad : int, optional
        Spacing in points from the axes bounding box including
        ticks and tick labels, by default None
    """
    plt_handles = self.plot_roc()

    # User-defined x-limits take precedence over the automatically derived ones.
    auto_xmin, auto_xmax = self.get_xlim_auto()
    x_low = self.xmin if self.xmin is not None else auto_xmin
    x_high = self.xmax if self.xmax is not None else auto_xmax
    self.set_xlim(x_low, x_high)

    if self.n_ratio_panels > 0:
        self.add_ratios()
    self.set_title()
    self.set_log()
    self.set_y_lim()
    self.set_xlabel()
    self.set_ylabel(self.axis_top)

    # The ratio y-label is attached to the last registered ratio axis.
    if self.n_ratio_panels > 0:
        last_ratio_axis = list(self.rej_axes.values())[-1]
        self.set_ylabel(
            last_ratio_axis,
            f"Ratio to {self.reference_label}",
            align="left",
            labelpad=labelpad,
        )

    if self.n_ratio_panels >= 2:
        # Without explicit legend labels the rejection classes label themselves.
        if not self.leg_rej_labels:
            self.leg_rej_labels.update({rej_class: rej_class for rej_class in self.rej_axes})
        self.make_split_legend(handles=plt_handles)
    else:
        self.make_legend(plt_handles, ax_mpl=self.axis_top)

    self.plotting_done = True
    if self.apply_atlas_style is True:
        self.atlasify()
        # atlasify can only handle one legend, so the frame of the second
        # legend is removed by hand
        if self.legend_flavs is not None:
            self.legend_flavs.set_frame_on(False)

    adjust_ylabels(self.fig, self.rej_axes.values())

get_xlim_auto #

Returns min and max efficiency values.

Returns:

Type Description
float

Min and max efficiency values

Source code in puma/roc.py
377
378
379
380
381
382
383
384
385
386
387
388
389
def get_xlim_auto(self):
    """Return the min and max signal efficiency over all added roc curves.

    The stored `eff_min` / `eff_max` attributes are updated in place, so the
    range can only grow between calls.

    Returns
    -------
    float
        Min efficiency value
    float
        Max efficiency value
    """
    for elem in self.rocs.values():
        self.eff_min = min(np.min(elem.sig_eff), self.eff_min)
        # Compare against the running maximum (not the minimum), otherwise the
        # upper limit only reflects the curve processed last.
        self.eff_max = max(np.max(elem.sig_eff), self.eff_max)

    return self.eff_min, self.eff_max

make_split_legend #

Draw legend for the case of 2 ratios, splitting up legend into models and rejection class.

Parameters:

Name Type Description Default
handles list

List of Line2D objects to extract info for legend

required

Raises:

Type Description
ValueError

If not 2 ratios requested

Source code in puma/roc.py
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
def make_split_legend(self, handles):
    """Draw two separate legends: one for the models, one for the rejection
    classes.

    The rejection-class legend entries are either placed into the individual
    ratio panels (`rej_leg_loc == "ratio"`) or collected in a second legend on
    the top axis.

    Parameters
    ----------
    handles : list
        List of Line2D objects to extract info for legend

    Raises
    ------
    ValueError
        If fewer than 2 ratio panels are requested
    """
    if self.n_ratio_panels < 2:
        raise ValueError("For a split legend you need 2 ratio panels.")

    def _rej_class_proxy(rej_class):
        # Black, data-less line that only carries label and linestyle.
        return mpl.lines.Line2D(
            [],
            [],
            color="k",
            label=self.leg_rej_labels[rej_class],
            linestyle=self.rej_class_ls[rej_class],
        )

    if self.rej_leg_loc != "ratio":
        rej_proxies = [_rej_class_proxy(rej_class) for rej_class in self.rej_axes]
        self.legend_flavs = self.axis_top.legend(
            handles=rej_proxies,
            labels=[proxy.get_label() for proxy in rej_proxies],
            loc=self.rej_leg_loc,
            fontsize=self.leg_fontsize,
            ncol=self.leg_ncol,
        )
        # Register the legend as artist so that the model legend drawn below
        # does not replace it
        self.axis_top.add_artist(self.legend_flavs)
    else:
        for rej_class, axis in self.rej_axes.items():
            proxy = _rej_class_proxy(rej_class)
            axis.legend(
                handles=[proxy],
                labels=[proxy.get_label()],
                loc="upper right",
                fontsize=self.leg_fontsize,
            )

    # Keep only the first handle for every label (insertion order preserved)
    first_line_per_label = {}
    for line in handles:
        first_line_per_label.setdefault(line.get_label(), line)

    # Model legend
    self.axis_top.legend(
        handles=list(first_line_per_label.values()),
        labels=list(first_line_per_label),
        loc=self.leg_loc,
        fontsize=self.leg_fontsize,
        ncol=self.leg_ncol,
    )

plot_ratios #

Plotting ratio curves.

Parameters:

Name Type Description Default
axis matplotlib.pyplot.axis

matplotlib axis object

required
rej_class str

Rejection class

required
Source code in puma/roc.py
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
def plot_ratios(self, axis: plt.axis, rej_class: str):
    """Draw the ratio curves of one rejection class into a ratio panel.

    Every stored roc curve of the given rejection class is divided by the
    reference roc of its ratio group; the result is cached in `roc_ratios`.

    Parameters
    ----------
    axis : plt.axis
        matplotlib axis object
    rej_class : str
        Rejection class
    """
    for roc_key, roc in self.rocs.items():
        if roc.rej_class != rej_class:
            continue

        reference_key = self.reference_roc[rej_class][roc.ratio_group]
        reference = self.rocs[reference_key]
        eff, rej_ratio, rej_ratio_err = roc.divide(reference)
        self.roc_ratios[roc_key] = (eff, rej_ratio, rej_ratio_err)

        axis.plot(
            eff,
            rej_ratio,
            color=roc.colour,
            linestyle=roc.linestyle,
            linewidth=2.0,
        )
        # The uncertainty band is only available if `divide` returned an error
        if rej_ratio_err is None:
            continue
        axis.fill_between(
            eff,
            rej_ratio - rej_ratio_err,
            rej_ratio + rej_ratio_err,
            color=roc.colour,
            alpha=0.25,
            edgecolor="none",
            zorder=1,
        )

plot_roc #

Plotting roc curves.

Parameters:

Name Type Description Default
**kwargs

Keyword arguments passed to plt.axis.plot

{}

Returns:

Type Description
puma.line_plot_2d.Line2D

matplotlib Line2D object

Source code in puma/roc.py
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
def plot_roc(self, **kwargs) -> list[mpl.lines.Line2D]:
    """Plotting roc curves.

    Only the points selected by the `non_zero_mask` of each roc curve are
    drawn. If `n_test` is set for a curve, its binomial uncertainty is drawn
    as a band around the curve.

    Parameters
    ----------
    **kwargs: kwargs
        Keyword arguments passed to plt.axis.plot

    Returns
    -------
    list of Line2D
        matplotlib Line2D objects of all plotted roc curves
    """
    plt_handles = []
    for key, elem in self.rocs.items():
        mask = elem.non_zero_mask
        sig_eff = elem.sig_eff[mask]
        bkg_rej = elem.bkg_rej[mask]
        plt_handles += self.axis_top.plot(
            sig_eff,
            bkg_rej,
            linestyle=elem.linestyle,
            linewidth=2,
            color=elem.colour,
            # Fall back to the roc key when the curve has no label (the
            # previous check tested `elem` itself, which is never None here).
            label=elem.label if elem.label is not None else key,
            zorder=2,
            **kwargs,
        )
        if elem.n_test is not None:
            # if uncertainties are available for roc plotting their uncertainty as
            # a band around the roc itself.
            # `binomial_error` already evaluates the error on the masked
            # rejection values, so the mask must not be applied a second time.
            rej_unc = elem.binomial_error()
            self.axis_top.fill_between(
                sig_eff,
                bkg_rej - rej_unc,
                bkg_rej + rej_unc,
                color=elem.colour,
                alpha=0.25,
                edgecolor="none",
                zorder=2,
            )
    return plt_handles

set_ratio_class #

Associate the rejection class to a ratio panel and set the legend label.

Parameters:

Name Type Description Default
ratio_panel int

Ratio panel either 1 or 2

required
rej_class Labels

Rejection class associated to that panel

required

Raises:

Type Description
ValueError

if requested ratio panels and given ratio_panel do not match.

Source code in puma/roc.py
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
def set_ratio_class(self, ratio_panel: int, rej_class: str | Label):
    """Associate the rejection class to a ratio panel and set the legend label.

    Parameters
    ----------
    ratio_panel : int
        Ratio panel, counted from 1
    rej_class : str or Label
        Rejection class associated to that panel. A string is looked up in
        `Flavours`.

    Raises
    ------
    ValueError
        if requested ratio panels and given ratio_panel do not match.
    """
    # Validate before touching any state: without this check panel 0 would
    # silently select the last panel through negative indexing and too large
    # values would surface as a bare IndexError.
    n_panels = len(self.ratio_axes)
    if not 1 <= ratio_panel <= n_panels:
        raise ValueError(
            f"Ratio panel {ratio_panel} requested but the plot has "
            f"{n_panels} ratio panel(s)."
        )
    rej_class = Flavours[rej_class] if isinstance(rej_class, str) else rej_class
    self.rej_axes[rej_class] = self.ratio_axes[ratio_panel - 1]
    # Singular form for the axis label, e.g. "...jets" -> "...jet"
    label = rej_class.label.replace("jets", "jet")
    self.set_ratio_label(ratio_panel, f"{label} ratio")
    self.leg_rej_labels[rej_class] = rej_class.label

set_roc_reference #

Setting the reference roc curves used in the ratios.

Parameters:

Name Type Description Default
key str

Unique identifier of roc object

required
rej_class str

Rejection class encoded in roc curve

required
ratio_group str

Ratio group this roc is reference for, by default None

None

Raises:

Type Description
ValueError

If more rejection classes are set than actual ratio panels available.

Source code in puma/roc.py
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
def set_roc_reference(
    self,
    key: str,
    rej_class: Label,
    ratio_group: str | None = None,
):
    """Register a roc curve as reference for the ratio calculation.

    References are stored per rejection class and, within a rejection class,
    per ratio group.

    Parameters
    ----------
    key : str
        Unique identifier of roc object
    rej_class : Label
        Rejection class encoded in roc curve
    ratio_group : str
        Ratio group this roc is reference for, by default None

    Raises
    ------
    ValueError
        If more rejection classes are set than actual ratio panels available.
    """
    # Very first reference: create the container together with its first entry
    if self.reference_roc is None:
        self.reference_roc = {rej_class: {ratio_group: key}}
        return

    if rej_class in self.reference_roc:
        group_references = self.reference_roc[rej_class]
        previous_key = group_references.get(ratio_group)
        if previous_key:
            logger.warning(
                "You specified a second roc curve %s as reference for ratio. "
                "Using it as new reference instead of %s.",
                key,
                previous_key,
            )
        group_references[ratio_group] = key
        return

    # New rejection class: only allowed while ratio panels are left
    if len(self.reference_roc) >= self.n_ratio_panels:
        raise ValueError(
            "You cannot set more rejection classes than available ratio panels."
        )
    self.reference_roc[rej_class] = {ratio_group: key}