ROC

`puma.roc.Roc` #

Bases: puma.plot_base.PlotLineObject

Represents a single ROC curve and allows calculating ratios w.r.t. other ROCs.

Parameters:

Name	Type	Description	Default
`sig_eff`	`numpy.ndarray`	Array of signal efficiencies	required
`bkg_rej`	`numpy.ndarray`	Array of background rejection	required
`n_test`	`int \| None`	Number of events used to calculate the background efficiencies, by default None	`None`
`rej_class`	`str \| ftag.Label`	Rejection class, e.g. for b-tagging and charm rejection "cjets", by default None	`None`
`signal_class`	`str \| None`	Signal class, e.g. for b-tagging "bjets", by default None	`None`
`key`	`str \| None`	Identifier for roc curve e.g. tagger, by default None	`None`
`ratio_group`	`str \| None`	Identifies the reference ROC group for ratio calculation, by default None	`None`
`use_bkg_eff`	`bool`	Correct error calculation for background efficiency ROCs instead of background rejection, by default False	`False`
`**kwargs`	`typing.Any`	Keyword arguments passed to `puma.PlotLineObject`	`{}`

Raises:

Type	Description
`ValueError`	If `sig_eff` and `bkg_rej` have a different shape

Source code in puma/roc.py

def __init__(
    self,
    sig_eff: np.ndarray,
    bkg_rej: np.ndarray,
    n_test: int | None = None,
    rej_class: str | Label = None,
    signal_class: str | None = None,
    key: str | None = None,
    ratio_group: str | None = None,
    use_bkg_eff: bool = False,
    **kwargs: Any,
) -> None:
    """Store the ROC working points together with their metadata.

    Raises
    ------
    ValueError
        If `sig_eff` and `bkg_rej` differ in length, or if `rej_class` is
        neither a string nor a Label
    """
    # All remaining keyword arguments are forwarded to the parent class.
    super().__init__(**kwargs)

    # Only the lengths are compared; the shapes are merely reported.
    n_sig, n_bkg = len(sig_eff), len(bkg_rej)
    if n_sig != n_bkg:
        raise ValueError(
            f"The shape of `sig_eff` ({np.shape(sig_eff)}) and `bkg_rej` "
            f"({np.shape(bkg_rej)}) have to be identical."
        )

    self.sig_eff = sig_eff
    self.bkg_rej = bkg_rej
    if n_test is None:
        self.n_test = None
    else:
        self.n_test = int(n_test)
    self.signal_class = signal_class

    # A Label is reduced to its name, anything else is stored unchanged.
    if isinstance(rej_class, Label):
        self.rej_class = rej_class.name
    else:
        self.rej_class = rej_class
    self.key = key

    # Without an explicit (non-empty) ratio group, the string form of the
    # rejection class as it was passed in is used instead.
    self.ratio_group = ratio_group if ratio_group else str(rej_class)
    self.use_bkg_eff = use_bkg_eff
    self.kwargs = kwargs

    # After unwrapping a Label, a non-empty rejection class must be a string.
    stored_rej_class = self.rej_class
    if stored_rej_class and not isinstance(stored_rej_class, str):
        raise ValueError(
            f"'rej_class' must either be a string or a Label! You gave {type(self.rej_class)}"
        )

`args_to_store` `property` #

Returns the arguments that need to be stored/loaded.

Returns:

Type	Description
`dict[str, typing.Any]`	Dict with the arguments

`non_zero` `property` #

Abstraction of non_zero_mask.

Returns:

Type	Description
`numpy.array`	Masked signal efficiency
`numpy.array`	Masked background rejection

`non_zero_mask` `property` #

Masking points where rejection is 0 and no signal efficiency change present.

Returns:

Type	Description
`numpy.array`	Masked indices

`binomial_error` #

Calculate binomial error of roc curve.

Parameters:

Name	Type	Description	Default
`norm`	`bool`	If True calculate relative error, by default False	`False`
`n_test`	`int \| None`	Number of events used to calculate the background efficiencies, by default None	`None`

Returns:

Type	Description
`numpy.ndarray`	Binomial error

Raises:

Type	Description
`ValueError`	If no `n_test` was provided

Source code in puma/roc.py

def binomial_error(self, norm: bool = False, n_test: int | None = None) -> np.ndarray:
    """Calculate binomial error of roc curve.

    Parameters
    ----------
    norm : bool, optional
        If True calulate relative error, by default False
    n_test : int | None
        Number of events used to calculate the background efficiencies,
        by default None

    Returns
    -------
    np.ndarray
        Binomial error

    Raises
    ------
    ValueError
        If no `n_test` was provided
    """
    if n_test is None:
        n_test = self.n_test
    if n_test is None:
        raise ValueError("No `n_test` provided, cannot calculate binomial error!")
    if self.use_bkg_eff:
        return calculate_efficiency_error(self.bkg_rej[self.non_zero_mask], n_test, norm=norm)
    return calculate_rejection_error(self.bkg_rej[self.non_zero_mask], n_test, norm=norm)

`divide` #

Calculate ratio between the roc curve and another roc.

Parameters:

Name	Type	Description	Default
`roc_comp`	`puma.roc.Roc`	Second roc curve to calculate ratio with	required
`inverse`	`bool`	If False the ratio is calculated `this_roc / roc_comp`, if True the inverse is calculated	`False`

Returns:

Type	Description
`tuple[numpy.ndarray, numpy.ndarray, numpy.ndarray]`	The signal efficiency used for the ratio calculation, which is the overlapping interval of the two roc curves; the ratio between the two ROCs; the ratio error between the two ROCs

Raises:

Type	Description
`ValueError`	If the signal efficiency between the two ROCs do not match

Source code in puma/roc.py

def divide(
    self,
    roc_comp: Roc,
    inverse: bool = False,
) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
    """Compute the ratio of this roc curve to another roc curve.

    Parameters
    ----------
    roc_comp : Roc
        Second roc curve to calculate ratio with
    inverse : bool
        If False the ratio is calculated `this_roc / roc_comp`,
        if True the inverse is calculated

    Returns
    -------
    tuple[np.ndarray, np.ndarray, np.ndarray]
        The signal efficiency of this roc curve (required to be identical
        to the one of `roc_comp`)
        The ratio between the two ROCs
        The ratio error between the two ROCs, or None if this roc has no
        (non-zero) `n_test`

    Raises
    ------
    ValueError
        If the signal efficiency between the two ROCs do not match
    """
    # Both curves have to be sampled at exactly the same working points.
    if not np.array_equal(self.sig_eff, roc_comp.sig_eff):
        raise ValueError("Signal efficiencies of the two ROCs do not match.")

    quotient = self.bkg_rej / roc_comp.bkg_rej
    ratio = 1 / quotient if inverse else quotient

    # No uncertainty can be attached without the number of test events.
    if not self.n_test:
        return self.sig_eff, ratio, None

    # Scale the relative error of this curve by the ratio.
    return self.sig_eff, ratio, self.binomial_error(norm=True) * ratio

`puma.roc.RocPlot` #

Bases: puma.plot_base.PlotBase

ROC plot class.

Parameters:

Name	Type	Description	Default
`grid`	`bool`	Set the grid for the plots.	`True`
`**kwargs`	`typing.Any`	Keyword arguments from `puma.PlotObject`	`{}`

Source code in puma/roc.py

def __init__(self, grid: bool = True, **kwargs: Any) -> None:
    """Set up an empty ROC plot and its bookkeeping containers.

    Parameters
    ----------
    grid : bool, optional
        Set the grid for the plots, by default True
    **kwargs : Any
        Keyword arguments passed on to the parent class
    """
    super().__init__(grid=grid, **kwargs)
    self.test = ""
    # ROC curves and their ratio results, both keyed by the unique ROC key
    self.rocs: dict[str, Roc] = {}
    self.roc_ratios: dict[str, tuple] = {}
    # Bookkeeping keyed by rejection class: ratio axis, linestyle, legend label
    self.rej_axes: dict[str, Axes] = {}
    self.rej_class_ls: dict[str, str] = {}
    # Colour assigned to each curve label
    self.label_colours: dict[Any, str] = {}
    self.leg_rej_labels: dict[str, str] = {}
    # Rejection class name -> {ratio group -> key of the reference ROC}
    self.reference_roc: dict[str, dict[str, str]] = {}
    self.initialise_figure()
    self.fig.get_layout_engine().set(h_pad=0, hspace=0)
    # Inverted start range so that the first ROC curve defines both limits
    self.eff_min, self.eff_max = (1, 0)
    self.default_linestyles = get_good_linestyles()
    self.legend_flavs = None
    # Read the number of ratio panels from the instance rather than from
    # `kwargs`, so that omitting `n_ratio_panels` no longer raises a KeyError.
    self.rej_leg_loc = "ratio" if self.n_ratio_panels > 0 else "lower left"

`add_ratios` #

Calculating ratios.

Raises:

Type	Description
`ValueError`	If the number of reference rocs and ratio panels don't match, or if no ratio classes are set

Source code in puma/roc.py

def add_ratios(self) -> None:
    """Draw the ratio curves into every registered ratio panel.

    Raises
    ------
    ValueError
        If number of reference rocs and ratio panels don't match
        If no ratio classes are set
    """
    n_panels = self.n_ratio_panels
    n_references = len(self.reference_roc)

    # Having no reference at all is tolerated here; otherwise exactly one
    # reference entry per ratio panel is required.
    if n_references not in (0, n_panels):
        raise ValueError(
            f"{n_references} reference rocs defined but requested "
            f"{n_panels} ratio panels."
        )

    # Every ratio panel needs a rejection class assigned to it.
    if len(self.rej_axes) != n_panels:
        raise ValueError("Ratio classes not set, set them first with `set_ratio_class`.")

    for class_name in self.rej_axes:
        self.plot_ratios(axis=self.rej_axes[class_name], rej_class=class_name)

`add_roc` #

Adding puma.Roc object to figure.

Parameters:

Name	Type	Description	Default
`roc_curve`	`puma.roc.Roc`	ROC curve	required
`key`	`str \| None`	Unique identifier for roc_curve, by default None	`None`
`reference`	`bool`	If roc is used as reference for ratio calculation, by default False	`False`

Raises:

Type	Description
`KeyError`	If unique identifier key is used twice

Source code in puma/roc.py

def add_roc(
    self,
    roc_curve: Roc,
    key: str | None = None,
    reference: bool = False,
) -> None:
    """Register a puma.Roc object with the figure.

    Besides storing the curve, this assigns a linestyle (one per rejection
    class) and a colour (one per label) to curves that do not define their
    own, and optionally marks the curve as reference for the ratios.

    Parameters
    ----------
    roc_curve : Roc
        ROC curve
    key : str | None, optional
        Unique identifier for roc_curve. When None, the running number of
        the curve is used, by default None
    reference : bool, optional
        If roc is used as reference for ratio calculation, by default False

    Raises
    ------
    KeyError
        If unique identifier key is used twice
    """
    if key is None:
        key = f"{len(self.rocs) + 1}"
    key = cast(str, key)

    if key in self.rocs:
        raise KeyError(f"Duplicated key {key} already used for roc unique identifier.")
    self.rocs[key] = roc_curve

    # Linestyle: the first curve of a rejection class fixes it for that class.
    rej_class = roc_curve.rej_class
    linestyle = roc_curve.linestyle
    if rej_class in self.rej_class_ls:
        if linestyle is not None and linestyle != self.rej_class_ls[rej_class]:
            logger.warning(
                "You specified a different linestyle for the same rejection class "
                "%s. Will keep the linestyle defined first.",
                roc_curve.rej_class,
            )
    elif linestyle is None:
        # Take the next unused default linestyle.
        self.rej_class_ls[rej_class] = self.default_linestyles[len(self.rej_class_ls)]
    else:
        self.rej_class_ls[rej_class] = linestyle
    if linestyle is None:
        roc_curve.linestyle = self.rej_class_ls[rej_class]

    # Colour: the first curve with a given label fixes it for that label.
    label = roc_curve.label
    colour = roc_curve.colour
    if label in self.label_colours:
        if colour is not None and colour != self.label_colours[label]:
            logger.warning(
                "You specified a different colour for the same label"
                " %s. This will lead to a mismatch in the line colours"
                " and the legend.",
                roc_curve.label,
            )
    elif colour is None:
        # Take the next unused default colour.
        self.label_colours[label] = get_good_colours()[len(self.label_colours)]
    else:
        self.label_colours[label] = colour
    if colour is None:
        roc_curve.colour = self.label_colours[label]

    if not reference:
        return

    logger.debug("Setting roc %s as reference for %s.", key, roc_curve.rej_class)
    self.set_roc_reference(
        key=key,
        rej_class=roc_curve.rej_class,
        ratio_group=roc_curve.ratio_group,
    )
    # Remember the label of the reference curve; `draw` reads it back.
    self.reference_label = roc_curve.label

`draw` #

Draw plotting.

Parameters:

Name	Type	Description	Default
`labelpad`	`int \| None`	Spacing in points from the axes bounding box including ticks and tick labels, by default None	`None`

Source code in puma/roc.py

def draw(
    self,
    labelpad: int | None = None,
) -> None:
    """Draw the ROC curves, the optional ratio panels, labels and legends.

    Parameters
    ----------
    labelpad : int | None, optional
        Spacing in points from the axes bounding box including
        ticks and tick labels. Within this method it is only used to shift
        the common "Ratio to ..." label horizontally, measured relative to
        the 4 points taken as default padding of the main y-label,
        by default None
    """
    # Plot all ROC curves and keep the line handles for the legend
    plt_handles = self.plot_roc()
    xmin, xmax = self.get_xlim_auto()

    # Explicitly configured limits take precedence over the automatic ones
    self.set_xlim(
        xmin if self.xmin is None else self.xmin,
        xmax if self.xmax is None else self.xmax,
    )
    if self.n_ratio_panels > 0:
        self.add_ratios()
    self.set_title()
    self.set_log()
    self.set_y_lim()
    self.set_xlabel()
    self.set_ylabel(self.axis_top)

    # `reference_label` only exists once a reference ROC was added via
    # `add_roc(..., reference=True)`, hence the `hasattr` guard
    common_ratio_ylabel_text = None
    if self.n_ratio_panels > 0 and hasattr(self, "reference_label") and self.reference_label:
        common_ratio_ylabel_text = f"Ratio to {self.reference_label}"

    # With fewer than two ratio panels a single legend is drawn; otherwise the
    # legend is split into models and rejection classes
    if self.n_ratio_panels < 2:
        self.make_legend(plt_handles, ax_mpl=self.axis_top)
    else:
        # Fall back to the rejection class names if no legend labels were set
        if not self.leg_rej_labels:
            for rej_class in self.rej_axes:
                self.leg_rej_labels[rej_class] = rej_class
        self.make_split_legend(handles=plt_handles)

    self.plotting_done = True
    if self.apply_atlas_style is True:
        self.atlasify()
        # atlasify can only handle one legend. Therefore, we remove the frame of
        # the second legend by hand
        if self.legend_flavs is not None:
            self.legend_flavs.set_frame_on(False)

    # Ensure the figure layout is calculated before getting extents
    # NOTE(review): `draw_idle` requests a redraw; confirm that the layout is
    # really up to date by the time the extents are read below
    self.fig.canvas.draw_idle()

    # Add the common ratio label as figure text if it exists
    # (only done when the main y-axis label has a non-empty text)
    if common_ratio_ylabel_text and self.axis_top.yaxis.get_label().get_text():
        # Bounding box of the main y-label, converted to figure coordinates
        main_ylabel_obj = self.axis_top.yaxis.get_label()
        renderer = self.fig.canvas.get_renderer()
        main_ylabel_disp_bbox = main_ylabel_obj.get_window_extent(renderer=renderer)
        main_ylabel_fig_bbox = main_ylabel_disp_bbox.transformed(
            self.fig.transFigure.inverted()
        )

        # Optional horizontal shift: points -> inches (72 points per inch)
        # -> fraction of the figure width. A `labelpad` of 0 or None gives no shift
        ratio_ylabel_offset = 0
        if labelpad:
            fig_width_inches = self.fig.get_figwidth()
            # The default labelpad for the axis_top is 4 points
            ratio_ylabel_offset_inchs = (labelpad - 4) / 72
            ratio_ylabel_offset = ratio_ylabel_offset_inchs / fig_width_inches

        # Horizontally line up with the left edge of the main y-label and
        # vertically start at the bottom of the last ratio panel
        fig_text_x = main_ylabel_fig_bbox.x0 - ratio_ylabel_offset
        last_ratio_ax_bbox_fig = self.ratio_axes[-1].get_position()
        fig_text_y = last_ratio_ax_bbox_fig.y0

        self.fig.text(
            fig_text_x,
            fig_text_y,
            common_ratio_ylabel_text,
            rotation="vertical",
            va="baseline",
            ha="left",
            fontsize=self.label_fontsize,
            transform=self.fig.transFigure,
        )

    # presumably aligns the y-labels of the ratio axes; the helper is defined
    # outside this section — verify against its implementation
    adjust_ylabels(self.fig, self.rej_axes.values())

`get_xlim_auto` #

Returns min and max efficiency values.

Returns:

Type	Description
`tuple[float, float]`	Min and max efficiency values

Source code in puma/roc.py

def get_xlim_auto(self) -> tuple[float, float]:
    """Return the min and max signal efficiency over all added ROC curves.

    The limits are accumulated on the instance (`self.eff_min`,
    `self.eff_max`), so the stored values are updated by every call.

    Returns
    -------
    tuple[float, float]
        Min and max efficiency values
    """
    for elem in self.rocs.values():
        self.eff_min = min(np.min(elem.sig_eff), self.eff_min)
        # Compare against the running maximum; comparing against `eff_min`
        # dropped the maximum of earlier curves whenever a later curve had
        # a smaller one.
        self.eff_max = max(np.max(elem.sig_eff), self.eff_max)

    return self.eff_min, self.eff_max

`make_split_legend` #

Draw legend for the case of 2 ratios, splitting up legend into models and rejection class.

Parameters:

Name	Type	Description	Default
`handles`	`list`	List of Line2D objects to extract info for legend	required

Raises:

Type	Description
`ValueError`	If not 2 ratios requested

Source code in puma/roc.py

def make_split_legend(self, handles: list) -> None:
    """Draw a legend split into models and rejection classes.

    The rejection-class entries are either placed inside the ratio panels
    (`rej_leg_loc == "ratio"`) or collected in a second legend on the main
    axis. The model legend is always drawn on the main axis.

    Parameters
    ----------
    handles : list
        List of Line2D objects to extract info for legend

    Raises
    ------
    ValueError
        If not 2 ratios requested
    """
    if self.n_ratio_panels < 2:
        raise ValueError("For a split legend you need 2 ratio panels.")

    def _class_proxy(rej_class):
        # Empty black line that only carries label and linestyle of the class
        return mpl.lines.Line2D(
            [],
            [],
            color="k",
            label=self.leg_rej_labels[rej_class],
            linestyle=self.rej_class_ls[rej_class],
        )

    if self.rej_leg_loc == "ratio":
        # One single-entry legend inside each ratio panel
        for rej_class, axis in self.rej_axes.items():
            proxy = _class_proxy(rej_class)
            axis.legend(
                handles=[proxy],
                labels=[proxy.get_label()],
                loc="upper right",
                fontsize=self.leg_fontsize,
            )
    else:
        class_proxies = [_class_proxy(rej_class) for rej_class in self.rej_axes]
        self.legend_flavs = self.axis_top.legend(
            handles=class_proxies,
            labels=[proxy.get_label() for proxy in class_proxies],
            loc=self.rej_leg_loc,
            fontsize=self.leg_fontsize,
            ncol=self.leg_ncol,
        )

        # Add the second legend to plot
        self.axis_top.add_artist(self.legend_flavs)

    # Keep only the first handle of every distinct label, preserving the order
    unique_labels = []
    unique_lines = []
    for handle in handles:
        text = handle.get_label()
        if text in unique_labels:
            continue
        unique_labels.append(text)
        unique_lines.append(handle)

    # Model legend on the main axis
    self.axis_top.legend(
        handles=unique_lines,
        labels=unique_labels,
        loc=self.leg_loc,
        fontsize=self.leg_fontsize,
        ncol=self.leg_ncol,
    )

`plot_ratios` #

Plotting ratio curves.

Parameters:

Name	Type	Description	Default
`axis`	`matplotlib.pyplot.axis`	matplotlib axis object	required
`rej_class`	`str`	Rejection class	required

Source code in puma/roc.py

def plot_ratios(self, axis: plt.axis, rej_class: str) -> None:
    """Draw the ratio curves of one rejection class into a ratio panel.

    Every ROC of the given rejection class is divided by the reference ROC
    registered for its ratio group; the result is stored in
    `self.roc_ratios` and plotted, with an error band when available.

    Parameters
    ----------
    axis : plt.axis
        matplotlib axis object
    rej_class : str
        Rejection class
    """
    # Only curves of the requested rejection class end up in this panel
    selected = ((name, roc) for name, roc in self.rocs.items() if roc.rej_class == rej_class)

    for name, roc in selected:
        reference_key = self.reference_roc[rej_class][roc.ratio_group]
        eff, ratio_values, ratio_unc = roc.divide(self.rocs[reference_key])
        self.roc_ratios[name] = (eff, ratio_values, ratio_unc)

        axis.plot(
            eff,
            ratio_values,
            color=roc.colour,
            linestyle=roc.linestyle,
            linewidth=2.0,
        )

        # No band for curves without uncertainty information
        if ratio_unc is None:
            continue
        axis.fill_between(
            eff,
            ratio_values - ratio_unc,
            ratio_values + ratio_unc,
            color=roc.colour,
            alpha=0.25,
            edgecolor="none",
            zorder=1,
        )

`plot_roc` #

Plotting roc curves.

Parameters:

Name	Type	Description	Default
`**kwargs`	`typing.Any`	Keyword arguments passed to plt.axis.plot	`{}`

Returns:

Type	Description
`puma.line_plot_2d.Line2D`	matplotlib Line2D object

Source code in puma/roc.py

def plot_roc(self, **kwargs: Any) -> list[mpl.lines.Line2D]:
    """Plot all added roc curves on the main axis.

    Curves that carry `n_test` additionally get their binomial uncertainty
    drawn as a band around the curve.

    Parameters
    ----------
    **kwargs: Any
        Keyword arguments passed to plt.axis.plot

    Returns
    -------
    list[Line2D]
        The matplotlib Line2D objects of all plotted curves
    """
    plt_handles = []
    for key, elem in self.rocs.items():
        # Evaluate the mask and the masked arrays once per curve instead of
        # re-deriving them for every use.
        mask = elem.non_zero_mask
        sig_eff = elem.sig_eff[mask]
        bkg_rej = elem.bkg_rej[mask]

        plt_handles += self.axis_top.plot(
            sig_eff,
            bkg_rej,
            linestyle=elem.linestyle,
            linewidth=2,
            color=elem.colour,
            # Fall back to the unique key for curves without a label. The
            # former check tested `elem` itself and therefore never fell back.
            label=elem.label if elem.label is not None else key,
            zorder=2,
            **kwargs,
        )

        if elem.n_test is None:
            continue

        # if uncertainties are available for roc plotting their uncertainty as
        # a band around the roc itself
        rej_err = elem.binomial_error()[mask]
        self.axis_top.fill_between(
            sig_eff,
            bkg_rej - rej_err,
            bkg_rej + rej_err,
            color=elem.colour,
            alpha=0.25,
            edgecolor="none",
            zorder=2,
        )
    return plt_handles

`set_ratio_class` #

Associate the rejection class with a ratio panel and set the legend label.

Parameters:

Name	Type	Description	Default
`ratio_panel`	`int`	Ratio panel either 1 or 2	required
`rej_class`	`str \| ftag.Label`	Rejection class associated to that panel. Either a Label instance or a string	required
`rej_class_label`	`str \| None`	If rej_class is not a Label, this label must be given	`None`

Raises:

Type	Description
`TypeError`	If the rej_class is a string and rej_class_label is not defined

Source code in puma/roc.py

def set_ratio_class(
    self,
    ratio_panel: int,
    rej_class: str | Label,
    rej_class_label: str | None = None,
) -> None:
    """Assign a rejection class to a ratio panel and set its labels.

    Parameters
    ----------
    ratio_panel : int
        Ratio panel either 1 or 2 (1-based; panel `ratio_panel - 1` of
        `self.ratio_axes` is used)
    rej_class : str | Label
        Rejection class associated to that panel. Either a Label instance or a string
    rej_class_label : str | None, optional
        If rej_class is not a Label, this label must be given

    Raises
    ------
    TypeError
        If the rej_class is a string and rej_class_label is not defined
    """
    given_as_label = isinstance(rej_class, Label)

    # A plain rejection class carries no display label, so one has to be passed.
    if not given_as_label and not isinstance(rej_class_label, str):
        raise TypeError(
            "'rej_class_label' must be a string when 'rej_class' is not a Label!"
        )

    if given_as_label:
        legend_text, class_name = rej_class.label, rej_class.name
    else:
        legend_text, class_name = rej_class_label, rej_class

    self.rej_axes[class_name] = self.ratio_axes[ratio_panel - 1]

    # The panel label uses the singular form ("jets" -> "jet").
    panel_text = legend_text.replace("jets", "jet")
    self.set_ratio_label(ratio_panel, f"{panel_text} ratio")
    self.leg_rej_labels[class_name] = legend_text

`set_roc_reference` #

Setting the reference roc curves used in the ratios.

Parameters:

Name	Type	Description	Default
`key`	`str`	Unique identifier of roc object	required
`rej_class`	`str \| ftag.Label`	Rejection class encoded in roc curve	required
`ratio_group`	`str`	Ratio group this roc is reference for	required

Raises:

Type	Description
`ValueError`	If more rejection classes are set than actual ratio panels available.

Source code in puma/roc.py

def set_roc_reference(
    self,
    key: str,
    rej_class: str | Label,
    ratio_group: str,
):
    """Register a roc curve as reference for the ratio calculation.

    Parameters
    ----------
    key : str
        Unique identifier of roc object
    rej_class : str | Label
        Rejection class encoded in roc curve
    ratio_group : str
        Ratio group this roc is reference for

    Raises
    ------
    ValueError
        If more rejection classes are set than actual ratio panels available.
    """
    # References are stored under the plain class name, never under a Label
    class_name = rej_class
    if isinstance(rej_class, Label):
        class_name = rej_class.name

    groups = self.reference_roc.get(class_name)
    if groups is None:
        # First reference for this rejection class. The guard triggers once
        # the number of already registered classes exceeds `n_ratio_panels`.
        if len(self.reference_roc) > self.n_ratio_panels:
            raise ValueError(
                "You cannot set more rejection classes than available ratio panels."
            )
        self.reference_roc[class_name] = {ratio_group: key}
        return

    # The class is known already: a reference present for this ratio group
    # is replaced, with a warning.
    previous_key = groups.get(ratio_group)
    if previous_key:
        logger.warning(
            "You specified a second roc curve %s as reference for ratio. "
            "Using it as new reference instead of %s.",
            key,
            previous_key,
        )
    groups[ratio_group] = key

ROC

puma.roc.Roc #

args_to_store property #

non_zero property #

non_zero_mask property #

binomial_error #

divide #

puma.roc.RocPlot #

add_ratios #

add_roc #

draw #

get_xlim_auto #

make_split_legend #

plot_ratios #

plot_roc #

set_ratio_class #

set_roc_reference #

`puma.roc.Roc` #

`args_to_store` `property` #

`non_zero` `property` #

`non_zero_mask` `property` #

`binomial_error` #

`divide` #

`puma.roc.RocPlot` #

`add_ratios` #

`add_roc` #

`draw` #

`get_xlim_auto` #

`make_split_legend` #

`plot_ratios` #

`plot_roc` #

`set_ratio_class` #

`set_roc_reference` #