Skip to content

Roc

puma.roc.Roc #

Bases: puma.plot_base.PlotLineObject

Represents a single ROC curve and allows calculating ratios w.r.t. other ROCs.

Parameters:

Name Type Description Default
sig_eff numpy.ndarray

Array of signal efficiencies

required
bkg_rej numpy.ndarray

Array of background rejection

required
n_test float | None

Number of events used to calculate the background efficiencies. For weighted samples, pass the effective sample size N_eff = (sum w)^2 / sum(w^2) instead of the raw count. By default None

None
rej_class str | ftag.Label

Rejection class, e.g. for b-tagging and charm rejection "cjets", by default None

None
signal_class str | None

Signal class, e.g. for b-tagging "bjets", by default None

None
key str | None

Identifier for roc curve e.g. tagger, by default None

None
ratio_group str | None

Identifies the reference ROC group for ratio calculation, by default None

None
use_bkg_eff bool

Correct error calculation for background efficiency ROCs instead of background rejection, by default False

False
**kwargs typing.Any

Keyword arguments passed to puma.PlotLineObject

{}

Raises:

Type Description
ValueError

If sig_eff and bkg_rej have a different shape

Source code in puma/roc.py
def __init__(
    self,
    sig_eff: np.ndarray,
    bkg_rej: np.ndarray,
    n_test: float | None = None,
    rej_class: str | Label = None,
    signal_class: str | None = None,
    key: str | None = None,
    ratio_group: str | None = None,
    use_bkg_eff: bool = False,
    **kwargs: Any,
) -> None:
    """Store the ROC points together with their bookkeeping information.

    Parameters
    ----------
    sig_eff : np.ndarray
        Array of signal efficiencies
    bkg_rej : np.ndarray
        Array of background rejection
    n_test : float | None, optional
        Number of events used to calculate the background efficiencies
        (converted to ``float`` when given), by default None
    rej_class : str | Label, optional
        Rejection class; a Label is reduced to its ``name``, by default None
    signal_class : str | None, optional
        Signal class, by default None
    key : str | None, optional
        Identifier for roc curve, by default None
    ratio_group : str | None, optional
        Reference ROC group for the ratio calculation. Falls back to
        ``str(rej_class)`` when empty, by default None
    use_bkg_eff : bool, optional
        Treat `bkg_rej` as background efficiency in the error calculation,
        by default False
    **kwargs : Any
        Keyword arguments forwarded to the parent class

    Raises
    ------
    ValueError
        If `sig_eff` and `bkg_rej` differ in length, or if `rej_class` is
        neither a string nor a Label
    """
    super().__init__(**kwargs)

    n_sig_points, n_bkg_points = len(sig_eff), len(bkg_rej)
    if n_sig_points != n_bkg_points:
        raise ValueError(
            f"The shape of `sig_eff` ({np.shape(sig_eff)}) and `bkg_rej` "
            f"({np.shape(bkg_rej)}) have to be identical."
        )

    self.sig_eff, self.bkg_rej = sig_eff, bkg_rej

    if n_test is not None:
        n_test = float(n_test)
    self.n_test = n_test
    self.signal_class = signal_class

    # Only the name of a Label is kept, plain values are stored untouched
    if isinstance(rej_class, Label):
        self.rej_class = rej_class.name
    else:
        self.rej_class = rej_class

    self.key = key
    self.ratio_group = ratio_group if ratio_group else str(rej_class)
    self.use_bkg_eff = use_bkg_eff
    self.kwargs = kwargs

    # An unset (falsy) rejection class is accepted, anything else must be a string
    rej_class_is_valid = not self.rej_class or isinstance(self.rej_class, str)
    if not rej_class_is_valid:
        raise ValueError(
            f"'rej_class' must either be a string or a Label! You gave {type(self.rej_class)}"
        )

args_to_store property #

Returns the arguments that need to be stored/loaded.

Returns:

Type Description
dict[str, typing.Any]

Dict with the arguments

non_zero property #

Abstraction of non_zero_mask.

Returns:

Type Description
numpy.array

Masked signal efficiency

numpy.array

Masked background rejection

non_zero_mask property #

Masking points where rejection is 0 and no signal efficiency change present.

Returns:

Type Description
numpy.array

Masked indices

binomial_error #

Calculate binomial error of roc curve.

Parameters:

Name Type Description Default
norm bool

If True calculate relative error, by default False

False
n_test float | None

Number of events used to calculate the background efficiencies. For weighted samples, pass N_eff = (sum w)^2 / sum(w^2). By default None

None

Returns:

Type Description
numpy.ndarray

Binomial error

Raises:

Type Description
ValueError

If no n_test was provided

Source code in puma/roc.py
def binomial_error(self, norm: bool = False, n_test: float | None = None) -> np.ndarray:
    """Compute the binomial uncertainty of the ROC curve.

    Only the points selected by ``non_zero_mask`` enter the calculation.

    Parameters
    ----------
    norm : bool, optional
        If True calculate relative error, by default False
    n_test : float | None
        Number of events used to calculate the background efficiencies.
        For weighted samples, pass N_eff = (sum w)^2 / sum(w^2).
        When None, the value stored on the object is used. By default None

    Returns
    -------
    np.ndarray
        Binomial error

    Raises
    ------
    ValueError
        If no `n_test` was provided, neither here nor at construction
    """
    # An explicitly passed sample size takes precedence over the stored one
    sample_size = self.n_test if n_test is None else n_test
    if sample_size is None:
        raise ValueError("No `n_test` provided, cannot calculate binomial error!")

    # `bkg_rej` holds efficiencies instead of rejections when `use_bkg_eff` is set
    error_func = calculate_efficiency_error if self.use_bkg_eff else calculate_rejection_error
    return error_func(self.bkg_rej[self.non_zero_mask], sample_size, norm=norm)

divide #

Calculate ratio between the roc curve and another roc.

Parameters:

Name Type Description Default
roc_comp puma.roc.Roc

Second roc curve to calculate ratio with

required
inverse bool

If False the ratio is calculated this_roc / roc_comp, if True the inverse is calculated

False

Returns:

Type Description
tuple[numpy.ndarray, numpy.ndarray, numpy.ndarray]

The signal efficiency used for the ratio calculation, which is the overlapping interval of the two ROC curves; the ratio between the two ROCs; the ratio error between the two ROCs.

Raises:

Type Description
ValueError

If the signal efficiency between the two ROCs do not match

Source code in puma/roc.py
def divide(
    self,
    roc_comp: Roc,
    inverse: bool = False,
) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
    """Compute the ratio of this roc curve to another one.

    Parameters
    ----------
    roc_comp : Roc
        Second roc curve to calculate ratio with
    inverse : bool
        If False the ratio is calculated `this_roc / roc_comp`,
        if True the inverse is calculated

    Returns
    -------
    tuple[np.ndarray, np.ndarray, np.ndarray]
        The signal efficiency used for the ratio calculation
        The ratio between the two ROCs
        The ratio error between the two ROCs, evaluated only on the points
        selected by ``non_zero_mask``; None if this roc has no ``n_test``

    Raises
    ------
    ValueError
        If the signal efficiency between the two ROCs do not match
    """
    same_sig_eff = np.array_equal(self.sig_eff, roc_comp.sig_eff)
    if not same_sig_eff:
        raise ValueError("Signal efficiencies of the two ROCs do not match.")

    quotient = self.bkg_rej / roc_comp.bkg_rej
    ratio = 1 / quotient if inverse else quotient

    # Without a sample size there is no way to attach an uncertainty
    if self.n_test:
        # Relative error of this roc scaled to the (masked) ratio values
        ratio_err = self.binomial_error(norm=True) * ratio[self.non_zero_mask]
    else:
        ratio_err = None
    return self.sig_eff, ratio, ratio_err

puma.roc.RocPlot #

Bases: puma.plot_base.PlotBase

ROC plot class.

Parameters:

Name Type Description Default
grid bool

Set the grid for the plots.

True
**kwargs typing.Any

Keyword arguments from puma.PlotObject

{}
Source code in puma/roc.py
def __init__(self, grid: bool = True, **kwargs: Any) -> None:
    """Create an empty ROC plot and initialise its bookkeeping containers.

    Parameters
    ----------
    grid : bool, optional
        Set the grid for the plots, by default True
    **kwargs : Any
        Keyword arguments forwarded to the parent class
    """
    super().__init__(grid=grid, **kwargs)
    self.test = ""
    # Registered curves and the ratios computed from them, both keyed by roc key
    self.rocs: dict[str, Roc] = {}
    self.roc_ratios: dict[str, tuple] = {}
    # Per rejection class: ratio axis, linestyle and legend label
    self.rej_axes: dict[str, Axes] = {}
    self.rej_class_ls: dict[str, str] = {}
    self.label_colours: dict[Any, str] = {}
    self.leg_rej_labels: dict[str, str] = {}
    # rejection class name -> {ratio group -> key of the reference roc}
    self.reference_roc: dict[str, dict[str, str]] = {}
    self.initialise_figure()
    self.fig.get_layout_engine().set(h_pad=0, hspace=0)
    # Start with an inverted range so the first curve always updates both limits
    self.eff_min, self.eff_max = (1, 0)
    self.default_linestyles = get_good_linestyles()
    self.legend_flavs = None
    # Read the attribute instead of `kwargs["n_ratio_panels"]`: the latter raises a
    # KeyError whenever the caller relies on the default number of ratio panels
    self.rej_leg_loc = "ratio" if self.n_ratio_panels > 0 else "lower left"

add_ratios #

Calculating ratios.

Raises:

Type Description
ValueError

If the number of reference rocs and ratio panels don't match, or if no ratio classes are set.

Source code in puma/roc.py
def add_ratios(self) -> None:
    """Draw the ratio curves into every ratio panel.

    Raises
    ------
    ValueError
        If number of reference rocs and ratio panels don't match
        If no ratio classes are set
    """
    n_panels = self.n_ratio_panels
    n_references = len(self.reference_roc)

    # Having no reference at all is tolerated here, a wrong number is not
    if self.reference_roc and n_references != n_panels:
        raise ValueError(
            f"{n_references} reference rocs defined but requested "
            f"{n_panels} ratio panels."
        )

    n_assigned_axes = len(self.rej_axes)
    if n_assigned_axes != n_panels:
        raise ValueError("Ratio classes not set, set them first with `set_ratio_class`.")

    for class_name, ratio_axis in self.rej_axes.items():
        self.plot_ratios(axis=ratio_axis, rej_class=class_name)

add_roc #

Adding puma.Roc object to figure.

Parameters:

Name Type Description Default
roc_curve puma.roc.Roc

ROC curve

required
key str | None

Unique identifier for roc_curve, by default None

None
reference bool

If roc is used as reference for ratio calculation, by default False

False

Raises:

Type Description
KeyError

If unique identifier key is used twice

Source code in puma/roc.py
def add_roc(
    self,
    roc_curve: Roc,
    key: str | None = None,
    reference: bool = False,
) -> None:
    """Register a puma.Roc object with the figure.

    Besides storing the curve, a linestyle (one per rejection class) and a
    colour (one per label) are assigned to it unless it already defines them.

    Parameters
    ----------
    roc_curve : Roc
        ROC curve
    key : str | None, optional
        Unique identifier for roc_curve. When None, the running number of the
        curve is used, by default None
    reference : bool, optional
        If roc is used as reference for ratio calculation, by default False

    Raises
    ------
    KeyError
        If unique identifier key is used twice
    """
    if key is None:
        key = f"{len(self.rocs) + 1}"
    key = cast(str, key)

    if key in self.rocs:
        raise KeyError(f"Duplicated key {key} already used for roc unique identifier.")
    self.rocs[key] = roc_curve

    # Linestyle: the first definition for a rejection class wins
    class_name = roc_curve.rej_class
    wanted_ls = roc_curve.linestyle
    if class_name in self.rej_class_ls:
        if wanted_ls is not None and wanted_ls != self.rej_class_ls[class_name]:
            logger.warning(
                "You specified a different linestyle for the same rejection class "
                "%s. Will keep the linestyle defined first.",
                class_name,
            )
    elif wanted_ls is None:
        self.rej_class_ls[class_name] = self.default_linestyles[len(self.rej_class_ls)]
    else:
        self.rej_class_ls[class_name] = wanted_ls
    if wanted_ls is None:
        roc_curve.linestyle = self.rej_class_ls[class_name]

    # Colour: curves sharing a label share the colour registered first
    curve_label = roc_curve.label
    wanted_colour = roc_curve.colour
    if curve_label in self.label_colours:
        if wanted_colour is not None and wanted_colour != self.label_colours[curve_label]:
            logger.warning(
                "You specified a different colour for the same label"
                " %s. This will lead to a mismatch in the line colours"
                " and the legend.",
                curve_label,
            )
    elif wanted_colour is None:
        self.label_colours[curve_label] = get_good_colours()[len(self.label_colours)]
    else:
        self.label_colours[curve_label] = wanted_colour
    if wanted_colour is None:
        roc_curve.colour = self.label_colours[curve_label]

    if not reference:
        return

    logger.debug("Setting roc %s as reference for %s.", key, roc_curve.rej_class)
    self.set_roc_reference(
        key=key,
        rej_class=roc_curve.rej_class,
        ratio_group=roc_curve.ratio_group,
    )
    self.reference_label = roc_curve.label

draw #

Draw plotting.

Parameters:

Name Type Description Default
labelpad int | None

Spacing in points from the axes bounding box including ticks and tick labels, by default None

None
Source code in puma/roc.py
def draw(
    self,
    labelpad: int | None = None,
) -> None:
    """Draw the complete ROC figure.

    Plots all registered ROC curves, sets the x-range (automatic limits are
    only used where ``xmin``/``xmax`` are not set), adds the ratio panels,
    applies title, axis labels and scales, builds the legend(s) and finally
    places one common "Ratio to ..." label next to the ratio panels.

    Parameters
    ----------
    labelpad : int | None, optional
        Spacing in points from the axes bounding box including
        ticks and tick labels. Within this method it is only used to shift
        the common ratio label horizontally, by default None
    """
    plt_handles = self.plot_roc()
    xmin, xmax = self.get_xlim_auto()

    # User-defined limits take precedence over the automatically derived ones
    self.set_xlim(
        xmin if self.xmin is None else self.xmin,
        xmax if self.xmax is None else self.xmax,
    )
    if self.n_ratio_panels > 0:
        self.add_ratios()
    self.set_title()
    self.set_log()
    self.set_y_lim()
    self.set_xlabel()
    self.set_ylabel(self.axis_top)

    # `reference_label` only exists once a roc was added with `reference=True`,
    # hence the `hasattr` guard
    common_ratio_ylabel_text = None
    if self.n_ratio_panels > 0 and hasattr(self, "reference_label") and self.reference_label:
        common_ratio_ylabel_text = f"Ratio to {self.reference_label}"

    if self.n_ratio_panels < 2:
        self.make_legend(plt_handles, ax_mpl=self.axis_top)
    else:
        # Fall back to the raw rejection class names if no legend labels were set
        if not self.leg_rej_labels:
            for rej_class in self.rej_axes:
                self.leg_rej_labels[rej_class] = rej_class
        self.make_split_legend(handles=plt_handles)

    self.plotting_done = True
    if self.apply_atlas_style is True:
        self.atlasify()
        # atlasify can only handle one legend. Therefore, we remove the frame of
        # the second legend by hand
        if self.legend_flavs is not None:
            self.legend_flavs.set_frame_on(False)

    # Ensure the figure layout is calculated before getting extents
    # NOTE(review): `draw_idle` may only schedule a redraw on interactive
    # backends - confirm the extents below are up to date in that case
    self.fig.canvas.draw_idle()

    # Add the common ratio label as figure text if it exists
    if common_ratio_ylabel_text and self.axis_top.yaxis.get_label().get_text():
        # Bounding box of the main y-label, converted from display to figure coordinates
        main_ylabel_obj = self.axis_top.yaxis.get_label()
        renderer = self.fig.canvas.get_renderer()
        main_ylabel_disp_bbox = main_ylabel_obj.get_window_extent(renderer=renderer)
        main_ylabel_fig_bbox = main_ylabel_disp_bbox.transformed(
            self.fig.transFigure.inverted()
        )

        ratio_ylabel_offset = 0
        if labelpad:
            fig_width_inches = self.fig.get_figwidth()
            # The default labelpad for the axis_top is 4 points
            # points -> inches (72 points per inch) -> fraction of the figure width
            ratio_ylabel_offset_inchs = (labelpad - 4) / 72
            ratio_ylabel_offset = ratio_ylabel_offset_inchs / fig_width_inches

        # Horizontally aligned with the main y-label, vertically anchored at the
        # lower edge of the last ratio panel
        fig_text_x = main_ylabel_fig_bbox.x0 - ratio_ylabel_offset
        last_ratio_ax_bbox_fig = self.ratio_axes[-1].get_position()
        fig_text_y = last_ratio_ax_bbox_fig.y0

        self.fig.text(
            fig_text_x,
            fig_text_y,
            common_ratio_ylabel_text,
            rotation="vertical",
            va="baseline",
            ha="left",
            fontsize=self.label_fontsize,
            transform=self.fig.transFigure,
        )

    adjust_ylabels(self.fig, self.rej_axes.values())

get_xlim_auto #

Returns min and max efficiency values.

Returns:

Type Description
tuple[float, float]

Min and max efficiency values

Source code in puma/roc.py
def get_xlim_auto(self) -> tuple[float, float]:
    """Return the signal-efficiency range spanned by all registered ROC curves.

    The running limits stored in ``eff_min`` and ``eff_max`` are updated with
    every curve in ``rocs`` and kept on the object.

    Returns
    -------
    tuple[float, float]
        Min and max efficiency values
    """
    for elem in self.rocs.values():
        self.eff_min = min(np.min(elem.sig_eff), self.eff_min)
        # Compare against the running maximum (not the minimum): otherwise a later
        # curve covering a smaller range would shrink the upper limit again
        self.eff_max = max(np.max(elem.sig_eff), self.eff_max)

    return self.eff_min, self.eff_max

make_split_legend #

Draw legend for the case of 2 ratios, splitting up legend into models and rejection class.

Parameters:

Name Type Description Default
handles list

List of Line2D objects to extract info for legend

required

Raises:

Type Description
ValueError

If fewer than 2 ratio panels are requested

Source code in puma/roc.py
def make_split_legend(self, handles: list) -> None:
    """Draw separate legends for the models and for the rejection classes.

    The rejection-class entries go either into the ratio panels (when
    ``rej_leg_loc`` is "ratio") or into a second legend on the main axis.

    Parameters
    ----------
    handles : list
        List of Line2D objects to extract info for legend

    Raises
    ------
    ValueError
        If fewer than 2 ratio panels are requested
    """
    if self.n_ratio_panels < 2:
        raise ValueError("For a split legend you need 2 ratio panels.")

    def rejection_proxy(rej_class):
        """Build an empty black line carrying label and linestyle of `rej_class`."""
        return mpl.lines.Line2D(
            [],
            [],
            color="k",
            label=self.leg_rej_labels[rej_class],
            linestyle=self.rej_class_ls[rej_class],
        )

    if self.rej_leg_loc != "ratio":
        proxies = [rejection_proxy(rej_class) for rej_class in self.rej_axes]
        self.legend_flavs = self.axis_top.legend(
            handles=proxies,
            labels=[proxy.get_label() for proxy in proxies],
            loc=self.rej_leg_loc,
            fontsize=self.leg_fontsize,
            ncol=self.leg_ncol,
        )

        # Add the second legend to plot
        self.axis_top.add_artist(self.legend_flavs)
    else:
        # One single-entry legend per ratio panel
        for rej_class, ratio_axis in self.rej_axes.items():
            proxy = rejection_proxy(rej_class)
            ratio_axis.legend(
                handles=[proxy],
                labels=[proxy.get_label()],
                loc="upper right",
                fontsize=self.leg_fontsize,
            )

    # Keep only the first handle for every label, preserving the order
    first_handle_per_label = {}
    for handle in handles:
        first_handle_per_label.setdefault(handle.get_label(), handle)

    # Legend of the models on the main axis
    self.axis_top.legend(
        handles=list(first_handle_per_label.values()),
        labels=list(first_handle_per_label),
        loc=self.leg_loc,
        fontsize=self.leg_fontsize,
        ncol=self.leg_ncol,
    )

plot_ratios #

Plotting ratio curves.

Parameters:

Name Type Description Default
axis matplotlib.pyplot.axis

matplotlib axis object

required
rej_class str

Rejection class

required
Source code in puma/roc.py
def plot_ratios(self, axis: plt.axis, rej_class: str) -> None:
    """Draw the ratio curves of one rejection class into a ratio panel.

    Every roc of the given rejection class is divided by the reference roc
    of its ratio group; the result is also stored in ``roc_ratios``.

    Parameters
    ----------
    axis : plt.axis
        matplotlib axis object
    rej_class : str
        Rejection class
    """
    selected = ((key, roc) for key, roc in self.rocs.items() if roc.rej_class == rej_class)
    for roc_key, roc in selected:
        reference_key = self.reference_roc[rej_class][roc.ratio_group]
        eff, ratio_values, ratio_unc = roc.divide(self.rocs[reference_key])
        self.roc_ratios[roc_key] = (eff, ratio_values, ratio_unc)

        axis.plot(
            eff,
            ratio_values,
            color=roc.colour,
            linestyle=roc.linestyle,
            linewidth=2.0,
        )

        # No uncertainty available -> no band to draw
        if ratio_unc is None:
            continue
        axis.fill_between(
            eff,
            ratio_values - ratio_unc,
            ratio_values + ratio_unc,
            color=roc.colour,
            alpha=0.25,
            edgecolor="none",
            zorder=1,
        )

plot_roc #

Plotting roc curves.

Parameters:

Name Type Description Default
**kwargs typing.Any

Keyword arguments passed to plt.axis.plot

{}

Returns:

Type Description
puma.line_plot_2d.Line2D

matplotlib Line2D object

Source code in puma/roc.py
def plot_roc(self, **kwargs: Any) -> list[mpl.lines.Line2D]:
    """Plot all registered roc curves on the main axis.

    Only the points selected by each curve's ``non_zero_mask`` are drawn.
    Curves without a label are labelled with their key. For curves with
    ``n_test`` set, the binomial error is drawn as a band around the curve.

    Parameters
    ----------
    **kwargs: Any
        Keyword arguments passed to plt.axis.plot

    Returns
    -------
    list[Line2D]
        matplotlib Line2D objects of the plotted curves
    """
    plt_handles = []
    for key, elem in self.rocs.items():
        # Evaluate the mask once and reuse the masked arrays for curve and band
        mask = elem.non_zero_mask
        sig_eff = elem.sig_eff[mask]
        bkg_rej = elem.bkg_rej[mask]

        plt_handles += self.axis_top.plot(
            sig_eff,
            bkg_rej,
            linestyle=elem.linestyle,
            linewidth=2,
            color=elem.colour,
            # Fall back to the key if the curve itself carries no label
            label=elem.label if elem.label is not None else key,
            zorder=2,
            **kwargs,
        )
        if elem.n_test is not None:
            # if uncertainties are available for roc plotting their uncertainty as
            # a band around the roc itself
            rej_err = elem.binomial_error()
            self.axis_top.fill_between(
                sig_eff,
                bkg_rej - rej_err,
                bkg_rej + rej_err,
                color=elem.colour,
                alpha=0.25,
                edgecolor="none",
                zorder=2,
            )
    return plt_handles

set_ratio_class #

Associate the rejection class to a ratio panel and set the legend label.

Parameters:

Name Type Description Default
ratio_panel int

Ratio panel either 1 or 2

required
rej_class str | ftag.Label

Rejection class associated to that panel. Either a Label instance or a string

required
rej_class_label str | None

If rej_class is not a Label, this label must be given

None

Raises:

Type Description
TypeError

If the rej_class is a string and rej_class_label is not defined

Source code in puma/roc.py
def set_ratio_class(
    self,
    ratio_panel: int,
    rej_class: str | Label,
    rej_class_label: str | None = None,
) -> None:
    """Assign a rejection class to a ratio panel and define its legend label.

    Parameters
    ----------
    ratio_panel : int
        Ratio panel either 1 or 2
    rej_class : str | Label
        Rejection class associated to that panel. Either a Label instance or a string
    rej_class_label : str | None, optional
        If rej_class is not a Label, this label must be given

    Raises
    ------
    TypeError
        If the rej_class is not a Label and rej_class_label is not a string
    """
    if isinstance(rej_class, Label):
        # A Label brings both its legend text and its name
        legend_text, class_name = rej_class.label, rej_class.name
    elif isinstance(rej_class_label, str):
        legend_text, class_name = rej_class_label, rej_class
    else:
        raise TypeError("'rej_class_label' must be a string when 'rej_class' is not a Label!")

    # Panels are counted from 1, the axes list from 0
    self.rej_axes[class_name] = self.ratio_axes[ratio_panel - 1]
    singular_text = legend_text.replace("jets", "jet")
    self.set_ratio_label(ratio_panel, f"{singular_text} ratio")
    self.leg_rej_labels[class_name] = legend_text

set_roc_reference #

Setting the reference roc curves used in the ratios.

Parameters:

Name Type Description Default
key str

Unique identifier of roc object

required
rej_class str | ftag.Label

Rejection class encoded in roc curve

required
ratio_group str

Ratio group this roc is reference for

required

Raises:

Type Description
ValueError

If more rejection classes are set than actual ratio panels available.

Source code in puma/roc.py
def set_roc_reference(
    self,
    key: str,
    rej_class: str | Label,
    ratio_group: str,
) -> None:
    """Setting the reference roc curves used in the ratios.

    References are stored per rejection class and, within a class, per ratio
    group. Setting a reference for an already occupied ratio group replaces
    the previous one and logs a warning.

    Parameters
    ----------
    key : str
        Unique identifier of roc object
    rej_class : str | Label
        Rejection class encoded in roc curve
    ratio_group : str
        Ratio group this roc is reference for

    Raises
    ------
    ValueError
        If more rejection classes are set than actual ratio panels available.
    """
    # Ensure that rej_class is not an instance of label
    rej_class_str = rej_class.name if isinstance(rej_class, Label) else rej_class

    if rej_class_str not in self.reference_roc:
        # A new class is about to be added, so all panels being taken already
        # (`>=`, not `>`) means there is no panel left for it
        if len(self.reference_roc) >= self.n_ratio_panels:
            raise ValueError(
                "You cannot set more rejection classes than available ratio panels."
            )
        self.reference_roc[rej_class_str] = {ratio_group: key}
    else:
        if self.reference_roc[rej_class_str].get(ratio_group):
            logger.warning(
                "You specified a second roc curve %s as reference for ratio. "
                "Using it as new reference instead of %s.",
                key,
                self.reference_roc[rej_class_str][ratio_group],
            )
        self.reference_roc[rej_class_str][ratio_group] = key