Skip to content

Var vs var

puma.var_vs_var.VarVsVar #

Bases: puma.plot_base.PlotLineObject

VarVsVar class that stores the information about a curve and allows calculating the ratio w.r.t. other VarVsVar curves.

Parameters:

Name Type Description Default
x_var numpy.ndarray

Values for x-axis variable, e.g. bin midpoints for binned data

required
y_var_mean numpy.ndarray

Mean value for y-axis variable

required
y_var_std numpy.ndarray

Std value for y-axis variable

required
x_var_widths numpy.ndarray

Widths for x-axis variable, e.g. bin widths for binned data

None
key str | None

Identifier for the curve e.g. tagger, by default None

None
fill bool

Whether to fill the box around each point, by default True

True
plot_y_std bool

Whether to plot y_var_std, by default True

True
ratio_group str | None

Name of the ratio group this VarVsVar is compared with. The ratio group allows you to compare different groups of VarVsVar within one plot. By default None

None
**kwargs typing.Any

Keyword arguments passed to PlotLineObject

{}

Raises:

Type Description
ValueError

If provided options are not compatible with each other

Source code in puma/var_vs_var.py
def __init__(
    self,
    x_var: np.ndarray,
    y_var_mean: np.ndarray,
    y_var_std: np.ndarray,
    x_var_widths: np.ndarray = None,
    key: str | None = None,
    fill: bool = True,
    plot_y_std: bool = True,
    ratio_group: str | None = None,
    **kwargs: Any,
) -> None:
    """Validate the curve inputs and store them as numpy arrays.

    Parameters
    ----------
    x_var : np.ndarray
        Values for x-axis variable, e.g. bin midpoints for binned data
    y_var_mean : np.ndarray
        Mean value for y-axis variable
    y_var_std : np.ndarray
        Std value for y-axis variable
    x_var_widths : np.ndarray, optional
        Widths for x-axis variable, e.g. bin widths for binned data.
        By default None
    key : str | None, optional
        Identifier for the curve e.g. tagger, by default None
    fill : bool, optional
        Whether to fill the box around each point, by default True
    plot_y_std : bool, optional
        Whether to plot `y_var_std`, by default True
    ratio_group : str | None, optional
        Name of the ratio group this VarVsVar is compared with, by default None
    **kwargs : Any
        Keyword arguments passed to the parent class

    Raises
    ------
    ValueError
        If `y_var_mean`, `y_var_std` or (when given) `x_var_widths` does not
        have the same length as `x_var`
    """
    super().__init__(**kwargs)

    # The two y-arrays are mandatory and must line up with x_var
    n_points = len(x_var)
    for name, values in (("y_var_mean", y_var_mean), ("y_var_std", y_var_std)):
        n_values = len(values)
        if n_points != n_values:
            raise ValueError(
                f"Length of `x_var` ({n_points}) and `{name}` ({n_values}) have to be identical."
            )

    # The widths are optional, but must line up with x_var when given
    if x_var_widths is not None:
        n_widths = len(x_var_widths)
        if n_points != n_widths:
            raise ValueError(
                f"Length of `x_var` ({n_points}) and `x_var_widths` "
                f"({n_widths}) have to be identical."
            )

    # Store everything as numpy arrays
    self.x_var = np.array(x_var)
    if x_var_widths is None:
        self.x_var_widths = None
    else:
        self.x_var_widths = np.array(x_var_widths)
    self.y_var_mean = np.array(y_var_mean)
    self.y_var_std = np.array(y_var_std)

    # Plain configuration values
    self.key = key
    self.fill = fill
    self.plot_y_std = plot_y_std
    self.ratio_group = ratio_group

    # Keep the keyword arguments that were forwarded to the parent class
    self.kwargs = kwargs

args_to_store property #

Returns the arguments that need to be stored/loaded.

Returns:

Type Description
dict[str, typing.Any]

Dict with the arguments

__eq__ #

Handles a == check with the class.

Parameters:

Name Type Description Default
other puma.var_vs_var.VarVsVar

Other VarVsVar that this class is tested against

required

Returns:

Type Description
bool

If this VarVsVar and the other are equal

Source code in puma/var_vs_var.py
def __eq__(self, other: VarVsVar) -> bool:
    """Handles a == check with the class.

    Two objects are equal when `x_var`, `y_var_mean` and `y_var_std` have the
    same shape and values and the `key` is identical.

    Parameters
    ----------
    other : VarVsVar
        Other VarVsVar that this class is tested against

    Returns
    -------
    bool
        If this VarVsVar and the other are equal
    """
    if not isinstance(other, self.__class__):
        return False

    # np.array_equal also compares the shapes. An element-wise `==` would
    # broadcast (a length-1 curve would match a constant longer curve) and
    # fails for shapes that cannot be broadcast.
    return bool(
        self.key == other.key
        and np.array_equal(self.x_var, other.x_var)
        and np.array_equal(self.y_var_mean, other.y_var_mean)
        and np.array_equal(self.y_var_std, other.y_var_std)
    )

divide #

Calculate ratio between two class objects.

Parameters:

Name Type Description Default
other puma.var_vs_var.VarVsVar

Second VarVsVar object to calculate ratio with

required
inverse bool

If False the ratio is calculated this / other, if True the inverse is calculated. By default False.

False
method str

Define which method is used for ratio calculation. By default "divide". Other possibility is "root_square_diff" and "subtract".

'divide'

Returns:

Type Description
tuple[numpy.ndarray, numpy.ndarray]

Ratio and ratio error

Raises:

Type Description
ValueError

If binning is not identical between 2 objects

Source code in puma/var_vs_var.py
def divide(
    self,
    other: VarVsVar,
    inverse: bool = False,
    method: str = "divide",
) -> tuple[np.ndarray, np.ndarray]:
    """Compute the ratio (or difference) of this curve w.r.t. another one.

    Parameters
    ----------
    other : VarVsVar
        Second VarVsVar object to calculate ratio with
    inverse : bool, optional
        If False the ratio is calculated `this / other`,
        if True the inverse is calculated. By default False.
    method : str, optional
        Define which method is used for ratio calculation. By default "divide".
        Other possibilities are "root_square_diff" and "subtract".

    Returns
    -------
    tuple[np.ndarray, np.ndarray]
        Ratio and ratio error

    Raises
    ------
    ValueError
        If binning is not identical between 2 objects
    """
    # Both curves must be defined on exactly the same x values
    if not np.array_equal(self.x_var, other.x_var):
        raise ValueError("The x variables of the two given objects do not match.")

    # (values, uncertainty) pairs of this object and of the other one
    own = (self.y_var_mean, self.y_var_std)
    reference = (other.y_var_mean, other.y_var_std)

    # Swap the roles when the inverse ratio is requested; only the
    # uncertainty of the numerator is handed to hist_ratio
    upper, lower = (reference, own) if inverse else (own, reference)
    upper_values, upper_unc = upper
    lower_values = lower[0]

    ratio, ratio_err = hist_ratio(
        numerator=upper_values,
        denominator=lower_values,
        numerator_unc=upper_unc,
        step=False,
        method=method,
    )
    return (ratio, ratio_err)

puma.var_vs_var.VarVsVarPlot #

Bases: puma.plot_base.PlotBase

VarVsVar plot class.

Parameters:

Name Type Description Default
grid bool

Set the grid for the plots.

False
ratio_method str

Method for ratio calculations. Accepted values: "divide", "root_square_diff", "subtract". By default "divide"

'divide'
**kwargs typing.Any

Keyword arguments from puma.PlotObject

{}

Raises:

Type Description
ValueError

If incompatible mode given or more than 1 ratio panel requested

Source code in puma/var_vs_var.py
def __init__(self, grid: bool = False, ratio_method: str = "divide", **kwargs: Any) -> None:
    """Set up the bookkeeping for the curves and initialise the figure.

    Parameters
    ----------
    grid : bool, optional
        Set the grid for the plots, by default False
    ratio_method : str, optional
        Method for ratio calculations, by default "divide"
    **kwargs : Any
        Keyword arguments passed to the parent class

    Raises
    ------
    ValueError
        If more than 1 ratio panel requested
    """
    super().__init__(grid=grid, **kwargs)

    # Curves by key, the order they were added in, and the reference keys
    self.plot_objects: dict[str, VarVsVar] = {}
    self.add_order: list[str] = []
    self.reference_object: list[str] | None = None

    # Start with an "empty" x range so the first added curve always widens it
    self.x_var_min = np.inf
    self.x_var_max = -np.inf

    self.inverse_cut: bool = False
    self.ratio_method = ratio_method

    # Only zero or one ratio panel can be drawn
    if self.n_ratio_panels > 1:
        raise ValueError("Not more than one ratio panel supported.")

    self.initialise_figure()

add #

Adding VarVsVar object to figure.

Parameters:

Name Type Description Default
curve puma.var_vs_var.VarVsVar

VarVsVar curve

required
key str | None

Unique identifier for VarVsVar curve, by default None

None
reference bool

If VarVsVar is used as reference for ratio calculation, by default False

False

Raises:

Type Description
KeyError

If unique identifier key is used twice

Source code in puma/var_vs_var.py
def add(self, curve: VarVsVar, key: str | None = None, reference: bool = False) -> None:
    """Adding VarVsVar object to figure.

    Style attributes of the curve that are still None are filled with
    defaults, and the x range covered by all added curves is updated.

    Parameters
    ----------
    curve : VarVsVar
        VarVsVar curve
    key : str | None, optional
        Unique identifier for VarVsVar curve, by default None. If None, the
        running number of the curve (as a string) is used.
    reference : bool, optional
        If VarVsVar is used as reference for ratio calculation, by default False

    Raises
    ------
    KeyError
        If unique identifier key is used twice
    """
    key = cast(str, key if key is not None else f"{len(self.plot_objects) + 1}")

    if key in self.plot_objects:
        raise KeyError(f"Duplicated key {key} already used for unique identifier.")

    self.plot_objects[key] = curve
    self.add_order.append(key)

    # Number of curves including the one that was just added
    n_curves = len(self.plot_objects)

    # Only fill in the style attributes that were left unset
    if curve.linestyle is None:
        curve.linestyle = "-"
    if curve.colour is None:
        curve.colour = get_good_colours()[n_curves - 1]
    if curve.alpha is None:
        curve.alpha = 0.8
    if curve.linewidth is None:
        curve.linewidth = 1.6

    if curve.is_marker is True:
        if curve.marker is None:
            # NOTE(review): the marker index is 1-based while the colour
            # index above is 0-based - confirm that this is intended.
            curve.marker = get_good_markers()[n_curves]
        if curve.markersize is None:
            curve.markersize = 8
        if curve.markeredgewidth is None:
            curve.markeredgewidth = 2

    # Outer edges of the curve: points are extended by half their width
    if curve.x_var_widths is not None:
        half_widths = curve.x_var_widths / 2
        left_edge = curve.x_var - half_widths
        right_edge = curve.x_var + half_widths
    else:
        left_edge = curve.x_var
        right_edge = curve.x_var

    # Widen the x range covered by all curves added so far
    self.x_var_min = min(self.x_var_min, np.sort(left_edge)[0])
    self.x_var_max = max(self.x_var_max, np.sort(right_edge)[-1])

    if reference:
        logger.debug("Setting VarVsVar %s as reference.", key)
        self.set_reference(key)

draw #

Draw figure.

Parameters:

Name Type Description Default
labelpad int | None

Spacing in points from the axes bounding box including ticks and tick labels, by default None

None
Source code in puma/var_vs_var.py
def draw(
    self,
    labelpad: int | None = None,
):
    """Draw the complete figure: curves, optional ratio panel, labels and legend.

    Parameters
    ----------
    labelpad : int | None, optional
        Spacing in points from the axes bounding box including
        ticks and tick labels, by default None
    """
    # User-defined limits take precedence over the range of the added curves
    lower = self.x_var_min if self.xmin is None else self.xmin
    upper = self.x_var_max if self.xmax is None else self.xmax
    self.set_xlim(lower, upper)

    plt_handles = self.plot()
    if self.n_ratio_panels == 1:
        self.plot_ratios()

    # General figure cosmetics
    self.set_title()
    self.set_log()
    self.set_y_lim()
    self.set_xlabel()
    self.set_tick_params()
    self.set_ylabel(self.axis_top)

    # Label of the ratio panel, if there is one
    if self.n_ratio_panels > 0:
        assert isinstance(self.ylabel_ratio, list)
        ratio_axis = self.ratio_axes[0]
        ratio_label = self.ylabel_ratio[0]
        self.set_ylabel(ratio_axis, ratio_label, align="center", labelpad=labelpad)

    self.make_legend(plt_handles, ax_mpl=self.axis_top)
    self.plotting_done = True

    if self.apply_atlas_style is True:
        self.atlasify()

draw_hline #

Draw hline in top plot panel.

Parameters:

Name Type Description Default
y_val float

y value of the horizontal line

required
Source code in puma/var_vs_var.py
def draw_hline(self, y_val: float):
    """Draw a dotted horizontal line across the x range of the added curves.

    The line is drawn in the top plot panel.

    Parameters
    ----------
    y_val : float
        y value of the horizontal line
    """
    # Fixed appearance of the line
    line_style = {"colors": "black", "linestyle": "dotted", "alpha": 0.5}
    self.axis_top.hlines(
        y=y_val,
        xmin=self.x_var_min,
        xmax=self.x_var_max,
        **line_style,
    )

get_reference_name #

Get reference VarVsVar object from list of references.

Parameters:

Name Type Description Default
var_object puma.var_vs_var.VarVsVar

VarVsVar we want to calculate the ratio for

required

Returns:

Name Type Description
reference_name puma.var_vs_var.VarVsVar | None

Corresponding reference VarVsVar

Raises:

Type Description
ValueError

If no reference VarVsVar was found or multiple matches.

Source code in puma/var_vs_var.py
def get_reference_name(self, var_object: VarVsVar) -> VarVsVar | None:
    """Get reference VarVsVar object from list of references.

    If `var_object` has a ratio group, only references with the same ratio
    group are considered. Otherwise every registered reference is a candidate.

    Parameters
    ----------
    var_object : VarVsVar
        VarVsVar we want to calculate the ratio for

    Returns
    -------
    reference_name : VarVsVar | None
        Corresponding reference VarVsVar

    Raises
    ------
    ValueError
        If no reference VarVsVar was found or multiple matches.
    """
    # `reference_object` is None until a reference was set. Treat that as
    # "no candidates" so the documented ValueError is raised instead of a
    # TypeError from iterating over None.
    reference_keys = self.reference_object if self.reference_object is not None else []
    candidates = [self.plot_objects[key] for key in reference_keys]

    # Restrict the candidates to the ratio group of the given object, if any
    if var_object.ratio_group is not None:
        candidates = [
            candidate
            for candidate in candidates
            if var_object.ratio_group == candidate.ratio_group
        ]

    matches = len(candidates)
    if matches != 1:
        raise ValueError(
            f"Found {matches} matching reference candidates, but only one match is allowed."
        )

    reference_name = candidates[0]
    logger.debug("Reference var_object for '%s' is '%s'", var_object.key, reference_name.key)

    return reference_name

plot #

Plotting curves.

Parameters:

Name Type Description Default
**kwargs typing.Any

Keyword arguments passed to plt.axis.errorbar

{}

Returns:

Type Description
matplotlib.lines.Line2D

matplotlib Line2D object

Source code in puma/var_vs_var.py
def plot(self, **kwargs: Any) -> list[mpl.lines.Line2D]:
    """Plotting curves.

    Every curve is drawn in the top panel in the order it was added: error
    bars first, then optional markers and optional filled boxes.

    Parameters
    ----------
    **kwargs: Any
        Keyword arguments passed to plt.axis.errorbar

    Returns
    -------
    list[mpl.lines.Line2D]
        One matplotlib Line2D legend handle per plotted curve
    """
    logger.debug("Plotting curves")
    plt_handles = []
    for key in self.add_order:
        elem = self.plot_objects[key]

        # The widths are full widths, the x error bars span half of it per side
        x_err = elem.x_var_widths / 2 if elem.x_var_widths is not None else None
        # Zero-length y error bars when the std should not be shown
        y_err = elem.y_var_std if elem.plot_y_std else np.zeros_like(elem.x_var)

        error_bar = self.axis_top.errorbar(
            elem.x_var,
            elem.y_var_mean,
            xerr=x_err,
            yerr=y_err,
            color=elem.colour,
            fmt="none",
            label=elem.label,
            alpha=elem.alpha,
            linewidth=elem.linewidth,
            ms=elem.markersize,
            **kwargs,
        )
        # Set the linestyle on the first entry of the last element that
        # errorbar returned
        error_bar[-1][0].set_linestyle(elem.linestyle)

        # Draw markers
        if elem.is_marker is True:
            self.axis_top.scatter(
                x=elem.x_var,
                y=elem.y_var_mean,
                marker=elem.marker,
                s=elem.markersize**2,
                color=elem.colour,
            )

        # Filled box of size (width, 2 * std) centred on each point
        if elem.x_var_widths is not None and elem.fill:
            for x_pos, y_pos, width, height in zip(
                elem.x_var,
                elem.y_var_mean,
                elem.x_var_widths,
                2 * elem.y_var_std,
                strict=False,
            ):
                self.axis_top.add_patch(
                    Rectangle(
                        xy=(
                            x_pos - width / 2,
                            y_pos - height / 2,
                        ),
                        width=width,
                        height=height,
                        linewidth=0,
                        color=elem.colour,
                        alpha=0.3,
                        zorder=1,
                    )
                )

        # Empty proxy line that carries the style of the curve for the legend
        plt_handles.append(
            mpl.lines.Line2D(
                [],
                [],
                color=elem.colour,
                label=elem.label,
                linestyle=elem.linestyle,
                marker=elem.marker,
                markersize=elem.markersize,
            )
        )
    return plt_handles

plot_ratios #

Plotting ratio curves.

Raises:

Type Description
ValueError

If no reference curve is defined

Source code in puma/var_vs_var.py
def plot_ratios(self):
    """Draw the ratio of every curve w.r.t. its reference in the ratio panel.

    Raises
    ------
    ValueError
        If no reference curve is defined
    """
    if self.reference_object is None:
        raise ValueError("Please specify a reference curve.")

    for curve_key in self.add_order:
        elem = self.plot_objects[curve_key]

        # Ratio (or difference, depending on the method) to the reference
        ratio, ratio_err = elem.divide(
            other=self.get_reference_name(elem),
            method=self.ratio_method,
        )
        ratio_axis = self.ratio_axes[0]

        # The widths are full widths, the x error bars span half of it per side
        if elem.x_var_widths is not None:
            x_err = elem.x_var_widths / 2
        else:
            x_err = None
        # Zero-length y error bars when the std should not be shown
        if elem.plot_y_std:
            y_err = ratio_err
        else:
            y_err = np.zeros_like(elem.x_var)

        error_bar = ratio_axis.errorbar(
            elem.x_var,
            ratio,
            xerr=x_err,
            yerr=y_err,
            color=elem.colour,
            fmt="none",
            alpha=elem.alpha,
            linewidth=elem.linewidth,
            ms=elem.markersize,
        )
        # Set the linestyle on the first entry of the last returned element
        error_bar[-1][0].set_linestyle(elem.linestyle)

        if elem.is_marker is True:
            ratio_axis.scatter(
                x=elem.x_var,
                y=ratio,
                marker=elem.marker,
                color=elem.colour,
                s=elem.markersize**2,
            )

        if elem.x_var_widths is None or not elem.fill:
            continue

        # Filled box of size (width, 2 * ratio error) centred on each point
        boxes = zip(elem.x_var, ratio, elem.x_var_widths, 2 * ratio_err, strict=False)
        for centre_x, centre_y, box_width, box_height in boxes:
            ratio_axis.add_patch(
                Rectangle(
                    xy=(
                        centre_x - box_width / 2,
                        centre_y - box_height / 2,
                    ),
                    width=box_width,
                    height=box_height,
                    linewidth=0,
                    color=elem.colour,
                    alpha=0.3,
                    zorder=1,
                )
            )

set_reference #

Setting the reference VarVsVar curves used in the ratios.

Parameters:

Name Type Description Default
key str

Unique identifier of the VarVsVar object

required
Source code in puma/var_vs_var.py
def set_reference(self, key: str):
    """Register a curve as a reference for the ratio calculation.

    Parameters
    ----------
    key : str
        Unique identifier of the VarVsVar object
    """
    # The list of references is only created once the first one is set
    if self.reference_object is None:
        self.reference_object = []
    self.reference_object.append(key)
    logger.debug("Adding '%s' to reference VarVsVar(s)", key)

puma.var_vs_eff.VarVsEff #

Bases: puma.var_vs_var.VarVsVar

Class for efficiency vs. variable plot.

Parameters:

Name Type Description Default
x_var_sig numpy.ndarray

Values for x-axis variable for signal

required
disc_sig numpy.ndarray

Discriminant values for signal

required
weights_sig numpy.ndarray | None

Weights for the signal. If not provided, equal weights will be used. By default None

None
x_var_bkg numpy.ndarray

Values for x-axis variable for background, by default None

None
disc_bkg numpy.ndarray

Discriminant values for background, by default None

None
weights_bkg numpy.ndarray | None

Weights for the background. If not provided, equal weights will be used. By default None

None
bins int | list | numpy.ndarray

If bins is an int, it defines the number of equal-width bins in the given range (10, by default). If bins is a sequence, it defines a monotonically increasing array of bin edges, including the rightmost edge, allowing for non-uniform bin widths, by default 10

10
working_point float | list | None

Working point, by default None

None
fixed_bkg_rej float | None

Instead of fixing the signal efficiency (by using working_point), fix the background rejection to a certain value. By default None

None
disc_cut int | list | numpy.ndarray | None

Cut value for discriminant, if it is a sequence it has to have the same length as number of bins, by default None

None
flat_per_bin bool

If True and no disc_cut is given the signal efficiency is held constant in each bin, by default False

False
key str | None

Identifier for the curve e.g. tagger, by default None

None
**kwargs typing.Any

Keyword arguments passed to PlotLineObject

{}

Raises:

Type Description
ValueError

If x_var_sig and disc_sig have different lengths; if x_var_bkg and disc_bkg have different lengths; if neither working_point nor flat_per_bin was set; if both disc_cut and flat_per_bin are set; if working_point is not set but flat_per_bin is; when using PCFT bins and flat_per_bin together; when using disc_cut and working_point together; or if disc_cut (if an array) has a different length than the number of bins given

TypeError

If "working_point" is neither a list nor a float

Source code in puma/var_vs_eff.py
def __init__(
    self,
    x_var_sig: np.ndarray,
    disc_sig: np.ndarray,
    weights_sig: np.ndarray | None = None,
    x_var_bkg: np.ndarray = None,
    disc_bkg: np.ndarray = None,
    weights_bkg: np.ndarray | None = None,
    bins: int | list | np.ndarray = 10,
    working_point: float | list | None = None,
    fixed_bkg_rej: float | None = None,
    disc_cut: int | list | np.ndarray | None = None,
    flat_per_bin: bool = False,
    key: str | None = None,
    **kwargs: Any,
) -> None:
    """Validate the inputs, bin them and precompute all efficiencies/rejections.

    Parameters
    ----------
    x_var_sig : np.ndarray
        Values for x-axis variable for signal
    disc_sig : np.ndarray
        Discriminant values for signal
    weights_sig : np.ndarray | None, optional
        Weights for the signal. If not provided, equal weights will be used.
        By default None
    x_var_bkg : np.ndarray, optional
        Values for x-axis variable for background, by default None
    disc_bkg : np.ndarray, optional
        Discriminant values for background, by default None
    weights_bkg : np.ndarray | None, optional
        Weights for the background. If not provided, equal weights will be used.
        By default None
    bins : int | list | np.ndarray, optional
        Binning that is handed to `_set_bin_edges`, by default 10
    working_point : float | list | None, optional
        Working point, by default None
    fixed_bkg_rej : float | None, optional
        Instead of fixing the signal efficiency (by using working_point), fix
        the background rejection to a certain value. By default None
    disc_cut : int | list | np.ndarray | None, optional
        Cut value for discriminant, if it is a sequence it has to have the same
        length as number of bins, by default None
    flat_per_bin : bool, optional
        Cannot be combined with `disc_cut`; requires a float `working_point`
        when used together with `working_point`. By default False
    key : str | None, optional
        Identifier for the curve e.g. tagger, by default None
    **kwargs : Any
        Keyword arguments passed to VarVsVar

    Raises
    ------
    ValueError
        If `x_var_sig` and `disc_sig` have different lengths.
        If `x_var_bkg` is given without `disc_bkg` or with a different length.
        If the weights do not have the same shape as the discriminants.
        If not exactly one of `disc_cut`, `working_point` and `fixed_bkg_rej`
        is given.
        If `disc_cut` is used together with `flat_per_bin`.
        If `disc_cut` (as list/array) has a different length than the number
        of bins.
        If `working_point` is not a float while `flat_per_bin` is set.
    TypeError
        If `working_point` is neither a list nor a float, or if
        `fixed_bkg_rej` is neither an int nor a float
    """
    if len(x_var_sig) != len(disc_sig):
        raise ValueError(
            f"Length of `x_var_sig` ({len(x_var_sig)}) and `disc_sig` "
            f"({len(disc_sig)}) have to be identical."
        )
    if x_var_bkg is not None:
        # Without this guard, len(None) would fail with an unrelated TypeError
        if disc_bkg is None:
            raise ValueError("`disc_bkg` must be provided when `x_var_bkg` is given.")
        if len(x_var_bkg) != len(disc_bkg):
            raise ValueError(
                f"Length of `x_var_bkg` ({len(x_var_bkg)}) and `disc_bkg` "
                f"({len(disc_bkg)}) have to be identical."
            )

    # Ensure that signal/background variables are arrays
    self.x_var_sig = np.array(x_var_sig)
    self.disc_sig = np.array(disc_sig)
    self.x_var_bkg = None if x_var_bkg is None else np.array(x_var_bkg)
    self.disc_bkg = None if disc_bkg is None else np.array(disc_bkg)

    # Check the signal weights and use ones if not given
    if weights_sig is None:
        self.weights_sig = np.ones_like(self.disc_sig, dtype=float)

    # Else ensure that they have the same size as the discriminants
    else:
        self.weights_sig = np.asarray(weights_sig, dtype=float)
        if self.weights_sig.shape != self.disc_sig.shape:
            raise ValueError(
                "Length of 'weights_sig' "
                f"({self.weights_sig.shape}) must match 'disc_sig' ({self.disc_sig.shape})."
            )

    # If no bkg discs are given, don't use bkg weights
    if self.disc_bkg is None:
        self.weights_bkg = None

    # If no weights but discs are given, use ones
    elif weights_bkg is None:
        self.weights_bkg = np.ones_like(self.disc_bkg, dtype=float)

    # Else ensure that they have the same size as the discs
    else:
        self.weights_bkg = np.asarray(weights_bkg, dtype=float)
        if self.weights_bkg.shape != self.disc_bkg.shape:
            raise ValueError(
                "Length of 'weights_bkg' "
                f"({self.weights_bkg.shape}) must match 'disc_bkg' ({self.disc_bkg.shape})."
            )

    # Define attributes for the working points
    self.working_point = working_point
    self.fixed_bkg_rej = fixed_bkg_rej
    self.disc_cut = disc_cut
    self.flat_per_bin = flat_per_bin

    # Binning related variables
    self.n_bins = None
    self.bin_edges = None
    self.x_bin_centres = None
    self.bin_widths = None

    # Binned distributions
    self.bin_indices_sig = None
    self.disc_binned_sig = None
    self.weights_binned_sig = None
    self.bin_indices_bkg = None
    self.disc_binned_bkg = None
    self.weights_binned_bkg = None

    # Kwargs
    self.kwargs = kwargs

    # Set the bin edges
    self._set_bin_edges(bins)

    # Check that either disc_cut, working_point, or fixed_bkg_rej are defined
    not_none = [
        name
        for name, val in (
            ("disc_cut", self.disc_cut),
            ("working_point", self.working_point),
            ("fixed_bkg_rej", self.fixed_bkg_rej),
        )
        if val is not None
    ]

    if len(not_none) != 1:
        raise ValueError(
            f"Exactly one of disc_cut, working_point, fixed_bkg_rej must be provided! "
            f"Got {', '.join(not_none) if not_none else 'none'}."
        )

    if self.disc_cut is not None:
        # Ensure that flat_per_bin cannot be set
        if self.flat_per_bin:
            raise ValueError(
                "You cannot specify `disc_cut` when `flat_per_bin` is set to True."
            )

        # Ensure that disc_cut has the same length as the number of bins if it's used
        if isinstance(disc_cut, (list, np.ndarray)) and self.n_bins != len(disc_cut):
            raise ValueError(
                "`disc_cut` has to be a float or has to have the same length as number of bins."
            )

    elif self.working_point is not None:
        # Ensure that only a one float is given to working_point with flat_per_bin
        if self.flat_per_bin and not isinstance(working_point, float):
            raise ValueError("working_point must be a float when running with flat_per_bin!")

        # Ensure that the given working points are an array
        if isinstance(self.working_point, list):
            self.working_point = np.asarray(self.working_point)

        # Throw error if the working points are neither a float nor a list
        elif not isinstance(working_point, float):
            raise TypeError(
                "`working_point` must either be a list or a float! "
                f"You gave {type(self.working_point)}"
            )

    elif self.fixed_bkg_rej is not None:
        # Ensure that fixed_bkg_rej is a float:
        if not isinstance(fixed_bkg_rej, (int, float)):
            raise TypeError(
                "`fixed_bkg_rej` must be an int or a float!"
                f" You gave {type(self.fixed_bkg_rej)}"
            )

    # Apply the binning and get the discriminant value cuts as attributes
    self._apply_binning()
    self._get_disc_cuts()

    # Setup some needed attributes and also check them
    VarVsVar.__init__(
        self,
        x_var=self.x_bin_centres,
        y_var_mean=np.zeros_like(self.x_bin_centres),
        y_var_std=np.zeros_like(self.x_bin_centres),
        x_var_widths=2 * self.bin_widths,
        key=key,
        fill=True,
        plot_y_std=False,
        **kwargs,
    )

    # Calculate all efficiencies/rejections possible to make them easily available and storable
    self.results: dict[str, dict] = {"normal": {}, "inverse": {}}

    # Iterate over inverse and normal setup
    for iter_key in self.results:
        # The properties read `inverse_cut`, so it is switched per iteration
        self.inverse_cut = iter_key == "inverse"

        # Iterate over the different output modes and store them
        for iter_mode in VarVsEffPlot.mode_options:
            self.results[iter_key][iter_mode] = {}

            # We can't calculate always everything, so the ones that
            # make problems will be filtered
            try:
                (
                    self.results[iter_key][iter_mode]["y_value"],
                    self.results[iter_key][iter_mode]["y_error"],
                ) = getattr(self, iter_mode)

            except (TypeError, ValueError):
                (
                    self.results[iter_key][iter_mode]["y_value"],
                    self.results[iter_key][iter_mode]["y_error"],
                ) = (None, None)

    # Set inverse_cut back to false
    self.inverse_cut = False

args_to_store property #

Returns the arguments that need to be stored/loaded.

Returns:

Type Description
dict[str, typing.Any]

Dict with the arguments

bkg_eff property #

Calculate background efficiency per bin.

Returns:

Type Description
numpy.ndarray

Efficiency

numpy.ndarray

Efficiency_error

bkg_eff_sig_err property #

Calculate signal efficiency per bin, assuming a flat background per bin. This results in returning the signal efficiency per bin, but the background error per bin.

bkg_rej property #

Calculate background rejection per bin.

Returns:

Type Description
numpy.ndarray

Rejection

numpy.ndarray

Rejection_error

sig_eff property #

Calculate signal efficiency per bin.

Returns:

Type Description
numpy.ndarray

Efficiency

numpy.ndarray

Efficiency_error

sig_rej property #

Calculate signal rejection per bin.

Returns:

Type Description
numpy.ndarray

Rejection

numpy.ndarray

Rejection_error

efficiency #

Calculate efficiency and the associated error.

Parameters:

Name Type Description Default
arr numpy.ndarray

Array with discriminants

required
cut float | numpy.ndarray

Cut value. If you want to use PCFT, two values are provided. The lower and the upper cut.

required
weights numpy.ndarray | None

Weights to use for the array. Must be same length as arr! When None is given, equal weights are used. By default None

None

Returns:

Type Description
float | numpy.ndarray

Efficiency

float | numpy.ndarray

Efficiency error

Raises:

Type Description
TypeError

If the cut parameter type is not supported

Source code in puma/var_vs_eff.py
def efficiency(
    self,
    arr: np.ndarray,
    cut: float | np.ndarray,
    weights: np.ndarray | None = None,
) -> tuple[float, float] | tuple[np.ndarray, np.ndarray]:
    """Compute the weighted fraction of entries passing a cut and its error.

    Parameters
    ----------
    arr : np.ndarray
        Array with discriminants
    cut : float | np.ndarray
        Cut value. A single number selects `arr > cut` (or `arr < cut` when
        `inverse_cut` is set). An array is a two-sided cut that keeps the
        values with `cut[1] < arr < cut[0]`.
    weights : np.ndarray | None, optional
        Weights to use for the array. Must be same length as arr!
        When None is given, equal weights are used. By default None

    Returns
    -------
    float | np.ndarray
        Efficiency
    float | np.ndarray
        Efficiency error

    Raises
    ------
    TypeError
        If the cut parameter type is not supported
    """
    # Nothing to evaluate: efficiency and error are defined as zero
    if len(arr) == 0:
        return 0.0, 0.0

    # Equal weights when none are given
    if weights is None:
        weights = np.ones_like(arr, dtype=float)

    scalar_types = (int, float, np.integer, np.floating)
    if isinstance(cut, scalar_types):
        # One-sided cut, the direction depends on inverse_cut
        if self.inverse_cut:
            mask = arr < cut
        else:
            mask = arr > cut
    elif isinstance(cut, np.ndarray):
        # Two-sided cut: keep the values between cut[1] and cut[0]
        mask = (arr < cut[0]) & (arr > cut[1])
    else:
        raise TypeError(
            f"cut parameter type {type(cut)} is not supported! Must be float or np.ndarray"
        )

    # Sum of the weights passing the cut and of all weights
    passed = float(weights[mask].sum())
    total = float(weights.sum())

    # Without a positive total weight, efficiency and error are zero
    if total <= 0:
        return 0.0, 0.0

    eff = save_divide(numerator=passed, denominator=total)

    # Effective sample size N_eff = (sum w)^2 / sum(w^2), which equals the
    # number of entries for uniform weights
    n_eff = total**2 / float((weights**2).sum())
    eff_error = calculate_efficiency_error(eff, n_eff)

    return eff, eff_error

get #

Wrapper around rejection and efficiency functions.

Parameters:

Name Type Description Default
mode str

Can be "sig_eff", "bkg_eff", "sig_rej", "bkg_rej", or "bkg_eff_sig_err"

required
inverse_cut bool

Inverts the discriminant cut, which will yield the efficiency or rejection of the jets not passing the working point, by default False

False

Returns:

Type Description
numpy.ndarray

Rejection or efficiency depending on mode value

numpy.ndarray

Rejection or efficiency error depending on mode value

Raises:

Type Description
ValueError

If mode not supported

Source code in puma/var_vs_eff.py
def get(self, mode: str, inverse_cut: bool = False):
    """Look up the stored values and errors for the requested mode.

    Parameters
    ----------
    mode : str
        Can be "sig_eff", "bkg_eff", "sig_rej", "bkg_rej", or
        "bkg_eff_sig_err"
    inverse_cut : bool, optional
        When set, the results stored under "inverse" are returned instead of
        the ones stored under "normal", i.e. the efficiency or rejection of the
        jets not passing the working point. By default False

    Returns
    -------
    np.ndarray
        Rejection or efficiency depending on `mode` value
    np.ndarray
        Rejection or efficiency error depending on `mode` value

    Raises
    ------
    ValueError
        If mode not supported
    """
    # Reject unknown modes up front so the lookup below stays flat
    if mode not in VarVsEffPlot.mode_options:
        raise ValueError(
            f"The selected mode {mode} is not supported. Use one of the following:"
            f" {VarVsEffPlot.mode_options}."
        )

    # Only the literal ``False`` selects the "normal" set of results
    branch = "inverse" if inverse_cut is not False else "normal"
    return (
        self.results[branch][mode]["y_value"],
        self.results[branch][mode]["y_error"],
    )

rejection #

Calculate rejection and the associated error.

Parameters:

Name Type Description Default
arr numpy.ndarray

Array with discriminants

required
cut float | numpy.ndarray

Cut value used for the given working point. Ndarray when PCFT-style cuts are used

required
weights numpy.ndarray | None

Weights to use for the array. Must be same length as arr! When None is given, equal weights are used. By default None

None

Returns:

Type Description
float | numpy.ndarray

Rejection

float | numpy.ndarray

Rejection error

Raises:

Type Description
TypeError

If the cut parameter type is not supported

Source code in puma/var_vs_eff.py
def rejection(
    self,
    arr: np.ndarray,
    cut: float | np.ndarray,
    weights: np.ndarray | None = None,
) -> tuple[float, float] | tuple[np.ndarray, np.ndarray]:
    """Calculate rejection and the associated error.

    Parameters
    ----------
    arr : np.ndarray
        Array with discriminants
    cut : float | np.ndarray
        Cut value used for the given working point. Ndarray
        when PCFT-style cuts are used
    weights : np.ndarray | None, optional
        Weights to use for the array. Must be same length as arr!
        When None is given, equal weights are used. By default None

    Returns
    -------
    float | np.ndarray
        Rejection
    float | np.ndarray
        Rejection error

    Raises
    ------
    TypeError
        If the cut parameter type is not supported
    """
    # If no values are given, return 0 for eff and eff_err
    if len(arr) == 0:
        return 0.0, 0.0

    # Fall back if no weights are used
    if weights is None:
        weights = np.ones_like(arr, dtype=float)

    # Get the denominator from the total sum of weights
    denominator = float(weights.sum())

    # Return NaN if total weights is <= 0
    if denominator <= 0:
        logger.warning("Total weight in bin is <= 0. Setting rejection to NaN.")
        return np.nan, np.nan

    # If the cut is a single value, this is a non-PCFT WP
    if isinstance(cut, (int, float, np.integer, np.floating)):
        # Check for the inverse cut
        mask = arr < cut if self.inverse_cut else arr > cut

    # If the cut is an array, this is a PCFT-style two-sided cut
    # Keep values between cut[1] and cut[0]
    elif isinstance(cut, np.ndarray):
        mask = (arr < cut[0]) & (arr > cut[1])

    # Else return an error that this type of cut is not supported
    else:
        raise TypeError(
            f"cut parameter type {type(cut)} is not supported! Must be float or np.ndarray"
        )

    # Get the total weight for passing jets
    numerator = float(weights[mask].sum())

    # Calculate the rejection by using the inverse of the efficiency (1 / eff)
    # To do so with less calculation, flip numerator and denominator
    rej = save_divide(numerator=denominator, denominator=numerator, default=np.inf)

    # If the rejection is infinite, print a warning and return NaN
    if rej == np.inf:
        logger.warning("Your rejection is infinity -> setting it to np.nan.")
        return np.nan, np.nan

    # Use effective sample size for proper weighted binomial uncertainty:
    # N_eff = (sum w)^2 / sum(w^2). For uniform weights, N_eff = N.
    n_eff = denominator**2 / float((weights**2).sum())
    rej_error = calculate_rejection_error(rej, n_eff)
    return rej, rej_error

puma.var_vs_eff.VarVsEffPlot #

Bases: puma.var_vs_var.VarVsVarPlot

VarVsEff plot class.

Parameters:

Name Type Description Default
mode str

Defines which quantity is plotted. The following options are available: sig_eff - Plots signal efficiency vs. variable, with statistical error on N signal per bin; bkg_eff - Plots background efficiency vs. variable, with statistical error on N background per bin; sig_rej - Plots signal rejection vs. variable, with statistical error on N signal per bin; bkg_rej - Plots background rejection vs. variable, with statistical error on N background per bin; bkg_eff_sig_err - Plots background efficiency vs. variable, with statistical error on N signal per bin.

required
grid bool

Set the grid for the plots.

False
**kwargs typing.Any

Keyword arguments from puma.PlotObject

{}

Attributes:

Name Type Description
mode_options typing.ClassVar[list[str]]

List of possible modes.

Raises:

Type Description
ValueError

If incompatible mode given or more than 1 ratio panel requested

Source code in puma/var_vs_eff.py
def __init__(self, mode: str, grid: bool = False, **kwargs: Any) -> None:
    """Set up the plot and store the validated plotting mode.

    Parameters
    ----------
    mode : str
        Quantity to plot, must be one of ``self.mode_options``
    grid : bool, optional
        Forwarded to the parent constructor, by default False
    **kwargs : Any
        Further keyword arguments forwarded to the parent constructor

    Raises
    ------
    ValueError
        If `mode` is not one of ``self.mode_options``
    """
    # The parent is initialised first, exactly as before the mode check
    super().__init__(grid=grid, **kwargs)

    mode_is_known = mode in self.mode_options
    if not mode_is_known:
        message = (
            f"The selected mode {mode} is not supported. Use one of the following: "
            f"{self.mode_options}."
        )
        raise ValueError(message)

    self.mode = mode

apply_modified_atlas_second_tag #

Modifies the atlas_second_tag to include info on the type of p-eff plot.

Parameters:

Name Type Description Default
signal ftag.labels.Label | None

Signal flavour that is used. By default None

None
background ftag.labels.Label | None

Background flavour that is used. By default None

None
working_point float | list | None

Working point that was used. When PCFT was used, this is a list of length 2. If disc_cut/fixed_bkg_rej is used, this needs to be None, by default None

None
fixed_bkg_rej float | None

The background rejection to which it was fixed. Must be None when working_point or disc_cut is used. By default None

None
disc_cut float | None

Discriminant cut that was used. If working_point/fixed_bkg_rej is used, this must be None. By default None

None
flat_per_bin bool

If flat_per_bin was used, by default False

False
Source code in puma/var_vs_eff.py
def apply_modified_atlas_second_tag(
    self,
    signal: Label | None = None,
    background: Label | None = None,
    working_point: float | list | None = None,
    fixed_bkg_rej: float | None = None,
    disc_cut: float | None = None,
    flat_per_bin: bool = False,
):
    """Modifies the atlas_second_tag to include info on the type of p-eff plot.

    The first of `working_point`, `disc_cut` and `fixed_bkg_rej` that is not
    None decides which text is appended (in that order of priority).

    Parameters
    ----------
    signal : Label | None, optional
        Signal flavour that is used. Its ``eff_str`` is read when `working_point`
        is given and its ``name`` when `disc_cut` is given. By default None
    background : Label | None, optional
        Background flavour that is used. Its ``rej_str`` is read when
        `fixed_bkg_rej` is given. By default None
    working_point : float | list | None, optional
        Working point that was used. When PCFT was used, this is a list of length 2.
        If disc_cut/fixed_bkg_rej is used, this needs to be None, by default None
    fixed_bkg_rej : float | None, optional
        The background rejection to which it was fixed. Must be None when working_point
        or disc_cut is used. By default None
    disc_cut : float | None, optional
        Discriminant cut that was used. If working_point/fixed_bkg_rej is used,
        this must be None. By default None
    flat_per_bin : bool, optional
        If flat_per_bin was used, by default False

    Raises
    ------
    ValueError
        If none of `working_point`, `disc_cut` or `fixed_bkg_rej` is given
    """
    # Compare against None explicitly: 0 / 0.0 are legitimate values (e.g. a
    # discriminant cut at zero) and must not be treated as "not provided".
    if working_point is not None:
        if isinstance(working_point, (list, tuple)):
            # PCFT-style working point: lower and upper edge of the bin
            mid_str = (
                f"{int(round(working_point[0] * 100, 0))}% - "
                f"{int(round(working_point[1] * 100, 0))}% " + signal.eff_str
            )
        else:
            mid_str = f"{int(round(working_point * 100, 0))}% " + signal.eff_str
    elif disc_cut is not None:
        # Drop only a literal trailing "jets". str.rstrip("jets") would strip
        # any trailing run of the characters j/e/t/s ("lightjets" -> "ligh").
        flavour = signal.name
        if flavour.endswith("jets"):
            flavour = flavour[: -len("jets")]
        mid_str = rf"$D_{{{flavour}}}$ > {disc_cut}"
    elif fixed_bkg_rej is not None:
        mid_str = f"{int(round(fixed_bkg_rej, 0))} " + background.rej_str
    else:
        raise ValueError(
            "One of `working_point`, `disc_cut` or `fixed_bkg_rej` must be provided."
        )

    tag = f"Flat {mid_str} per bin" if flat_per_bin else f"{mid_str}"

    # Append to an existing second tag on a new line, otherwise set it
    if self.atlas_second_tag:
        self.atlas_second_tag = f"{self.atlas_second_tag}\n{tag}"
    else:
        self.atlas_second_tag = tag

plot #

Plotting curves.

Parameters:

Name Type Description Default
**kwargs typing.Any

Keyword arguments passed to plt.axis.errorbar

{}

Returns:

Type Description
matplotlib.lines.Line2D

matplotlib Line2D object

Source code in puma/var_vs_eff.py
def plot(self, **kwargs: Any) -> mpl.lines.Line2D:
    """Plotting curves.

    Logs the active mode, prepares the curves via ``self._setup_curves()``
    and then delegates the drawing to the parent class.

    Parameters
    ----------
    **kwargs: Any
        Keyword arguments passed to plt.axis.errorbar

    Returns
    -------
    mpl.lines.Line2D
        matplotlib Line2D object
    """
    # Lazy %-style arguments: the message is only formatted when debug
    # logging is enabled
    logger.debug("Plotting curves with mode %s", self.mode)
    self._setup_curves()
    return super().plot(**kwargs)

puma.var_vs_vtx.VarVsVtx #

Bases: puma.var_vs_var.VarVsVar

VarVsVtx class storing info about vertexing performance.

Parameters:

Name Type Description Default
x_var numpy.ndarray

Values for x-axis variable for signal

required
n_match numpy.ndarray

Values for number of correctly identified objects (where truth and reco match)

required
n_true numpy.ndarray

Values for true number of objects

required
n_reco numpy.ndarray

Values for reconstructed number of objects

required
bins int | collections.abc.Sequence

If bins is an int, it defines the number of equal-width bins in the given range (10, by default). If bins is a sequence, it defines a monotonically increasing array of bin edges, including the rightmost edge, allowing for non-uniform bin widths, by default 10

10
key str | None

Identifier for the curve e.g. tagger, by default None

None
**kwargs typing.Any

Keyword arguments passed to PlotLineObject

{}

Raises:

Type Description
ValueError

If provided options are not compatible with each other

Source code in puma/var_vs_vtx.py
def __init__(
    self,
    x_var: np.ndarray,
    n_match: np.ndarray,
    n_true: np.ndarray,
    n_reco: np.ndarray,
    bins: int | Sequence = 10,
    key: str | None = None,
    **kwargs: Any,
) -> None:
    """Validate the inputs, bin them and initialise the underlying VarVsVar.

    Parameters
    ----------
    x_var : np.ndarray
        Values for x-axis variable
    n_match : np.ndarray
        Values for number of correctly identified objects
    n_true : np.ndarray
        Values for true number of objects
    n_reco : np.ndarray
        Values for reconstructed number of objects
    bins : int | Sequence, optional
        Binning definition handed to ``self._set_bin_edges``, by default 10
    key : str | None, optional
        Identifier for the curve e.g. tagger, by default None
    **kwargs : Any
        Keyword arguments passed on to ``VarVsVar.__init__``

    Raises
    ------
    ValueError
        If one of `n_match`, `n_true` or `n_reco` differs in length from `x_var`
    """
    # Each count array must line up with x_var; checked in the order
    # n_match, n_true, n_reco so the first mismatch is the one reported
    count_inputs = (("n_match", n_match), ("n_true", n_true), ("n_reco", n_reco))
    for label, values in count_inputs:
        if len(x_var) != len(values):
            raise ValueError(
                f"Length of `x_var` ({len(x_var)}) and `{label}` "
                f"({len(values)}) have to be identical."
            )

    # Store array copies of the raw inputs
    self.x_var = np.array(x_var)
    self.n_match = np.array(n_match)
    self.n_true = np.array(n_true)
    self.n_reco = np.array(n_reco)

    # Placeholders for the binning-related attributes and binned
    # distributions, initialised before the binning helpers run
    self.n_bins = None
    self.bin_edges = None
    self.x_bin_centres = None
    self.bin_widths = None
    self.bin_indices = None
    self.metric_binned = None

    self._set_bin_edges(bins)
    self._apply_binning()

    # The y values start as zeros (two independent arrays); x positions are
    # the bin centres and the x widths are twice ``self.bin_widths``
    centres = self.x_bin_centres
    placeholder_mean = np.zeros_like(centres)
    placeholder_std = np.zeros_like(centres)
    VarVsVar.__init__(
        self,
        x_var=centres,
        y_var_mean=placeholder_mean,
        y_var_std=placeholder_std,
        x_var_widths=2 * self.bin_widths,
        key=key,
        fill=True,
        plot_y_std=False,
        **kwargs,
    )

efficiency property #

Calculate vertexing efficiency per bin. Defined as number of reconstructed vertices matched to truth divided by number of total true vertices.

Returns:

Type Description
numpy.ndarray

Efficiency

numpy.ndarray

Efficiency error

fakes property #

Calculate vertexing fake rate per bin. Defined as total number of events with reconstructed vertices where vertices are not expected.

Returns:

Type Description
numpy.ndarray

Fake rate

numpy.ndarray

Fake rate error

purity property #

Calculate vertexing purity per bin. Defined as number of reconstructed vertices matched to truth divided by number of total reconstructed vertices.

Returns:

Type Description
numpy.ndarray

Purity

numpy.ndarray

Purity error

__eq__ #

Handles a == check with the class.

Parameters:

Name Type Description Default
other puma.var_vs_vtx.VarVsVtx

Other VarVsVtx that this class is tested against

required

Returns:

Type Description
bool

If this VarVsVtx and the other are equal

Source code in puma/var_vs_vtx.py
def __eq__(self, other: VarVsVtx) -> bool:
    """Handles a == check with the class.

    Two instances are equal when their input arrays (`x_var`, `n_match`,
    `n_true`, `n_reco`), their `bin_edges` and their `key` all match.

    Parameters
    ----------
    other : VarVsVtx
        Other VarVsVtx that this class is tested against

    Returns
    -------
    bool
        If this VarVsVtx and the other are equal. Objects of a different
        class and arrays of different shape compare as not equal.
    """
    if not isinstance(other, self.__class__):
        return False

    # np.array_equal returns False for mismatching shapes, whereas an
    # elementwise ``==`` on arrays of different length raises in recent
    # NumPy versions instead of yielding a comparison result.
    array_attrs = ("x_var", "n_match", "n_true", "n_reco", "bin_edges")
    arrays_match = all(
        np.array_equal(getattr(self, attr), getattr(other, attr)) for attr in array_attrs
    )

    # Wrap in bool() so a plain Python bool is always returned
    return bool(arrays_match and self.key == other.key)

get #

Wrapper around the efficiency, purity and fake rate properties.

Parameters:

Name Type Description Default
mode str

Can be "efficiency", "purity" or "fakes"

required

Returns:

Type Description
numpy.ndarray

Efficiency, purity or fake rate depending on mode value

numpy.ndarray

Efficiency, purity or fake rate error depending on mode value

Raises:

Type Description
ValueError

If mode not supported

Source code in puma/var_vs_vtx.py
def get(self, mode: str):
    """Return the vertexing metric selected by `mode`.

    Thin accessor for the `efficiency`, `purity` and `fakes` attributes of
    this object.

    Parameters
    ----------
    mode : str
        Can be "efficiency", "purity" or "fakes"

    Returns
    -------
    np.ndarray
        Efficiency, purity or fake rate depending on `mode` value
    np.ndarray
        Efficiency, purity or fake rate error depending on `mode` value

    Raises
    ------
    ValueError
        If mode not supported
    """
    # The supported mode names coincide with the attribute names, so only the
    # requested attribute is looked up (and therefore evaluated)
    if mode in ("efficiency", "purity", "fakes"):
        return getattr(self, mode)

    raise ValueError(
        f"The selected mode {mode} is not supported. Use one of the following:"
        f" {VarVsVtxPlot.mode_options}."
    )

get_performance_ratio #

Calculate performance ratio for vertexing task. Either n_matched/n_true (efficiency) or n_matched/n_reco (purity).

Parameters:

Name Type Description Default
num numpy.ndarray

Numerator counts, e.g. the number of matched vertices (n_matched)

required
denom numpy.ndarray

Denominator counts, e.g. the number of true (n_true) or reconstructed (n_reco) vertices

required

Returns:

Type Description
numpy.ndarray

Performance ratio

numpy.ndarray

Performance ratio error

Source code in puma/var_vs_vtx.py
def get_performance_ratio(
    self,
    num: np.ndarray,
    denom: np.ndarray,
) -> tuple[np.ndarray, np.ndarray]:
    """Calculate performance ratio for vertexing task. Either n_matched/n_true
    (efficiency) or n_matched/n_reco (purity).

    The ratio is formed from the sums of both inputs.

    Parameters
    ----------
    num : np.ndarray
        Numerator counts, e.g. the number of matched vertices
    denom : np.ndarray
        Denominator counts, e.g. the number of true or reconstructed vertices

    Returns
    -------
    np.ndarray
        Performance ratio. NaN if the ratio evaluates to infinity
    np.ndarray
        Performance ratio error. NaN if the ratio is infinite, 0.0 if the
        ratio is zero
    """
    total_num = np.sum(num)
    total_denom = np.sum(denom)

    # Infinity is the fallback value handed to save_divide
    ratio = save_divide(total_num, total_denom, default=np.inf)

    if ratio == np.inf:
        logger.warning("Your vertexing performance ratio is infinity -> setting it to np.nan.")
        return np.nan, np.nan

    if ratio == 0:
        logger.warning("Your vertexing performance ratio is zero -> setting error to zero.")
        return 0.0, 0.0

    # The number of entries in `num` is used as the sample size for the error
    ratio_error = calculate_efficiency_error(ratio, len(num))
    return ratio, ratio_error

puma.var_vs_vtx.VarVsVtxPlot #

Bases: puma.var_vs_var.VarVsVarPlot

var_vs_vtx plot class.

Parameters:

Name Type Description Default
mode str

Defines which quantity is plotted. The following options are available: efficiency - Plots efficiency vs. variable for jets where vertices are expected; purity - Plots purity vs. variable for jets where vertices are expected; fakes - Plots fake rate vs. variable for jets where vertices are not expected.

required
grid bool

Set the grid for the plots.

False
**kwargs typing.Any

Keyword arguments from puma.PlotObject

{}

Attributes:

Name Type Description
mode_options typing.ClassVar[list[str]]

List of possible modes.

Raises:

Type Description
ValueError

If incompatible mode given or more than 1 ratio panel requested

Source code in puma/var_vs_vtx.py
def __init__(self, mode: str, grid: bool = False, **kwargs: Any) -> None:
    """Initialise the plot and remember which vertexing metric to draw.

    Parameters
    ----------
    mode : str
        Quantity to plot, must be one of ``self.mode_options``
    grid : bool, optional
        Forwarded to the parent constructor, by default False
    **kwargs : Any
        Further keyword arguments forwarded to the parent constructor

    Raises
    ------
    ValueError
        If `mode` is not one of ``self.mode_options``
    """
    # Parent set-up happens before the mode is validated
    super().__init__(grid=grid, **kwargs)

    if mode in self.mode_options:
        self.mode = mode
        return

    raise ValueError(
        f"The selected mode {mode} is not supported. Use one of the following: "
        f"{self.mode_options}."
    )

plot #

Plotting curves.

Parameters:

Name Type Description Default
**kwargs typing.Any

Keyword arguments passed to plt.axis.errorbar

{}

Returns:

Type Description
matplotlib.lines.Line2D

matplotlib Line2D object

Source code in puma/var_vs_vtx.py
def plot(self, **kwargs: Any) -> mpl.lines.Line2D:
    """Draw the curves for the currently selected mode.

    The curves are prepared with ``self._setup_curves()`` before the parent
    class does the actual drawing.

    Parameters
    ----------
    **kwargs: Any
        Keyword arguments passed to plt.axis.errorbar

    Returns
    -------
    mpl.lines.Line2D
        matplotlib Line2D object
    """
    logger.debug("Plotting curves with mode %s", self.mode)

    # Curves must be set up before delegating to the parent implementation
    self._setup_curves()
    drawn = super().plot(**kwargs)
    return drawn