Variable vs Variable

`puma.var_vs_var.VarVsVar` #

Bases: puma.plot_base.PlotLineObject

Class storing the information of a VarVsVar curve; it also allows calculating the ratio with respect to another VarVsVar curve.

Initialise properties of VarVsVar curve object.

Parameters:

Name	Type	Description	Default
`x_var`	`numpy.ndarray`	Values for x-axis variable, e.g. bin midpoints for binned data	required
`y_var_mean`	`numpy.ndarray`	Mean value for y-axis variable	required
`y_var_std`	`numpy.ndarray`	Std value for y-axis variable	required
`x_var_widths`	`numpy.ndarray`	Widths for x-axis variable, e.g. bin widths for binned data	`None`
`key`	`str`	Identifier for the curve e.g. tagger, by default None	`None`
`fill`	`bool`	Whether to fill the box around each point, by default True	`True`
`plot_y_std`	`bool`	Whether to plot `y_var_std` as the y error bars, by default True	`True`
`**kwargs`	`kwargs`	Keyword arguments passed to `PlotLineObject`	`{}`

Raises:

Type	Description
`ValueError`	If provided options are not compatible with each other

Source code in puma/var_vs_var.py

def __init__(
    self,
    x_var: np.ndarray,
    y_var_mean: np.ndarray,
    y_var_std: np.ndarray,
    x_var_widths: np.ndarray = None,
    key: str | None = None,
    fill: bool = True,
    plot_y_std: bool = True,
    **kwargs,
) -> None:
    """Set up a VarVsVar curve from its x positions and y statistics.

    Parameters
    ----------
    x_var : np.ndarray
        Values for x-axis variable, e.g. bin midpoints for binned data
    y_var_mean : np.ndarray
        Mean value for y-axis variable
    y_var_std : np.ndarray
        Std value for y-axis variable
    x_var_widths : np.ndarray, optional
        Widths for x-axis variable, e.g. bin widths for binned data,
        by default None
    key : str, optional
        Identifier for the curve e.g. tagger, by default None
    fill : bool, optional
        Whether to fill the box around each point, by default True
    plot_y_std : bool, optional
        Whether to plot `y_var_std`, by default True
    **kwargs : kwargs
        Keyword arguments passed to `PlotLineObject`

    Raises
    ------
    ValueError
        If `y_var_mean`, `y_var_std` or (when given) `x_var_widths` does
        not have the same length as `x_var`
    """
    super().__init__(**kwargs)

    n_points = len(x_var)
    # `x_var_widths` is the only array that may be left out.
    to_check = [("y_var_mean", y_var_mean), ("y_var_std", y_var_std)]
    if x_var_widths is not None:
        to_check.append(("x_var_widths", x_var_widths))
    for name, values in to_check:
        if len(values) != n_points:
            raise ValueError(
                f"Length of `x_var` ({n_points}) and `{name}` "
                f"({len(values)}) have to be identical."
            )

    # Store copies as numpy arrays, whatever sequence type was passed in.
    self.x_var = np.array(x_var)
    self.y_var_mean = np.array(y_var_mean)
    self.y_var_std = np.array(y_var_std)
    if x_var_widths is None:
        self.x_var_widths = None
    else:
        self.x_var_widths = np.array(x_var_widths)

    self.key = key
    self.fill = fill
    self.plot_y_std = plot_y_std

`divide` #

Calculate ratio between two class objects.

Parameters:

Name	Type	Description	Default
`other`	`VarVsVar class`	Second VarVsVar object to calculate ratio with	required
`inverse`	`bool`	If False the ratio is calculated `this / other`, if True the inverse is calculated	`False`

Returns:

Type	Description
`numpy.ndarray`	Ratio
`numpy.ndarray`	Ratio error

Raises:

Type	Description
`ValueError`	If binning is not identical between 2 objects

Source code in puma/var_vs_var.py

def divide(self, other, inverse: bool = False):
    """Compute the ratio of this curve to another curve.

    Parameters
    ----------
    other : VarVsVar class
        Second VarVsVar object to calculate ratio with
    inverse : bool
        If False the ratio is calculated `this / other`,
        if True the inverse (`other / this`) is calculated

    Returns
    -------
    np.ndarray
        Ratio
    np.ndarray
        Ratio error, as returned by `hist_ratio` for the numerator
        uncertainty

    Raises
    ------
    ValueError
        If the x values of the two objects are not identical
    """
    if not np.array_equal(self.x_var, other.x_var):
        raise ValueError("The x variables of the two given objects do not match.")

    own = (self.y_var_mean, self.y_var_std)
    ref = (other.y_var_mean, other.y_var_std)
    # `inverse` only decides which of the two curves is the numerator.
    (top, top_unc), (bottom, _) = (ref, own) if inverse else (own, ref)

    ratio, ratio_err = hist_ratio(
        numerator=top,
        denominator=bottom,
        numerator_unc=top_unc,
        step=False,
    )
    return (ratio, ratio_err)

`puma.var_vs_var.VarVsVarPlot` #

Bases: puma.plot_base.PlotBase

var_vs_var plot class.

var_vs_var plot properties.

Parameters:

Name	Type	Description	Default
`grid`	`bool`	Set the grid for the plots.	`False`
`**kwargs`	`kwargs`	Keyword arguments from `puma.PlotObject`	`{}`

Raises:

Type	Description
`ValueError`	If incompatible mode given or more than 1 ratio panel requested

Source code in puma/var_vs_var.py

def __init__(self, grid: bool = False, **kwargs) -> None:
    """Create an empty var_vs_var plot and initialise its figure.

    Parameters
    ----------
    grid : bool, optional
        Set the grid for the plots, by default False
    **kwargs : kwargs
        Keyword arguments from `puma.PlotObject`

    Raises
    ------
    ValueError
        If more than one ratio panel is requested
    """
    super().__init__(grid=grid, **kwargs)

    # Bookkeeping for the curves that are registered later on.
    self.plot_objects = {}
    self.add_order = []
    self.ratios_objects = {}
    self.reference_object = None
    self.inverse_cut = False
    # Start from an "empty" x range so the first curve added defines it.
    self.x_var_min, self.x_var_max = np.inf, -np.inf

    if self.n_ratio_panels > 1:
        raise ValueError("Not more than one ratio panel supported.")
    self.initialise_figure()

`add` #

Adding VarVsVar object to figure.

Parameters:

Name	Type	Description	Default
`curve`	`VarVsVar class`	VarVsVar curve	required
`key`	`str`	Unique identifier for VarVsVar curve, by default None	`None`
`reference`	`bool`	If VarVsVar is used as reference for ratio calculation, by default False	`False`

Raises:

Type	Description
`KeyError`	If unique identifier key is used twice

Source code in puma/var_vs_var.py

def add(self, curve: VarVsVar, key: str | None = None, reference: bool = False):
    """Register a VarVsVar curve on the figure and fill in default styling.

    Style attributes of `curve` that are still None (linestyle, colour,
    alpha, linewidth and, for marker curves, marker, markersize and
    markeredgewidth) are set to defaults. The x range of the figure is
    widened so that it covers the new curve.

    Parameters
    ----------
    curve : VarVsVar class
        VarVsVar curve
    key : str, optional
        Unique identifier for VarVsVar curve. If None, the number of
        curves registered so far plus one is used, by default None
    reference : bool, optional
        If VarVsVar is used as reference for ratio calculation, by default False

    Raises
    ------
    KeyError
        If unique identifier key is used twice
    """
    if key is None:
        key = len(self.plot_objects) + 1
    if key in self.plot_objects:
        raise KeyError(f"Duplicated key {key} already used for unique identifier.")

    self.plot_objects[key] = curve
    self.add_order.append(key)
    n_curves = len(self.plot_objects)

    # set linestyle
    if curve.linestyle is None:
        curve.linestyle = "-"
    # set colours
    if curve.colour is None:
        # Wrap around the palette so that adding more curves than there are
        # colours reuses colours instead of raising an IndexError.
        colours = get_good_colours()
        curve.colour = colours[(n_curves - 1) % len(colours)]
    # set alpha
    if curve.alpha is None:
        curve.alpha = 0.8
    # set linewidth
    if curve.linewidth is None:
        curve.linewidth = 1.6

    if curve.is_marker is True:
        if curve.marker is None:
            # NOTE(review): the marker index is one ahead of the colour index
            # (the first marker is never picked). Kept as is so existing plots
            # do not change - confirm whether this offset is intended.
            markers = get_good_markers()
            curve.marker = markers[n_curves % len(markers)]
        # Set markersize
        if curve.markersize is None:
            curve.markersize = 8
        if curve.markeredgewidth is None:
            curve.markeredgewidth = 2

    # set min and max edges
    if curve.x_var_widths is not None:
        left_edge = curve.x_var - curve.x_var_widths / 2
        right_edge = curve.x_var + curve.x_var_widths / 2
    else:
        left_edge = curve.x_var
        right_edge = curve.x_var
    self.x_var_min = min(self.x_var_min, np.sort(left_edge)[0])
    self.x_var_max = max(self.x_var_max, np.sort(right_edge)[-1])

    if reference:
        logger.debug("Setting curve %s as reference.", key)
        self.set_reference(key)

`draw` #

Draw figure.

Parameters:

Name	Type	Description	Default
`labelpad`	`int`	Spacing in points from the axes bounding box including ticks and tick labels, applied to the ratio panel y-label, by default None	`None`

Source code in puma/var_vs_var.py

def draw(
    self,
    labelpad: int | None = None,
):
    """Draw figure.

    Parameters
    ----------
    labelpad : int, optional
        Spacing in points from the axes bounding box including
        ticks and tick labels, by default "ratio"
    """
    self.set_xlim(
        self.x_var_min if self.xmin is None else self.xmin,
        self.x_var_max if self.xmax is None else self.xmax,
    )
    plt_handles = self.plot()
    if self.n_ratio_panels == 1:
        self.plot_ratios()
    self.set_title()
    self.set_log()
    self.set_y_lim()
    self.set_xlabel()
    self.set_tick_params()
    self.set_ylabel(self.axis_top)

    if self.n_ratio_panels > 0:
        self.set_ylabel(
            self.ratio_axes[0],
            self.ylabel_ratio[0],
            align_right=False,
            labelpad=labelpad,
        )
    self.make_legend(plt_handles, ax_mpl=self.axis_top)
    self.plotting_done = True
    if self.apply_atlas_style is True:
        self.atlasify()

`draw_hline` #

Draw hline in top plot panel.

Parameters:

Name	Type	Description	Default
`y_val`	`float`	y value of the horizontal line	required

Source code in puma/var_vs_var.py

def draw_hline(self, y_val: float):
    """Add a dotted horizontal line to the top panel.

    The line spans the x range collected from the added curves
    (`x_var_min` to `x_var_max`).

    Parameters
    ----------
    y_val : float
        y value of the horizontal line
    """
    # Fixed appearance of the guide line.
    line_style = {"colors": "black", "linestyle": "dotted", "alpha": 0.5}
    self.axis_top.hlines(
        y=y_val,
        xmin=self.x_var_min,
        xmax=self.x_var_max,
        **line_style,
    )

`plot` #

Plotting curves.

Parameters:

Name	Type	Description	Default
`**kwargs`		Keyword arguments passed to plt.axis.errorbar	`{}`

Returns:

Type	Description
`puma.line_plot_2d.Line2D`	matplotlib Line2D object

Source code in puma/var_vs_var.py

def plot(self, **kwargs):
    """Draw all registered curves on the top panel.

    For each curve, in the order they were added, error bars are drawn,
    followed by markers (if the curve is a marker curve) and filled boxes
    (if bin widths are available and `fill` is set).

    Parameters
    ----------
    **kwargs: kwargs
        Keyword arguments passed to plt.axis.errorbar

    Returns
    -------
    list
        One matplotlib Line2D legend handle per curve
    """
    logger.debug("Plotting curves")
    legend_handles = []
    for key in self.add_order:
        curve = self.plot_objects[key]
        has_widths = curve.x_var_widths is not None

        x_err = curve.x_var_widths / 2 if has_widths else None
        # Without `plot_y_std` the y error bars collapse to zero length.
        y_err = curve.y_var_std if curve.plot_y_std else np.zeros_like(curve.x_var)
        bars = self.axis_top.errorbar(
            curve.x_var,
            curve.y_var_mean,
            xerr=x_err,
            yerr=y_err,
            color=curve.colour,
            fmt="none",
            label=curve.label,
            alpha=curve.alpha,
            linewidth=curve.linewidth,
            ms=curve.markersize,
            **kwargs,
        )
        # Linestyle is applied to the first bar collection of the container.
        bars[-1][0].set_linestyle(curve.linestyle)

        if curve.is_marker is True:
            self.axis_top.scatter(
                x=curve.x_var,
                y=curve.y_var_mean,
                marker=curve.marker,
                s=curve.markersize**2,
                color=curve.colour,
            )

        if has_widths and curve.fill:
            # One box per point: bin width wide and two std high, centred
            # on the point.
            box_heights = 2 * curve.y_var_std
            for x_mid, y_mid, box_width, box_height in zip(
                curve.x_var, curve.y_var_mean, curve.x_var_widths, box_heights
            ):
                lower_left = (x_mid - box_width / 2, y_mid - box_height / 2)
                box = Rectangle(
                    xy=lower_left,
                    width=box_width,
                    height=box_height,
                    linewidth=0,
                    color=curve.colour,
                    alpha=0.3,
                    zorder=1,
                )
                self.axis_top.add_patch(box)

        # Empty proxy line that only carries the legend entry.
        handle = mpl.lines.Line2D(
            [],
            [],
            color=curve.colour,
            label=curve.label,
            linestyle=curve.linestyle,
            marker=curve.marker,
            markersize=curve.markersize,
        )
        legend_handles.append(handle)
    return legend_handles

`plot_ratios` #

Plotting ratio curves.

Raises:

Type	Description
`ValueError`	If no reference curve is defined

Source code in puma/var_vs_var.py

def plot_ratios(self):
    """Draw the ratio of every curve to the reference in the ratio panel.

    Raises
    ------
    ValueError
        If no reference curve is defined
    """
    if self.reference_object is None:
        raise ValueError("Please specify a reference curve.")

    for key in self.add_order:
        curve = self.plot_objects[key]
        reference = self.plot_objects[self.reference_object]
        (ratio, ratio_err) = curve.divide(reference)
        has_widths = curve.x_var_widths is not None

        x_err = curve.x_var_widths / 2 if has_widths else None
        # Without `plot_y_std` the y error bars collapse to zero length.
        y_err = ratio_err if curve.plot_y_std else np.zeros_like(curve.x_var)
        bars = self.ratio_axes[0].errorbar(
            curve.x_var,
            ratio,
            xerr=x_err,
            yerr=y_err,
            color=curve.colour,
            fmt="none",
            alpha=curve.alpha,
            linewidth=curve.linewidth,
            ms=curve.markersize,
        )
        # Linestyle is applied to the first bar collection of the container.
        bars[-1][0].set_linestyle(curve.linestyle)

        if curve.is_marker is True:
            self.ratio_axes[0].scatter(
                x=curve.x_var,
                y=ratio,
                marker=curve.marker,
                color=curve.colour,
                s=curve.markersize**2,
            )

        if has_widths and curve.fill:
            # One box per point: bin width wide and two ratio errors high,
            # centred on the ratio value.
            box_heights = 2 * ratio_err
            for x_mid, y_mid, box_width, box_height in zip(
                curve.x_var, ratio, curve.x_var_widths, box_heights
            ):
                lower_left = (x_mid - box_width / 2, y_mid - box_height / 2)
                box = Rectangle(
                    xy=lower_left,
                    width=box_width,
                    height=box_height,
                    linewidth=0,
                    color=curve.colour,
                    alpha=0.3,
                    zorder=1,
                )
                self.ratio_axes[0].add_patch(box)

`set_reference` #

Setting the reference roc curves used in the ratios.

Parameters:

Name	Type	Description	Default
`key`	`str`	Unique identifier of roc object	required

Source code in puma/var_vs_var.py

def set_reference(self, key: str):
    """Select the curve that is used as denominator in the ratio panel.

    If a reference was already set, a warning is logged and the new key
    replaces it.

    Parameters
    ----------
    key : str
        Unique identifier of the curve to use as reference
    """
    previous = self.reference_object
    if previous is not None:
        logger.warning(
            "You specified a second curve %s as reference for ratio. "
            "Using it as new reference instead of %s.",
            key,
            previous,
        )
    self.reference_object = key

`puma.var_vs_eff.VarVsEff` #

Bases: puma.var_vs_var.VarVsVar

Class for efficiency vs. variable plot.

Initialise properties of roc curve object.

Parameters:

Name	Type	Description	Default
`x_var_sig`	`numpy.ndarray`	Values for x-axis variable for signal	required
`disc_sig`	`numpy.ndarray`	Discriminant values for signal	required
`x_var_bkg`	`numpy.ndarray`	Values for x-axis variable for background, by default None	`None`
`disc_bkg`	`numpy.ndarray`	Discriminant values for background, by default None	`None`
`bins`	`int or sequence of scalars`	If bins is an int, it defines the number of equal-width bins in the given range (10, by default). If bins is a sequence, it defines a monotonically increasing array of bin edges, including the rightmost edge, allowing for non-uniform bin widths, by default 10	`10`
`working_point`	`float`	Working point, by default None	`None`
`disc_cut`	`float or sequence of floats`	Cut value for discriminant, if it is a sequence it has to have the same length as number of bins, by default None	`None`
`flat_per_bin`	`bool`	If True and no `disc_cut` is given the signal efficiency is held constant in each bin, by default False	`False`
`key`	`str`	Identifier for the curve e.g. tagger, by default None	`None`
`**kwargs`	`kwargs`	Keyword arguments passed to `PlotLineObject`	`{}`

Raises:

Type	Description
`ValueError`	If provided options are not compatible with each other

Source code in puma/var_vs_eff.py

def __init__(
    self,
    x_var_sig: np.ndarray,
    disc_sig: np.ndarray,
    x_var_bkg: np.ndarray = None,
    disc_bkg: np.ndarray = None,
    bins=10,
    working_point: float | None = None,
    disc_cut=None,
    flat_per_bin: bool = False,
    key: str | None = None,
    **kwargs,
) -> None:
    """Initialise properties of the efficiency vs. variable curve object.

    Parameters
    ----------
    x_var_sig : np.ndarray
        Values for x-axis variable for signal
    disc_sig : np.ndarray
        Discriminant values for signal
    x_var_bkg : np.ndarray, optional
        Values for x-axis variable for background, by default None
    disc_bkg : np.ndarray, optional
        Discriminant values for background, by default None
    bins : int or sequence of scalars, optional
        If bins is an int, it defines the number of equal-width bins in the
        given range (10, by default). If bins is a sequence, it defines a
        monotonically increasing array of bin edges, including the
        rightmost edge, allowing for non-uniform bin widths, by default 10
    working_point : float, optional
        Working point, by default None
    disc_cut : float or sequence of floats, optional
        Cut value for discriminant, if it is a sequence it has to have the same
        length as number of bins, by default None
    flat_per_bin : bool, optional
        If True and no `disc_cut` is given the signal efficiency is held constant
        in each bin, by default False
    key : str, optional
        Identifier for the curve e.g. tagger, by default None
    **kwargs : kwargs
        Keyword arguments passed to `PlotLineObject`

    Raises
    ------
    ValueError
        If provided options are not compatible with each other
    """
    if len(x_var_sig) != len(disc_sig):
        raise ValueError(
            f"Length of `x_var_sig` ({len(x_var_sig)}) and `disc_sig` "
            f"({len(disc_sig)}) have to be identical."
        )
    if x_var_bkg is not None:
        # Without this guard a missing `disc_bkg` fails with an unhelpful
        # TypeError from `len(None)`.
        if disc_bkg is None:
            raise ValueError(
                "`disc_bkg` has to be specified when `x_var_bkg` is given."
            )
        if len(x_var_bkg) != len(disc_bkg):
            raise ValueError(
                f"Length of `x_var_bkg` ({len(x_var_bkg)}) and `disc_bkg` "
                f"({len(disc_bkg)}) have to be identical."
            )
    # checking that the given options are compatible
    # could also think about porting it to a class function instead of passing
    # the arguments to init e.g. `set_method`
    if working_point is None and disc_cut is None:
        raise ValueError(
            "Either `working_point` or `disc_cut` needs to be specified."
        )
    if flat_per_bin:
        if disc_cut is not None:
            raise ValueError(
                "You cannot specify `disc_cut` when `flat_per_bin` is set to True."
            )
        if working_point is None:
            raise ValueError(
                "You need to specify `working_point` when `flat_per_bin` is"
                " set to True."
            )
    # Independent of the binning, so reject it before any binning is done.
    if disc_cut is not None and working_point is not None:
        raise ValueError(
            "You cannot specify `disc_cut` when providing `working_point`."
        )

    self.x_var_sig = np.array(x_var_sig)
    self.disc_sig = np.array(disc_sig)
    self.x_var_bkg = None if x_var_bkg is None else np.array(x_var_bkg)
    self.disc_bkg = None if disc_bkg is None else np.array(disc_bkg)
    self.working_point = working_point
    self.disc_cut = disc_cut
    self.flat_per_bin = flat_per_bin
    # Binning related variables
    self.n_bins = None
    self.bin_edges = None
    self.x_bin_centres = None
    self.bin_widths = None
    # Binned distributions
    self.bin_indices_sig = None
    self.disc_binned_sig = None
    self.bin_indices_bkg = None
    self.disc_binned_bkg = None

    self._set_bin_edges(bins)

    # A per-bin cut sequence can only be validated once the binning is known.
    if (
        disc_cut is not None
        and isinstance(disc_cut, (list, np.ndarray))
        and self.n_bins != len(disc_cut)
    ):
        raise ValueError(
            "`disc_cut` has to be a float or has to have the same length as"
            " number of bins."
        )
    self._apply_binning()
    self._get_disc_cuts()

    # The y values start as zeros; they are not computed in this constructor.
    # NOTE(review): `x_var_widths=2 * self.bin_widths` presumably means that
    # `bin_widths` holds half widths - confirm against `_set_bin_edges`.
    VarVsVar.__init__(
        self,
        x_var=self.x_bin_centres,
        y_var_mean=np.zeros_like(self.x_bin_centres),
        y_var_std=np.zeros_like(self.x_bin_centres),
        x_var_widths=2 * self.bin_widths,
        key=key,
        fill=True,
        plot_y_std=False,
        **kwargs,
    )
    self.inverse_cut = False

`bkg_eff` `property` #

Calculate background efficiency per bin.

Returns:

Type	Description
`numpy.ndarray`	Efficiency
`numpy.ndarray`	Efficiency_error

`bkg_eff_sig_err` `property` #

Calculate signal efficiency per bin, assuming a flat background per bin. This results in returning the signal efficiency per bin, but the background error per bin.

`bkg_rej` `property` #

Calculate background rejection per bin.

Returns:

Type	Description
`numpy.ndarray`	Rejection
`numpy.ndarray`	Rejection_error

`sig_eff` `property` #

Calculate signal efficiency per bin.

Returns:

Type	Description
`numpy.ndarray`	Efficiency
`numpy.ndarray`	Efficiency_error

`sig_rej` `property` #

Calculate signal rejection per bin.

Returns:

Type	Description
`numpy.ndarray`	Rejection
`numpy.ndarray`	Rejection_error

`efficiency` #

Calculate efficiency and the associated error.

Parameters:

Name	Type	Description	Default
`arr`	`numpy.ndarray`	Array with discriminants	required
`cut`	`float`	Cut value	required

Returns:

Type	Description
`float`	Efficiency
`float`	Efficiency error

Source code in puma/var_vs_eff.py

def efficiency(self, arr: np.ndarray, cut: float):
    """Calculate efficiency and the associated error.

    The efficiency is the fraction of entries in `arr` above `cut`, or
    below `cut` if `self.inverse_cut` is set.

    Parameters
    ----------
    arr : np.ndarray
        Array with discriminants
    cut : float
        Cut value

    Returns
    -------
    float
        Efficiency (0 for an empty array)
    float
        Efficiency error (0 for an empty array)
    """
    n_total = len(arr)
    if n_total == 0:
        return 0, 0
    passed = arr < cut if self.inverse_cut else arr > cut
    # np.sum counts in native code; the builtin `sum` would iterate over the
    # boolean array element by element in Python.
    eff = np.sum(passed) / n_total
    eff_error = eff_err(eff, n_total)
    return eff, eff_error

`get` #

Wrapper around rejection and efficiency functions.

Parameters:

Name	Type	Description	Default
`mode`	`str`	Can be "sig_eff", "bkg_eff", "sig_rej", "bkg_rej", or "bkg_eff_sig_err"	required
`inverse_cut`	`bool`	Inverts the discriminant cut, which will yield the efficiency or rejection of the jets not passing the working point, by default False	`False`

Returns:

Type	Description
`numpy.ndarray`	Rejection or efficiency depending on `mode` value
`numpy.ndarray`	Rejection or efficiency error depending on `mode` value

Raises:

Type	Description
`ValueError`	If mode not supported

Source code in puma/var_vs_eff.py

def get(self, mode: str, inverse_cut: bool = False):
    """Wrapper around rejection and efficiency functions.

    `self.inverse_cut` is set to `inverse_cut` only while the requested
    quantity is evaluated and is always reset to False afterwards, so an
    inverted cut cannot leak into later direct property access.

    Parameters
    ----------
    mode : str
        Can be "sig_eff", "bkg_eff", "sig_rej", "bkg_rej", or
        "bkg_eff_sig_err"
    inverse_cut : bool, optional
        Inverts the discriminant cut, which will yield the efficiency or rejection
        of the jets not passing the working point, by default False

    Returns
    -------
    np.ndarray
        Rejection or efficiency depending on `mode` value
    np.ndarray
        Rejection or efficiency error depending on `mode` value

    Raises
    ------
    ValueError
        If mode not supported
    """
    supported_modes = ("sig_eff", "bkg_eff", "sig_rej", "bkg_rej", "bkg_eff_sig_err")
    self.inverse_cut = inverse_cut
    try:
        if mode in supported_modes:
            # Each mode name is also the name of the property providing it.
            return getattr(self, mode)
        raise ValueError(
            f"The selected mode {mode} is not supported. Use one of the following:"
            f" {VarVsEffPlot.mode_options}."
        )
    finally:
        # setting class variable again to False, on success and on error
        self.inverse_cut = False

`rejection` #

Calculate rejection and the associated error.

Parameters:

Name	Type	Description	Default
`arr`	`numpy.ndarray`	Array with discriminants	required
`cut`	`float`	Cut value	required

Returns:

Type	Description
`float`	Rejection
`float`	Rejection error

Source code in puma/var_vs_eff.py

def rejection(self, arr: np.ndarray, cut: float):
    """Calculate rejection and the associated error.

    The rejection is the number of entries in `arr` divided by the number
    of entries above `cut`, or below `cut` if `self.inverse_cut` is set.

    Parameters
    ----------
    arr : np.ndarray
        Array with discriminants
    cut : float
        Cut value

    Returns
    -------
    float
        Rejection, np.nan if the rejection would be infinite
    float
        Rejection error, np.nan if the rejection would be infinite
    """
    n_total = len(arr)
    passed = arr < cut if self.inverse_cut else arr > cut
    # np.sum counts in native code; the builtin `sum` would iterate over the
    # boolean array element by element in Python.
    rej = save_divide(n_total, np.sum(passed), default=np.inf)
    if rej == np.inf:
        logger.warning("Your rejection is infinity -> setting it to np.nan.")
        return np.nan, np.nan
    rej_error = rej_err(rej, n_total)
    return rej, rej_error

`puma.var_vs_eff.VarVsEffPlot` #

Bases: puma.var_vs_var.VarVsVarPlot

var_vs_eff plot class.

var_vs_eff plot properties.

Parameters:

Name	Type	Description	Default
`mode`	`str`	Defines which quantity is plotted. The following options are available: sig_eff - plots signal efficiency vs. variable, with statistical error on N signal per bin; bkg_eff - plots background efficiency vs. variable, with statistical error on N background per bin; sig_rej - plots signal rejection vs. variable, with statistical error on N signal per bin; bkg_rej - plots background rejection vs. variable, with statistical error on N background per bin; bkg_eff_sig_err - plots background efficiency vs. variable, with statistical error on N signal per bin.	required
`grid`	`bool`	Set the grid for the plots.	`False`
`**kwargs`	`kwargs`	Keyword arguments from `puma.PlotObject`	`{}`

Raises:

Type	Description
`ValueError`	If incompatible mode given or more than 1 ratio panel requested

Source code in puma/var_vs_eff.py

def __init__(self, mode, grid: bool = False, **kwargs) -> None:
    """var_vs_eff plot properties.

    Parameters
    ----------
    mode : str
        Defines which quantity is plotted, the following options are available:
            sig_eff - Plots signal efficiency vs. variable, with statistical error
                on N signal per bin
            bkg_eff - Plots background efficiency vs. variable, with statistical
                error on N background per bin
            sig_rej - Plots signal rejection vs. variable, with statistical error
                on N signal per bin
            bkg_rej - Plots background rejection vs. variable, with statistical
                error on N background per bin
            bkg_eff_sig_err - Plots background efficiency vs. variable, with
                statistical error on N signal per bin.
    grid : bool, optional
        Set the grid for the plots.
    **kwargs : kwargs
        Keyword arguments from `puma.PlotObject`

    Raises
    ------
    ValueError
        If incompatible mode given or more than 1 ratio panel requested
    """
    # Reject an unsupported mode before the parent constructor runs, so no
    # figure is initialised for a plot that cannot be used.
    # `mode_options` is read as a class-level attribute here.
    if mode not in self.mode_options:
        raise ValueError(
            f"The selected mode {mode} is not supported. Use one of the following: "
            f"{self.mode_options}."
        )
    super().__init__(grid=grid, **kwargs)
    self.mode = mode

`apply_modified_atlas_second_tag` #

Modifies the atlas_second_tag to include info on the type of p-eff plot being displayed.

Source code in puma/var_vs_eff.py

def apply_modified_atlas_second_tag(
    self,
    signal,
    working_point=None,
    disc_cut=None,
    flat_per_bin=False,
):
    """Modifies the atlas_second_tag to include info on the type of p-eff plot
    being displayed.

    The tag is appended on a new line if `atlas_second_tag` is already set,
    otherwise it becomes the tag.

    Parameters
    ----------
    signal : object
        Signal flavour; `signal.eff_str` is used for the working point tag
        and `signal.name` for the discriminant cut tag
    working_point : float, optional
        Working point as a fraction, shown in percent, by default None
    disc_cut : float, optional
        Discriminant cut value, used if no working point is given,
        by default None
    flat_per_bin : bool, optional
        If True the tag is wrapped as "Flat ... per bin", by default False

    Raises
    ------
    ValueError
        If neither a (non-zero) `working_point` nor a `disc_cut` is given
    """
    if working_point:
        mid_str = f"{round(working_point * 100, 3)}% " + signal.eff_str
    elif disc_cut is not None:
        # `is not None` so that a cut value of 0 is accepted as well.
        flavour = signal.name
        # Remove the literal "jets" suffix only; `rstrip("jets")` would strip
        # any trailing run of the characters j, e, t and s.
        if flavour.endswith("jets"):
            flavour = flavour[: -len("jets")]
        mid_str = rf"$D_{{{flavour}}}$ > {disc_cut}"
    else:
        raise ValueError(
            "Either `working_point` or `disc_cut` needs to be specified."
        )
    tag = f"Flat {mid_str} per bin" if flat_per_bin else f"{mid_str}"
    if self.atlas_second_tag:
        self.atlas_second_tag = f"{self.atlas_second_tag}\n{tag}"
    else:
        self.atlas_second_tag = tag

`plot` #

Plotting curves.

Parameters:

Name	Type	Description	Default
`**kwargs`		Keyword arguments passed to plt.axis.errorbar	`{}`

Returns:

Type	Description
`puma.line_plot_2d.Line2D`	matplotlib Line2D object

Source code in puma/var_vs_eff.py

def plot(self, **kwargs):
    """Set up the curves for the selected mode and draw them.

    Calls `self._setup_curves()` and then delegates the drawing to the
    parent class `plot`.

    Parameters
    ----------
    **kwargs: kwargs
        Keyword arguments passed to plt.axis.errorbar

    Returns
    -------
    Line2D
        matplotlib Line2D object(s) as returned by the parent `plot`
    """
    logger.debug("Plotting curves with mode %s", self.mode)
    self._setup_curves()
    legend_handles = super().plot(**kwargs)
    return legend_handles

`puma.var_vs_vtx.VarVsVtx` #

Bases: puma.var_vs_var.VarVsVar

var_vs_vtx class storing info about vertexing performance.

Initialise properties of roc curve object.

Parameters:

Name	Type	Description	Default
`x_var`	`numpy.ndarray`	Values for x-axis variable for signal	required
`n_match`	`numpy.ndarray`	Values for number of correctly identified objects (where truth and reco match)	required
`n_true`	`numpy.ndarray`	Values for true number of objects	required
`n_reco`	`numpy.ndarray`	Values for reconstructed number of objects	required
`bins`	`int or sequence of scalars`	If bins is an int, it defines the number of equal-width bins in the given range (10, by default). If bins is a sequence, it defines a monotonically increasing array of bin edges, including the rightmost edge, allowing for non-uniform bin widths, by default 10	`10`
`key`	`str`	Identifier for the curve e.g. tagger, by default None	`None`
`**kwargs`	`kwargs`	Keyword arguments passed to `PlotLineObject`	`{}`

Raises:

Type	Description
`ValueError`	If provided options are not compatible with each other

Source code in puma/var_vs_vtx.py

def __init__(
    self,
    x_var: np.ndarray,
    n_match: np.ndarray,
    n_true: np.ndarray,
    n_reco: np.ndarray,
    bins=10,
    key: str | None = None,
    **kwargs,
) -> None:
    """Initialise properties of the vertexing performance curve object.

    Parameters
    ----------
    x_var : np.ndarray
        Values for x-axis variable for signal
    n_match : np.ndarray
        Values for number of correctly identified objects (where truth and
        reco match)
    n_true : np.ndarray
        Values for true number of objects
    n_reco : np.ndarray
        Values for reconstructed number of objects
    bins : int or sequence of scalars, optional
        If bins is an int, it defines the number of equal-width bins in the
        given range (10, by default). If bins is a sequence, it defines a
        monotonically increasing array of bin edges, including the
        rightmost edge, allowing for non-uniform bin widths, by default 10
    key : str, optional
        Identifier for the curve e.g. tagger, by default None
    **kwargs : kwargs
        Keyword arguments passed to `PlotLineObject`

    Raises
    ------
    ValueError
        If `n_match`, `n_true` or `n_reco` does not have the same length as
        `x_var`
    """
    # Each count array must have one entry per `x_var` entry. They are checked
    # in the order n_match, n_true, n_reco, so the first mismatch is reported.
    named_counts = (
        ("n_match", n_match),
        ("n_true", n_true),
        ("n_reco", n_reco),
    )
    for name, counts in named_counts:
        if len(x_var) != len(counts):
            raise ValueError(
                f"Length of `x_var` ({len(x_var)}) and `{name}` "
                f"({len(counts)}) have to be identical."
            )

    # Keep array copies of the raw inputs on the instance.
    self.x_var = np.array(x_var)
    self.n_match = np.array(n_match)
    self.n_true = np.array(n_true)
    self.n_reco = np.array(n_reco)

    # Binning related variables, initialised to None before the binning
    # helpers below run.
    self.n_bins = self.bin_edges = self.x_bin_centres = self.bin_widths = None
    # Binned distributions
    self.bin_indices = self.metric_binned = None

    self._set_bin_edges(bins)
    self._apply_binning()

    # The y-values are only placeholders (zeros) at construction time.
    # NOTE(review): the factor 2 suggests `bin_widths` holds half-widths --
    # confirm against `_set_bin_edges`.
    bin_centres = self.x_bin_centres
    VarVsVar.__init__(
        self,
        x_var=bin_centres,
        y_var_mean=np.zeros_like(bin_centres),
        y_var_std=np.zeros_like(bin_centres),
        x_var_widths=2 * self.bin_widths,
        key=key,
        fill=True,
        plot_y_std=False,
        **kwargs,
    )

`efficiency` `property` #

Calculate vertexing efficiency per bin. Defined as number of reconstructed vertices matched to truth divided by number of total true vertices.

Returns:

Type	Description
`numpy.ndarray`	Efficiency
`numpy.ndarray`	Efficiency error

`fakes` `property` #

Calculate vertexing fake rate per bin. Defined as total number of events with reconstructed vertices where vertices are not expected.

Returns:

Type	Description
`numpy.ndarray`	Fake rate
`numpy.ndarray`	Fake rate error

`purity` `property` #

Calculate vertexing purity per bin. Defined as number of reconstructed vertices matched to truth divided by number of total reconstructed vertices.

Returns:

Type	Description
`numpy.ndarray`	Purity
`numpy.ndarray`	Purity error

`get` #

Wrapper around the efficiency, purity and fakes properties.

Parameters:

Name	Type	Description	Default
`mode`	`str`	Can be "efficiency", "purity" or "fakes"	required

Returns:

Type	Description
`numpy.ndarray`	Efficiency, purity or fake rate depending on `mode` value
`numpy.ndarray`	Efficiency, purity or fake rate error depending on `mode` value

Raises:

Type	Description
`ValueError`	If mode not supported

Source code in puma/var_vs_vtx.py

def get(self, mode: str):
    """Return the metric selected by ``mode`` together with its error.

    Wrapper around the ``efficiency``, ``purity`` and ``fakes`` attributes.

    Parameters
    ----------
    mode : str
        Can be "efficiency", "purity" or "fakes"

    Returns
    -------
    np.ndarray
        Efficiency, purity or fake rate depending on `mode` value
    np.ndarray
        Efficiency, purity or fake rate error depending on `mode` value

    Raises
    ------
    ValueError
        If mode not supported
    """
    # The supported mode names are exactly the attribute names, so only the
    # requested attribute is looked up.
    if mode in ("efficiency", "purity", "fakes"):
        return getattr(self, mode)
    raise ValueError(
        f"The selected mode {mode} is not supported. Use one of the following:"
        f" {VarVsVtxPlot.mode_options}."
    )

`get_performance_ratio` #

Calculate performance ratio for vertexing task. Either n_matched/n_true (efficiency) or n_matched/n_reco (purity).

Parameters:

Name	Type	Description	Default
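`num`	`numpy.ndarray`	Array whose sum is the numerator of the ratio (n_matched)	required
`denom`	`numpy.ndarray`	Array whose sum is the denominator of the ratio (n_true for efficiency, n_reco for purity)	required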

Returns:

Type	Description
`float`	Performance ratio
`float`	Performance ratio error

Source code in puma/var_vs_vtx.py

def get_performance_ratio(self, num: np.ndarray, denom: np.ndarray):
    """Calculate performance ratio for vertexing task.

    The ratio is built from the summed inputs, i.e. either n_matched/n_true
    (efficiency) or n_matched/n_reco (purity).

    Parameters
    ----------
    num : np.ndarray
        Array whose sum is used as the numerator of the ratio
    denom : np.ndarray
        Array whose sum is used as the denominator of the ratio

    Returns
    -------
    float
        Performance ratio (np.nan if the division result is infinity)
    float
        Performance ratio error (np.nan if the ratio is infinity, 0.0 if the
        ratio is zero)
    """
    total_num = np.sum(num)
    total_denom = np.sum(denom)
    # `np.inf` is the fallback passed to `save_divide`
    # (presumably returned for a zero denominator -- confirm in `save_divide`).
    ratio = save_divide(total_num, total_denom, default=np.inf)

    if ratio == np.inf:
        logger.warning("Your vertexing performance ratio is infinity -> setting it to np.nan.")
        return np.nan, np.nan

    if ratio == 0:
        logger.warning("Your vertexing performance ratio is zero -> setting error to zero.")
        return 0.0, 0.0

    # The error is evaluated by `eff_err` from the ratio and the number of
    # entries in `num`.
    return ratio, eff_err(ratio, len(num))

`puma.var_vs_vtx.VarVsVtxPlot` #

Bases: puma.var_vs_var.VarVsVarPlot

var_vs_vtx plot properties.

Parameters:

Name	Type	Description	Default
`mode`	`str`	Defines which quantity is plotted. The following options are available: efficiency - plots efficiency vs. variable for jets where vertices are expected; purity - plots purity vs. variable for jets where vertices are expected; fakes - plots fake rate vs. variable for jets where vertices are not expected.	required
`grid`	`bool`	Set the grid for the plots.	`False`
`**kwargs`	`kwargs`	Keyword arguments from `puma.PlotObject`	`{}`

Raises:

Type	Description
`ValueError`	If incompatible mode given or more than 1 ratio panel requested

Source code in puma/var_vs_vtx.py

def __init__(self, mode, grid: bool = False, **kwargs) -> None:
    """Initialise the var_vs_vtx plot properties.

    Parameters
    ----------
    mode : str
        Defines which quantity is plotted, the following options are
        available:
            efficiency - Plots efficiency vs. variable for jets where vertices are
            expected
            purity - Plots purity vs. variable for jets where vertices are expected
            fakes - Plots fake rate vs. variable for jets where vertices are not
            expected
    grid : bool, optional
        Set the grid for the plots.
    **kwargs : kwargs
        Keyword arguments from `puma.PlotObject`

    Raises
    ------
    ValueError
        If incompatible mode given or more than 1 ratio panel requested
    """
    # The parent initialiser runs first; the mode is validated afterwards.
    super().__init__(grid=grid, **kwargs)

    mode_is_supported = mode in self.mode_options
    if not mode_is_supported:
        raise ValueError(
            f"The selected mode {mode} is not supported. Use one of the following: "
            f"{self.mode_options}."
        )

    self.mode = mode

`plot` #

Plotting curves.

Parameters:

Name	Type	Description	Default
`**kwargs`		Keyword arguments passed to plt.axis.errorbar	`{}`

Returns:

Type	Description
`puma.line_plot_2d.Line2D`	matplotlib Line2D object

Source code in puma/var_vs_vtx.py

def plot(self, **kwargs):
    """Set up the curves for the current mode and plot them.

    Parameters
    ----------
    **kwargs : kwargs
        Keyword arguments forwarded to the parent class ``plot`` method
        (documented as being passed to plt.axis.errorbar)

    Returns
    -------
    Line2D
        matplotlib Line2D object
    """
    logger.debug("Plotting curves with mode %s", self.mode)
    # The curves have to be prepared before the parent class draws them.
    self._setup_curves()
    plotted_lines = super().plot(**kwargs)
    return plotted_lines

Variable vs Variable

puma.var_vs_var.VarVsVar #

divide #

puma.var_vs_var.VarVsVarPlot #

add #

draw #

draw_hline #

plot #

plot_ratios #

set_reference #

puma.var_vs_eff.VarVsEff #

bkg_eff property #

bkg_eff_sig_err property #

bkg_rej property #

sig_eff property #

sig_rej property #

efficiency #

get #

rejection #

puma.var_vs_eff.VarVsEffPlot #

apply_modified_atlas_second_tag #

plot #

puma.var_vs_vtx.VarVsVtx #

efficiency property #

fakes property #

purity property #

get #

get_performance_ratio #

puma.var_vs_vtx.VarVsVtxPlot #

plot #

`puma.var_vs_var.VarVsVar` #

`divide` #

`puma.var_vs_var.VarVsVarPlot` #

`add` #

`draw` #

`draw_hline` #

`plot` #

`plot_ratios` #

`set_reference` #

`puma.var_vs_eff.VarVsEff` #

`bkg_eff` `property` #

`bkg_eff_sig_err` `property` #

`bkg_rej` `property` #

`sig_eff` `property` #

`sig_rej` `property` #

`efficiency` #

`get` #

`rejection` #

`puma.var_vs_eff.VarVsEffPlot` #

`apply_modified_atlas_second_tag` #

`plot` #

`puma.var_vs_vtx.VarVsVtx` #

`efficiency` `property` #

`fakes` `property` #

`purity` `property` #

`get` #

`get_performance_ratio` #

`puma.var_vs_vtx.VarVsVtxPlot` #

`plot` #