Skip to content

Variable vs efficiency plots#

The following is a small example of how to plot the efficiency versus a specific variable. In this case, we use the jet pT as this variable.

Then we can start the actual plotting part

"""Produce pT vs efficiency plot from tagger output and labels."""

from __future__ import annotations

import numpy as np
from ftag import Flavours
from ftag.utils import get_discriminant

from puma import VarVsEff, VarVsEffPlot
from puma.utils import get_dummy_2_taggers, logger

# Generate dummy jets that mimic the output of NN-based taggers. add_pt=True is
# requested because the jet pt is read from `df` as the plotting variable below.
df = get_dummy_2_taggers(add_pt=True)

logger.info("calculate tagger discriminants")
# One discriminant per tagger ("dips" and "rnnip"), with b-jets as the signal.
# `fraction_values` carries the per-flavour fractions handed to the discriminant;
# fc=0.018 is the value quoted in the plot tags of this example.
discs_dips = get_discriminant(
    jets=df,
    tagger="dips",
    signal=Flavours["bjets"],
    flavours=Flavours.by_category("single-btag"),
    fraction_values={
        "fc": 0.018,
        "fu": 0.982,
        "ftau": 0,
    },
)
# Same settings for the second tagger so that both curves are comparable
discs_rnnip = get_discriminant(
    jets=df,
    tagger="rnnip",
    signal=Flavours["bjets"],
    flavours=Flavours.by_category("single-btag"),
    fraction_values={
        "fc": 0.018,
        "fu": 0.982,
        "ftau": 0,
    },
)

# Jet pt in GeV (the division by 1e3 presumes the stored values are in MeV)
pt = df["pt"] / 1e3

# Grid of target signal efficiencies.
# NOTE(review): not referenced by any curve or plot further down in this example.
sig_eff = np.linspace(0.49, 1, 20)

# Per-flavour jet selections, taken from the `.idx` of each flavour's cuts on `df`.
# All four follow the same `is_<flavour>` naming.
is_b = Flavours["bjets"].cuts(df).idx
is_c = Flavours["cjets"].cuts(df).idx
is_light = Flavours["ujets"].cuts(df).idx
is_tau = Flavours["taujets"].cuts(df).idx

# Curve definitions: inclusive 70% working point.
# b-jets are the signal (b-tagging) and light-flavour jets the background, so
# these curves can be used to look at the light-jet rejection.
def _inclusive_wp_curve(discs, label):
    """Return the b-vs-light curve of one tagger at the inclusive 70% WP."""
    return VarVsEff(
        label=label,
        x_var_sig=pt[is_b],
        disc_sig=discs[is_b],
        x_var_bkg=pt[is_light],
        disc_bkg=discs[is_light],
        bins=[20, 30, 40, 60, 85, 110, 140, 175, 250],
        working_point=0.7,
        fixed_bkg_rej=None,
        disc_cut=None,
        flat_per_bin=False,
    )


rnnip_light = _inclusive_wp_curve(discs_rnnip, "RNNIP")
dips_light = _inclusive_wp_curve(discs_dips, "DIPS")

# VarVsEff curves can be written to disk and read back later.
# Supported formats are .yaml and .json; both are human-readable, so a stored
# curve can be edited before it is loaded again.
for stored_curve, curve_file in (
    (dips_light, "dips_light.yaml"),
    (rnnip_light, "rnnip_light.yaml"),
):
    stored_curve.save(curve_file)

# Loading goes through the class itself via load(). The loaded objects can be
# used like the originals; all information needed for plotting is still there.
dips_light_loaded = VarVsEff.load("dips_light.yaml")
rnnip_light_loaded = VarVsEff.load("rnnip_light.yaml")

# Plotting starts here
logger.info("Plotting bkg rejection for inclusive efficiency as a function of pt.")

# VarVsEffPlot offers the following modes:
#   "sig_eff": signal efficiency as a function of the variable
#   "bkg_eff": background mis-tag efficiency as a function of the variable
#   "sig_rej": signal rejection (how much is missed) as a function of the variable
#   "bkg_rej": background rejection as a function of the variable

# The light-flavour rejection is shown first
second_tag = (
    "$\\sqrt{s}=13$ TeV, dummy jets \n"
    "dummy sample, $f_{c}=0.018$"
)
plot_bkg_rej = VarVsEffPlot(
    mode="bkg_rej",
    xlabel=r"$p_{T}$ [GeV]",
    ylabel="Light-flavour jets rejection",
    atlas_second_tag=second_tag,
    n_ratio_panels=1,
    logy=False,
)

# Register both curves; RNNIP is the reference
plot_bkg_rej.add(rnnip_light, reference=True)
plot_bkg_rej.add(dips_light)

# Draw the curves and store the figure as png
plot_bkg_rej.draw()
plot_bkg_rej.savefig("pt_light_rej.png")

# Second plot: the signal efficiency, which needs a new plot object in "sig_eff" mode
second_tag = (
    "$\\sqrt{s}=13$ TeV, dummy jets, \n"
    "dummy sample, $f_{c}=0.018$"
)
plot_sig_eff = VarVsEffPlot(
    mode="sig_eff",
    xlabel=r"$p_{T}$ [GeV]",
    ylabel="$b$-jets efficiency",
    atlas_second_tag=second_tag,
    n_ratio_panels=1,
    logy=False,
)

# Register both curves; RNNIP is the reference
plot_sig_eff.add(rnnip_light, reference=True)
plot_sig_eff.add(dips_light)

# State the 70% working point in the text under the ATLAS logo
plot_sig_eff.atlas_second_tag = (
    plot_sig_eff.atlas_second_tag + "\nInclusive $\\epsilon_b=70\\%$"
)

# The discriminant cut can be inverted, if wanted, via
# plot_sig_eff.set_inverse_cut()

# Draw the curves, then mark the inclusive working point with a horizontal line
plot_sig_eff.draw()
plot_sig_eff.draw_hline(0.7)

# Store as png; transparent=False colours the background of the plot white
plot_sig_eff.savefig("pt_b_eff.png", transparent=False)

# The performance can also be shown non-inclusively: rather than a global 70%
# signal efficiency, a 70% signal efficiency is enforced in each bin.
# This requires new curves with the option "flat_per_bin" set to True.
def _flat_wp_curve(discs, label):
    """Return the b-vs-light curve of one tagger with a 70% WP in every bin."""
    return VarVsEff(
        label=label,
        x_var_sig=pt[is_b],
        disc_sig=discs[is_b],
        x_var_bkg=pt[is_light],
        disc_bkg=discs[is_light],
        bins=[20, 30, 40, 60, 85, 110, 140, 175, 250],
        working_point=0.7,
        fixed_bkg_rej=None,
        disc_cut=None,
        flat_per_bin=True,
    )


rnnip_light = _flat_wp_curve(discs_rnnip, "RNNIP")
dips_light = _flat_wp_curve(discs_dips, "DIPS")

# Signal efficiency for the curves with the working point enforced per bin
plot_sig_eff = VarVsEffPlot(
    mode="sig_eff",
    ylabel="$b$-jets efficiency",
    xlabel=r"$p_{T}$ [GeV]",
    logy=False,
    atlas_second_tag="$\\sqrt{s}=13$ TeV, dummy jets, \ndummy sample, $f_{c}=0.018$",
    n_ratio_panels=1,
)

# Adding the two curves and set RNNIP to be the reference
plot_sig_eff.add(rnnip_light, reference=True)
plot_sig_eff.add(dips_light)

# Adapt the text under the ATLAS logo to say that the 70% working point is
# fixed per bin. Only the efficiency itself is typeset in math mode; the words
# "per bin" sit outside the $...$ so they render as regular text with spaces.
plot_sig_eff.atlas_second_tag += "\nFixed $\\epsilon_b=70\\%$ per bin"

# Draw the actual curves in the plot object
plot_sig_eff.draw()

# Save the plot as png
plot_sig_eff.savefig("pt_b_eff_fixed_per_bin.png")

# With the working point fixed per bin, the signal-efficiency plot shows the
# same efficiency in every bin. Next comes the matching background rejection.
second_tag = (
    "$\\sqrt{s}=13$ TeV, dummy jets \n"
    "dummy sample, $f_{c}=0.018$"
)
plot_bkg_rej = VarVsEffPlot(
    mode="bkg_rej",
    xlabel=r"$p_{T}$ [GeV]",
    ylabel="Light-flavour jets rejection",
    atlas_second_tag=second_tag,
    n_ratio_panels=1,
    logy=False,
)

# Register both curves; RNNIP is the reference
plot_bkg_rej.add(rnnip_light, reference=True)
plot_bkg_rej.add(dips_light)

# Draw the curves and store the figure as png
plot_bkg_rej.draw()
plot_bkg_rej.savefig("pt_light_rej_fixed_per_bin.png")

# The background rejection can be fixed instead of the signal efficiency
# (working point, e.g. 70%). New curves are needed for that: working_point is
# None and fixed_bkg_rej carries the requested rejection.
def _fixed_rej_curve(discs, label):
    """Return the b-vs-light curve of one tagger at a fixed rejection of 100."""
    return VarVsEff(
        label=label,
        x_var_sig=pt[is_b],
        disc_sig=discs[is_b],
        x_var_bkg=pt[is_light],
        disc_bkg=discs[is_light],
        bins=[20, 30, 40, 60, 85, 110, 140, 175, 250],
        working_point=None,
        fixed_bkg_rej=100,
        disc_cut=None,
        flat_per_bin=False,
    )


rnnip_light = _fixed_rej_curve(discs_rnnip, "RNNIP")
dips_light = _fixed_rej_curve(discs_dips, "DIPS")

# Signal efficiency for the curves defined with a fixed background rejection
plot_sig_eff = VarVsEffPlot(
    mode="sig_eff",
    ylabel="$b$-jets efficiency",
    xlabel=r"$p_{T}$ [GeV]",
    logy=False,
    atlas_second_tag="$\\sqrt{s}=13$ TeV, dummy jets, \ndummy sample, $f_{c}=0.018$",
    n_ratio_panels=1,
)

# Adding the two curves and set RNNIP to be the reference
plot_sig_eff.add(rnnip_light, reference=True)
plot_sig_eff.add(dips_light)

# Adapt the text under the ATLAS logo to say that the rejection is fixed to 100.
# The line is plain text, so it must not contain an unmatched "$".
plot_sig_eff.atlas_second_tag += "\nFixed Rejection 100"

# Draw the actual curves in the plot object
plot_sig_eff.draw()

# Save the plot as png
plot_sig_eff.savefig("pt_b_eff_rej_100.png")

# Background rejection for the curves defined with a fixed rejection of 100
second_tag = (
    "$\\sqrt{s}=13$ TeV, dummy jets \n"
    "dummy sample, $f_{c}=0.018$"
)
plot_bkg_rej = VarVsEffPlot(
    mode="bkg_rej",
    xlabel=r"$p_{T}$ [GeV]",
    ylabel="Light-flavour jets rejection",
    atlas_second_tag=second_tag,
    n_ratio_panels=1,
    logy=False,
)

# Register both curves; RNNIP is the reference
plot_bkg_rej.add(rnnip_light, reference=True)
plot_bkg_rej.add(dips_light)

# Draw the curves and store the figure as png
plot_bkg_rej.draw()
plot_bkg_rej.savefig("pt_light_rej_rej_100.png")

# As for the working point, the fixed rejection can also be requested per bin
# by combining fixed_bkg_rej with flat_per_bin=True.
def _fixed_rej_per_bin_curve(discs, label):
    """Return the b-vs-light curve of one tagger at a rejection of 100 per bin."""
    return VarVsEff(
        label=label,
        x_var_sig=pt[is_b],
        disc_sig=discs[is_b],
        x_var_bkg=pt[is_light],
        disc_bkg=discs[is_light],
        bins=[20, 30, 40, 60, 85, 110, 140, 175, 250],
        working_point=None,
        fixed_bkg_rej=100,
        disc_cut=None,
        flat_per_bin=True,
    )


rnnip_light = _fixed_rej_per_bin_curve(discs_rnnip, "RNNIP")
dips_light = _fixed_rej_per_bin_curve(discs_dips, "DIPS")

# Signal efficiency for the curves defined with a fixed rejection per bin
plot_sig_eff = VarVsEffPlot(
    mode="sig_eff",
    ylabel="$b$-jets efficiency",
    xlabel=r"$p_{T}$ [GeV]",
    logy=False,
    atlas_second_tag="$\\sqrt{s}=13$ TeV, dummy jets, \ndummy sample, $f_{c}=0.018$",
    n_ratio_panels=1,
)

# Adding the two curves and set RNNIP to be the reference
plot_sig_eff.add(rnnip_light, reference=True)
plot_sig_eff.add(dips_light)

# Adapt the text under the ATLAS logo to say that the rejection is fixed to 100
# in each bin. The line is plain text, so it must not contain an unmatched "$".
plot_sig_eff.atlas_second_tag += "\nFixed Rejection 100 per bin"

# Draw the actual curves in the plot object
plot_sig_eff.draw()

# Save the plot as png
plot_sig_eff.savefig("pt_b_eff_rej_100_per_bin.png")

# Background rejection for the curves defined with a fixed rejection per bin
second_tag = (
    "$\\sqrt{s}=13$ TeV, dummy jets \n"
    "dummy sample, $f_{c}=0.018$"
)
plot_bkg_rej = VarVsEffPlot(
    mode="bkg_rej",
    xlabel=r"$p_{T}$ [GeV]",
    ylabel="Light-flavour jets rejection",
    atlas_second_tag=second_tag,
    n_ratio_panels=1,
    logy=False,
)

# Register both curves; RNNIP is the reference
plot_bkg_rej.add(rnnip_light, reference=True)
plot_bkg_rej.add(dips_light)

# Draw the curves and store the figure as png
plot_bkg_rej.draw()
plot_bkg_rej.savefig("pt_light_rej_rej_100_per_bin.png")

# For efficiencies at a fixed rejection it can be useful to show the absolute
# difference between the curves. The ratio_method "subtract" does this, and
# ylabel_ratio labels the panel accordingly.
second_tag = (
    "$\\sqrt{s}=13$ TeV, dummy jets, \n"
    "dummy sample, $f_{c}=0.018$"
)
plot_sig_eff = VarVsEffPlot(
    mode="sig_eff",
    xlabel=r"$p_{T}$ [GeV]",
    ylabel="$b$-jets efficiency",
    ylabel_ratio="Difference",
    ratio_method="subtract",
    atlas_second_tag=second_tag,
    n_ratio_panels=1,
    logy=False,
)

# Register both curves; RNNIP is the reference
plot_sig_eff.add(rnnip_light, reference=True)
plot_sig_eff.add(dips_light)

# Draw the curves and store the figure as png.
# NOTE(review): the file name says "light_rej" although this is a b-jet
# efficiency plot; it is kept unchanged so existing references keep working.
plot_sig_eff.draw()
plot_sig_eff.savefig("pt_light_rej_rej_100_per_bin_subtracted.png")