Metrics

`Metrics(residues, isotope_error_range, cum_mass_threshold=0.5, ind_mass_threshold=0.1)`

Peptide metrics class.

Source code in instanovo/utils/metrics.py

def __init__(
    self,
    residues: dict[str, float],
    isotope_error_range: list[int],
    cum_mass_threshold: float = 0.5,
    ind_mass_threshold: float = 0.1,
) -> None:
    """Store the residue table and mass-matching settings on the instance.

    The arguments are kept as given: nothing is validated or copied here.

    Args:
        residues (dict[str, float]): Mapping from residue token to a float
            (presumably the residue mass -- confirm against callers).
        isotope_error_range (list[int]): Isotope offsets to consider.
        cum_mass_threshold (float): Stored as ``self.cum_mass_threshold``.
        ind_mass_threshold (float): Stored as ``self.ind_mass_threshold``.
    """
    self.residues: dict[str, float] = residues
    self.isotope_error_range: list[int] = isotope_error_range
    self.cum_mass_threshold: float = cum_mass_threshold
    self.ind_mass_threshold: float = ind_mass_threshold

`calc_auc(targs, preds, conf)`

Calculate the peptide-level AUC (area under the precision-recall curve).

Source code in instanovo/utils/metrics.py

def calc_auc(
    self,
    targs: list[str] | list[list[str]],
    preds: list[str] | list[list[str]],
    conf: list[float],
) -> float:
    """Calculate the peptide-level AUC."""
    x, y = self._get_pr_curve(targs, preds, conf)
    recall, precision = np.array(x)[::-1], np.array(y)[::-1]

    width = recall[1:] - recall[:-1]
    height = np.minimum(precision[1:], precision[:-1])
    top = np.maximum(precision[1:], precision[:-1])
    side = top - height
    return (width * height).sum() + 0.5 * (side * width).sum()  # type: ignore

`compute_aa_er(peptides_truth, peptides_predicted)`

Compute amino-acid level error-rate.

Source code in instanovo/utils/metrics.py

def compute_aa_er(
    self,
    peptides_truth: list[str] | list[list[str]],
    peptides_predicted: list[str] | list[list[str]],
) -> float:
    """Compute the amino-acid level error rate between truth and prediction.

    Both inputs are passed through ``self._split_sequences`` and each
    resulting sequence is re-joined with single spaces, so that
    ``jiwer.wer`` scores every amino-acid token as one word.

    Args:
        peptides_truth (list[str] | list[list[str]]): Reference peptides.
        peptides_predicted (list[str] | list[list[str]]): Predicted peptides.

    Returns:
        float: The value returned by ``jiwer.wer``, converted to ``float``.
    """
    truth_tokens = self._split_sequences(peptides_truth)
    predicted_tokens = self._split_sequences(peptides_predicted)

    # One space-separated "sentence" per peptide.
    reference = [" ".join(tokens) for tokens in truth_tokens]
    hypothesis = [" ".join(tokens) for tokens in predicted_tokens]

    return float(jiwer.wer(reference, hypothesis))

`compute_precision_recall(targets, predictions, confidence=None, threshold=None)`

Calculate precision and recall at peptide- and AA-level.

Returns a tuple `(aa_precision, aa_recall, pep_recall, pep_precision)`.

Parameters:

Name	Type	Description	Default
`targets`	`list[str] \| list[list[str]]`	Target peptides.	required
`predictions`	`list[str] \| list[list[str]]`	Model predicted peptides.	required
`confidence`	`list[float] \| None`	Optional model confidence.	`None`
`threshold`	`float \| None`	Optional confidence threshold.	`None`

Source code in instanovo/utils/metrics.py

def compute_precision_recall(
    self,
    targets: list[str] | list[list[str]],
    predictions: list[str] | list[list[str]],
    confidence: list[float] | None = None,
    threshold: float | None = None,
) -> tuple[float, float, float, float]:
    """Calculate precision and recall at peptide- and AA-level.

    Args:
        targets (list[str] | list[list[str]]): Target peptides.
        predictions (list[str] | list[list[str]]): Model predicted peptides.
        confidence (list[float] | None): Optional model confidence.
        threshold (float | None): Optional confidence threshold.
    """
    targets = self._split_sequences(targets)
    predictions = self._split_sequences(predictions)

    n_targ_aa, n_pred_aa, n_match_aa = 0, 0, 0
    n_pred_pep, n_match_pep = 0, 0

    if confidence is None or threshold is None:
        threshold = 0
        confidence = np.ones(len(predictions))

    for i in range(len(targets)):
        targ = self._split_peptide(targets[i])
        pred = self._split_peptide(predictions[i])
        conf = confidence[i]  # type: ignore

        if pred[0] == "":
            pred = []

        n_targ_aa += len(targ)
        if conf >= threshold and len(pred) > 0:
            n_pred_aa += len(pred)
            n_pred_pep += 1

            # pred = [x.replace('I', 'L') for x in pred]
            # n_match_aa += np.sum([m[0]==' ' for m in difflib.ndiff(targ,pred)])
            n_match = self._novor_match(targ, pred)
            n_match_aa += n_match

            if len(pred) == len(targ) and len(targ) == n_match:
                n_match_pep += 1

    pep_recall = n_match_pep / len(targets)
    aa_recall = n_match_aa / n_targ_aa

    if n_pred_pep == 0:
        pep_precision = 1.0
        aa_prec = 1.0
    else:
        pep_precision = n_match_pep / n_pred_pep
        aa_prec = n_match_aa / n_pred_aa

    return aa_prec, aa_recall, pep_recall, pep_precision

`matches_precursor(seq, prec_mass, prec_charge, prec_tol=50)`

Check if a sequence matches the precursor mass within the tolerance `prec_tol`, considering every isotope offset in `isotope_error_range`.

Source code in instanovo/utils/metrics.py

def matches_precursor(
    self, seq: str | list[str], prec_mass: float, prec_charge: int, prec_tol: int = 50
) -> tuple[bool, list[float]]:
    """Check if a sequence matches the precursor mass within some tolerance."""
    seq_mass = self._mass(seq, charge=prec_charge)
    delta_mass_ppm = [
        self._calc_mass_error(seq_mass, prec_mass, prec_charge, isotope)
        for isotope in range(
            self.isotope_error_range[0],
            self.isotope_error_range[1] + 1,
        )
    ]
    return any(abs(d) < prec_tol for d in delta_mass_ppm), delta_mass_ppm