Skip to content

Model

model

MODEL_TYPE = 'transformer' module-attribute

logger = ColorLog(console, __name__).logger module-attribute

InstaNovo(residue_set: ResidueSet, dim_model: int = 768, n_head: int = 16, dim_feedforward: int = 2048, encoder_layers: int = 9, decoder_layers: int = 9, dropout: float = 0.1, max_charge: int = 5, use_flash_attention: bool = False, conv_peak_encoder: bool = False, peak_embedding_dtype: torch.dtype | str = torch.float64)

Bases: Module, Decodable

The InstaNovo model.

vocab_size = len(residue_set) instance-attribute

use_flash_attention = use_flash_attention instance-attribute

conv_peak_encoder = conv_peak_encoder instance-attribute

latent_spectrum = nn.Parameter(torch.randn(1, 1, dim_model)) instance-attribute

pad_spectrum = nn.Parameter(torch.randn(1, 1, dim_model)) instance-attribute

peak_encoder = MultiScalePeakEmbedding(dim_model, dropout=dropout, float_dtype=peak_embedding_dtype) instance-attribute

conv_encoder = ConvPeakEmbedding(dim_model, dropout=dropout) instance-attribute

encoder = nn.TransformerEncoder(encoder_layer, num_layers=encoder_layers) instance-attribute

aa_embed = nn.Embedding(self.vocab_size, dim_model, padding_idx=0) instance-attribute

aa_pos_embed = PositionalEncoding(dim_model, dropout, max_len=MAX_SEQUENCE_LENGTH) instance-attribute

decoder = nn.TransformerDecoder(decoder_layer, num_layers=decoder_layers) instance-attribute

head = nn.Linear(dim_model, self.vocab_size) instance-attribute

charge_encoder = nn.Embedding(max_charge, dim_model) instance-attribute

residue_set: ResidueSet property

Every model must have a residue_set attribute.

get_pretrained() -> list[str] staticmethod

Get a list of pretrained model ids.

load(path: str, update_residues_to_unimod: bool = True, override_config: DictConfig | dict | None = None) -> tuple['InstaNovo', 'DictConfig'] classmethod

Load model from checkpoint path.

PARAMETER DESCRIPTION
path

Path to checkpoint file.

TYPE: str

update_residues_to_unimod

Update residues to unimod, defaults to True.

TYPE: bool DEFAULT: True

override_config

Optional config values to override, given as a DictConfig or dict; defaults to None.

TYPE: DictConfig | dict | None DEFAULT: None

RETURNS DESCRIPTION
tuple['InstaNovo', 'DictConfig']

Tuple of the loaded model and its config.

from_pretrained(model_id: str, update_residues_to_unimod: bool = True, override_config: DictConfig | dict | None = None) -> tuple['InstaNovo', 'DictConfig'] classmethod

Download and load by model id or model path.

PARAMETER DESCRIPTION
model_id

Model id or model path.

TYPE: str

update_residues_to_unimod

Update residues to unimod, defaults to True.

TYPE: bool DEFAULT: True

override_config

Optional config values to override, given as a DictConfig or dict; defaults to None.

TYPE: DictConfig | dict | None DEFAULT: None

RETURNS DESCRIPTION
tuple['InstaNovo', 'DictConfig']

Tuple of the loaded model and its config.

forward(x: Float[Spectrum, ' batch'], p: Float[PrecursorFeatures, ' batch'], y: Integer[Peptide, ' batch'], x_mask: Optional[Bool[SpectrumMask, ' batch']] = None, y_mask: Optional[Bool[PeptideMask, ' batch']] = None, add_bos: bool = True, return_encoder_output: bool = False) -> Float[ResidueLogits, 'batch token+1']

Model forward pass.

PARAMETER DESCRIPTION
x

Spectra, float Tensor (batch, n_peaks, 2)

TYPE: Float[Spectrum, ' batch']

p

Precursors, float Tensor (batch, 3)

TYPE: Float[PrecursorFeatures, ' batch']

y

Peptide token indices, long Tensor (batch, seq_len)

TYPE: Integer[Peptide, ' batch']

x_mask

Spectra padding mask, True for padded indices, bool Tensor (batch, n_peaks)

TYPE: Optional[Bool[SpectrumMask, ' batch']] DEFAULT: None

y_mask

Peptide padding mask, bool Tensor (batch, seq_len)

TYPE: Optional[Bool[PeptideMask, ' batch']] DEFAULT: None

add_bos

Whether to prepend a beginning-of-sequence (BOS) prefix to y.

TYPE: bool DEFAULT: True

RETURNS DESCRIPTION
logits

float Tensor (batch, n, vocab_size), or (batch, n+1, vocab_size) if add_bos is True.

TYPE: Float[ResidueLogits, 'batch token+1']

init(spectra: Float[Spectrum, ' batch'], precursors: Float[PrecursorFeatures, ' batch'], spectra_mask: Optional[Bool[SpectrumMask, ' batch']] = None) -> Tuple[Tuple[Float[Spectrum, ' batch'], Bool[SpectrumMask, ' batch']], Float[ResidueLogProbabilities, 'batch token']]

Initialise model encoder.

score_candidates(sequences: Integer[Peptide, ' batch'], precursor_mass_charge: Float[PrecursorFeatures, ' batch'], spectra: Float[Spectrum, ' batch'], spectra_mask: Bool[SpectrumMask, ' batch']) -> Float[ResidueLogProbabilities, 'batch token']

Score a set of candidate sequences.

get_residue_masses(mass_scale: int) -> Integer[DiscretizedMass, ' residue']

Get the scaled masses of all residues.

get_eos_index() -> int

Get the EOS token ID.

get_empty_index() -> int

Get the PAD token ID.

decode(sequence: Peptide) -> list[str]

Decode a single sequence of AA IDs.

idx_to_aa(idx: Peptide) -> list[str]

Decode a single sample of indices to a list of amino acid (aa) strings.

batch_idx_to_aa(idx: Integer[Peptide, ' batch'], reverse: bool) -> list[list[str]]

Decode a batch of indices to aa lists.

score_sequences(peptides: Integer[Peptide, ' batch'] | list[str] | list[list[str]], peptides_mask: Bool[PeptideMask, ' batch'] | None = None, spectra: Float[Spectrum, ' batch'] | None = None, precursors: Float[PrecursorFeatures, ' batch'] | None = None, spectra_mask: Bool[SpectrumMask, ' batch'] | None = None, spectra_embedding: Float[SpectrumEmbedding, ' batch'] | None = None, max_batch_size: int = 256) -> Float[ResidueLogProbabilities, 'batch token']

Score a set of peptides.