Coverage for instanovo/scripts/update_ipc_format.py: 100%
24 statements
« prev ^ index » next coverage.py v7.11.0, created at 2025-12-08 07:26 +0000
« prev ^ index » next coverage.py v7.11.0, created at 2025-12-08 07:26 +0000
1# /// script
2# requires-python = ">=3.10"
3# dependencies = [
4# "polars",
5# ]
6# ///
7from __future__ import annotations
9import argparse
10from pathlib import Path
12import polars as pl
14from instanovo.__init__ import console
15from instanovo.utils.colorlogging import ColorLog
17logger = ColorLog(console, __name__).logger
def update_ipc(source: Path, target: Path) -> pl.DataFrame:
    """Update .ipc file to new schema format.

    Reads the IPC file at ``source``, renames the legacy column names to the
    new snake_case schema, strips ``_`` characters from both ends of every
    ``modified_sequence`` value and writes the result to ``target``.

    Args:
        source: Path of the input file. Its suffix must be exactly ``.ipc``
            (compared case-insensitively).
        target: Path of the IPC file to write. Missing parent directories
            are created first.

    Returns:
        The converted data frame, i.e. the same data that was written to
        ``target``.

    Raises:
        ValueError: If ``source`` does not have a ``.ipc`` suffix.
    """
    # Compare the whole suffix: an ``endswith("ipc")`` test would also let
    # unrelated extensions such as ``.xipc`` through.
    if source.suffix.lower() != ".ipc":
        raise ValueError("Incorrect file type - .ipc file required.")

    logger.info(f"Processing {source}.")

    df = pl.read_ipc(source=source)
    # Map the legacy column headers onto the new schema names.
    df = df.rename(
        {
            "Sequence": "sequence",
            "Modified sequence": "modified_sequence",
            "MS/MS m/z": "precursor_mz",
            "Charge": "precursor_charge",
            "Intensity": "intensity_array",
            "Mass values": "mz_array",
        }
    )

    # Drop the "_" padding that surrounds the modified sequence strings.
    df = df.with_columns(df["modified_sequence"].str.strip_chars("_"))

    Path(target).parent.mkdir(parents=True, exist_ok=True)
    df.write_ipc(target)

    # Honour the declared return type so callers can keep working with the
    # converted frame without re-reading it from disk.
    return df
def main() -> None:
    """Command-line entry point: convert one ipc file to the new schema.

    Takes two positional arguments, ``source`` and ``target``, wraps both in
    ``Path`` and hands them to ``update_ipc``.
    """
    cli = argparse.ArgumentParser()
    # Both arguments are plain positionals; register them in order.
    for arg_name, arg_help in (
        ("source", "source file or folder"),
        ("target", "target ipc file to be saved"),
    ):
        cli.add_argument(arg_name, help=arg_help)

    parsed = cli.parse_args()
    update_ipc(Path(parsed.source), Path(parsed.target))
# Run the converter only when this file is executed as a script, so that
# importing the module does not trigger argument parsing.
if __name__ == "__main__":
    main()