Coverage for instanovo/scripts/update_ipc_format.py: 100%

24 statements  

« prev     ^ index     » next       coverage.py v7.11.0, created at 2025-12-08 07:26 +0000

1# /// script 

2# requires-python = ">=3.10" 

3# dependencies = [ 

4# "polars", 

5# ] 

6# /// 

7from __future__ import annotations 

8 

9import argparse 

10from pathlib import Path 

11 

12import polars as pl 

13 

14from instanovo.__init__ import console 

15from instanovo.utils.colorlogging import ColorLog 

16 

17logger = ColorLog(console, __name__).logger 

18 

19 

def update_ipc(source: Path, target: Path) -> pl.DataFrame:
    """Update .ipc file to new schema format.

    Reads the IPC file at ``source``, renames the legacy column names to the
    new snake_case schema, strips leading and trailing underscores from the
    ``modified_sequence`` values, and writes the result to ``target``.

    Args:
        source: Path of the input file. Its suffix must end with ``ipc``
            (compared case-insensitively).
        target: Path the converted file is written to. Missing parent
            directories are created first.

    Returns:
        The converted dataframe, i.e. the same data that was written to
        ``target``.

    Raises:
        ValueError: If the suffix of ``source`` does not end with ``ipc``.
    """
    if not source.suffix.lower().endswith("ipc"):
        raise ValueError("Incorrect file type - .ipc file required.")

    logger.info(f"Processing {source}.")

    df = pl.read_ipc(source=source)

    # Map the legacy column headers onto the new schema names.
    df = df.rename(
        {
            "Sequence": "sequence",
            "Modified sequence": "modified_sequence",
            "MS/MS m/z": "precursor_mz",
            "Charge": "precursor_charge",
            "Intensity": "intensity_array",
            "Mass values": "mz_array",
        }
    )

    # Drop the underscore padding around the modified sequences.
    df = df.with_columns(df["modified_sequence"].str.strip_chars("_"))

    # Make sure the destination directory exists before writing.
    Path(target).parent.mkdir(parents=True, exist_ok=True)
    df.write_ipc(target)

    # The signature promises a DataFrame; without this explicit return the
    # function would fall off the end and hand callers ``None``.
    return df

43 

44 

def main() -> None:
    """Parse the command-line arguments and convert the given ipc file."""
    cli = argparse.ArgumentParser()

    # Positional arguments, registered in the order they appear on the CLI.
    positional_args = (
        ("source", "source file or folder"),
        ("target", "target ipc file to be saved"),
    )
    for arg_name, arg_help in positional_args:
        cli.add_argument(arg_name, help=arg_help)

    parsed = cli.parse_args()

    update_ipc(Path(parsed.source), Path(parsed.target))

58 

59 

# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()