Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit0af520a

Browse files
authored
Merge pull request#349 from bcgsc/Release/v3.1.1
Release/v3.1.1
2 parentsdbbd9a3 +fb4b645 commit0af520a

File tree

72 files changed

+845
-128
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

72 files changed

+845
-128
lines changed

‎.coveragerc

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,3 +5,8 @@ concurrency = multiprocessing
55
[html]
66
directory = coverage
77
title = mavis coverage report
8+
9+
[report]
10+
exclude_lines =
11+
pragma: no cover
12+
if TYPE_CHECKING:

‎.github/workflows/build.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ on:
1212

1313
jobs:
1414
build:
15-
runs-on:ubuntu-latest
15+
runs-on:ubuntu-20.04
1616
strategy:
1717
matrix:
1818
python-version:["3.7", "3.8", "3.9", "3.10"]

‎.github/workflows/quick-tests.yml

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ on: [push]
77

88
jobs:
99
build:
10-
runs-on:ubuntu-latest
10+
runs-on:ubuntu-20.04
1111
strategy:
1212
matrix:
1313
python-version:["3.7", "3.8", "3.9", "3.10"]
@@ -26,12 +26,16 @@ jobs:
2626
run:|
2727
pip install flake8
2828
# stop the build if there are Python syntax errors or undefined names
29-
flake8 src/mavis --count --select=E9,F63,F7,F82 --show-source --statistics
29+
flake8 src tests --count --show-source --statistics
3030
-name:Lint with black
3131
run:|
3232
pip install black
3333
# stop the build if black needs to be run
34-
black src/mavis -S -l 100 --check
34+
black src tests -S -l 100 --check
35+
-name:Lint with isort
36+
run:|
37+
pip install isort
38+
isort src tests --check
3539
-name:install bwa
3640
run:|
3741
git clone https://github.com/lh3/bwa.git

‎.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ junit
1616
*eggs/
1717
.mypy_cache
1818
.snakemake
19+
.venv*
1920

2021
# aligners
2122
blat

‎docs/outputs/columns.md

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -610,3 +610,15 @@ non-specific events.
610610
Flag to indicate if the
611611
current event was a supplementary call, meaning a call that was
612612
found as a result of validating another event.
613+
614+
##dgv
615+
616+
**type**:`str`
617+
618+
ID(s) of SVs from the DGV database matched to an SV call in the summary step
619+
620+
##known\_sv\_count
621+
622+
**type**:`int`
623+
624+
Number of known SVs matched to a call in the summary step

‎setup.cfg

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[metadata]
22
name = mavis
3-
version = 3.1.0
3+
version = 3.1.1
44
url = https://github.com/bcgsc/mavis.git
55
download_url = https://github.com/bcgsc/mavis/archive/v2.2.10.tar.gz
66
description = A Structural Variant Post-Processing Package
@@ -25,6 +25,9 @@ statistics = True
2525
[flake8]
2626
ignore = E501,W503,E203
2727

28+
[isort]
29+
profile = black
30+
2831
[options]
2932
packages = find:
3033
package_dir =
@@ -71,6 +74,7 @@ test =
7174
dev =
7275
black
7376
flake8
77+
isort
7478
twine
7579
wheel
7680
timeout-decorator>=0.3.3

‎src/mavis/annotate/file_io.py

Lines changed: 73 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,8 @@
44
importjson
55
importos
66
importre
7-
fromtypingimportCallable,Dict,List,Optional
7+
importwarnings
8+
fromtypingimportTYPE_CHECKING,Callable,Dict,List,Optional
89

910
importpandasaspd
1011
fromBioimportSeqIO
@@ -13,34 +14,37 @@
1314
from ..constantsimportCODON_SIZE,GIEMSA_STAIN,START_AA,STOP_AA,STRAND,translate
1415
from ..intervalimportInterval
1516
from ..typesimportReferenceAnnotations,ReferenceGenome
16-
from ..utilimportlogger
17+
from ..utilimportlogger,read_bpp_from_input_file
1718
from .baseimportBioInterval,ReferenceName
1819
from .genomicimportExon,Gene,PreTranscript,Template,Transcript
1920
from .proteinimportDomain,Translation
2021

22+
ifTYPE_CHECKING:
23+
from ..breakpointimportBreakpointPair
24+
2125

2226
defload_masking_regions(*filepaths:str)->Dict[str,List[BioInterval]]:
2327
"""
2428
reads a file of regions. The expected input format for the file is tab-delimited and
2529
the header should contain the following columns
26-
2730
- chr: the chromosome
2831
- start: start of the region, 1-based inclusive
2932
- end: end of the region, 1-based inclusive
3033
- name: the name/label of the region
31-
3234
For example:
33-
3435
.. code-block:: text
35-
3636
#chr start end name
3737
chr20 25600000 27500000 centromere
38-
3938
Args:
4039
filepaths: path(s) to the input tab-delimited file(s)
4140
Returns:
4241
a dictionary keyed by chromosome name with values of lists of regions on the chromosome
4342
"""
43+
warnings.warn(
44+
"BED file support will be deprecated in future versions.",
45+
category=DeprecationWarning,
46+
stacklevel=2,
47+
)
4448
regions:Dict[str,List[BioInterval]]= {}
4549
forfilepathinfilepaths:
4650
df=pd.read_csv(
@@ -58,6 +62,67 @@ def load_masking_regions(*filepaths: str) -> Dict[str, List[BioInterval]]:
5862
returnregions
5963

6064

65+
defload_known_sv(*filepaths:str)->Dict[str,List["BreakpointPair"]]:
66+
"""
67+
loads standard MAVIS or BED file input into a dictionary of known structural variants.
68+
69+
Standard BED file requirements:
70+
reads a file of regions. The expected input format for the file is tab-delimited and
71+
the header should contain the following columns
72+
73+
- chr: the chromosome
74+
- start: start of the region, 1-based inclusive
75+
- end: end of the region, 1-based inclusive
76+
- name: the name/label of the region
77+
78+
For example:
79+
80+
.. code-block:: text
81+
82+
#chr start end name
83+
chr20 25600000 27500000 centromere
84+
Args:
85+
filepaths: path(s) to standard MAVIS format or BED format file(s)
86+
Returns:
87+
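a dictionary of lists: MAVIS input is keyed by the (break1 chr, break2 chr) tuple with BreakpointPair values; BED input is keyed by chromosome with BioInterval values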
88+
"""
89+
regions= {}
90+
forfilepathinfilepaths:
91+
header=set(pd.read_csv(filepath,nrows=1,sep='\t').columns)
92+
mavis_header= {'break1_chromosome','break2_chromosome'}
93+
bed_header= {'chr','start','end','name'}
94+
ifmavis_header.issubset(header):
95+
bpps=read_bpp_from_input_file(filepath,expand_orient=True,expand_svtype=True)
96+
forbppinbpps:
97+
chr_list= [bpp.break1.chr,bpp.break2.chr]
98+
regions.setdefault(tuple(chr_list), []).append(bpp)
99+
100+
else:
101+
warnings.warn(
102+
"BED file support will be deprecated in future versions.",
103+
category=DeprecationWarning,
104+
stacklevel=2,
105+
)
106+
107+
df=pd.read_csv(
108+
filepath,sep='\t',dtype={'chr':str,'start':int,'end':int,'name':str}
109+
)
110+
forcolinbed_header:
111+
ifcolnotindf:
112+
raiseKeyError(f'missing required column ({col})')
113+
df['chr']=df['chr'].apply(lambdac:ReferenceName(c))
114+
forrowindf.to_dict('records'):
115+
known_sv_region=BioInterval(
116+
reference_object=row['chr'],
117+
start=row['start'],
118+
end=row['end'],
119+
name=row['name'],
120+
)
121+
regions.setdefault(known_sv_region.reference_object, []).append(known_sv_region)
122+
123+
returnregions
124+
125+
61126
defload_annotations(
62127
*filepaths:str,
63128
reference_genome:Optional[ReferenceGenome]=None,
@@ -117,7 +182,6 @@ def parse_annotations_json(
117182
domain_errors=0
118183

119184
forgene_dictindata['genes']:
120-
121185
gene=Gene(
122186
chr=gene_dict['chr'],
123187
start=gene_dict['start'],
@@ -346,7 +410,7 @@ class ReferenceFile:
346410
'reference_genome':load_reference_genome,
347411
'masking':load_masking_regions,
348412
'template_metadata':load_templates,
349-
'dgv_annotation':load_masking_regions,
413+
'dgv_annotation':load_known_sv,
350414
'aligner_reference':None,
351415
}
352416
"""dict: Mapping of file types (based on ENV name) to load functions"""

‎src/mavis/annotate/variant.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -368,7 +368,6 @@ def __init__(self, refseq: str, mutseq: str):
368368
self.ins_seq=self.mut_seq[:0-self.cterm_aligned]
369369

370370
eliflen(self.ref_seq)-self.cterm_aligned+1<=self.nterm_aligned:
371-
372371
# repeat region
373372
diff=len(self.mut_seq)-len(self.ref_seq)
374373
ifdiff>0:

‎src/mavis/cluster/__init__.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1 +1,4 @@
1+
__all__= ['merge_breakpoint_pairs']
2+
3+
14
from .clusterimportmerge_breakpoint_pairs

‎src/mavis/constants.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -223,7 +223,7 @@ class CIGAR(MavisNamespace):
223223
"""
224224

225225
M=0
226-
I=1
226+
I=1# noqa
227227
D=2
228228
N=3
229229
S=4
@@ -386,6 +386,7 @@ class COLUMNS(MavisNamespace):
386386
library:str='library'
387387
cluster_id:str='cluster_id'
388388
cluster_size:str='cluster_size'
389+
dgv:str='dgv'
389390
validation_id:str='validation_id'
390391
annotation_id:str='annotation_id'
391392
product_id:str='product_id'
@@ -463,6 +464,7 @@ class COLUMNS(MavisNamespace):
463464
contig_strand_specific:str='contig_strand_specific'
464465
contigs_assembled:str='contigs_assembled'
465466
call_sequence_complexity:str='call_sequence_complexity'
467+
known_sv_count:str='known_sv_count'
466468
spanning_reads:str='spanning_reads'
467469
spanning_read_names:str='spanning_read_names'
468470
flanking_median_fragment_size:str='flanking_median_fragment_size'
@@ -555,4 +557,6 @@ def sort_columns(input_columns):
555557
COLUMNS.tools,
556558
COLUMNS.tools,
557559
COLUMNS.tracking_id,
560+
COLUMNS.dgv,
561+
COLUMNS.known_sv_count,
558562
}

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp