Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Partially fix #5310 - allow loading of mixed cases with some fully WTS sample individuals #5327

Draft
wants to merge 9 commits into
base: main
Choose a base branch
from
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ About changelog [here](https://keepachangelog.com/en/1.0.0/)
### Changed
- Allow ACMG criteria strength modification to Very strong/Stand-alone (#5297)
- Mocked the Ensembl liftover service in igv tracks tests (#5319)
- Allow loading of mixed analysis type cases where some individuals are fully WTS and do not appear in DNA VCFs (#5327)
### Fixed
- Re-enable display of case and individual specific tracks (pre-computed coverage, UPD, zygosity) (#5300)
- Disable 2-color mode in IGV.js by default, since it obscures variant proportion of reads. Can be manually enabled (#5311)
Expand Down
22 changes: 11 additions & 11 deletions scout/adapter/mongo/variant_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -621,16 +621,16 @@ def _has_variants_in_file(self, variant_file: str) -> bool:

def load_variants(
self,
case_obj,
variant_type="clinical",
category="snv",
rank_threshold=None,
chrom=None,
start=None,
end=None,
gene_obj=None,
custom_images=None,
build="37",
case_obj: dict,
variant_type: str = "clinical",
category: str = "snv",
rank_threshold: float = None,
chrom: str = None,
start: int = None,
end: int = None,
gene_obj: dict = None,
custom_images: list = None,
build: str = "37",
):
"""Load variants for a case into scout.

Expand Down Expand Up @@ -675,7 +675,7 @@ def load_variants(
)

gene_to_panels = self.gene_to_panels(case_obj)
genes = [gene_obj for gene_obj in self.all_genes(build=build)]
genes = list(self.all_genes(build=build))
hgncid_to_gene = self.hgncid_to_gene(genes=genes, build=build)
genomic_intervals = self.get_coding_intervals(genes=genes, build=build)

Expand Down
9 changes: 2 additions & 7 deletions scout/constants/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@
MIMNR_PATTERN,
OMIM_STATUS_MAP,
)
from .file_types import ORDERED_FILE_TYPE_MAP, ORDERED_OMICS_FILE_TYPE_MAP
from .file_types import INVALID_SAMPLE_TYPES, ORDERED_FILE_TYPE_MAP, ORDERED_OMICS_FILE_TYPE_MAP
from .filters import (
CLINICAL_FILTER_BASE,
CLINICAL_FILTER_BASE_CANCER,
Expand All @@ -64,12 +64,7 @@
PANEL_GENE_INFO_TRANSCRIPTS,
UPDATE_GENES_RESOURCES,
)
from .igv_tracks import (
CASE_SPECIFIC_TRACKS,
HUMAN_REFERENCE,
IGV_TRACKS,
USER_DEFAULT_TRACKS,
)
from .igv_tracks import CASE_SPECIFIC_TRACKS, HUMAN_REFERENCE, IGV_TRACKS, USER_DEFAULT_TRACKS
from .indexes import ID_PROJECTION, INDEXES
from .panels import PANELAPP_CONFIDENCE_EXCLUDE
from .phenotype import (
Expand Down
12 changes: 12 additions & 0 deletions scout/constants/file_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,18 @@
]
)

INVALID_SAMPLE_TYPES = {
"snv": ["wts"],
"sv": ["wts"],
"mei": ["wts"],
"str": ["wts"],
"vcf_snv_mt": ["wts"],
"vcf_snv_research_mt": ["wts"],
"vcf_snv_research": ["wts"],
"vcf_sv_research_mt": ["wts"],
"vcf_sv_research": ["wts"],
"vcf_mei_research": ["wts"],
}

ORDERED_OMICS_FILE_TYPE_MAP = OrderedDict(
[
Expand Down
1 change: 1 addition & 0 deletions scout/demo/643594.config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@ samples:
splice_junctions_bed: scout/demo/ACC5963A1_lanes_1234_star_sorted_sj_filtered_sorted.bed.gz
d4_file: scout/demo/ADM1059A3.d4


custom_images:
str_variants_images:
- title: A png image
Expand Down
14 changes: 4 additions & 10 deletions scout/parse/variant/genotype.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,16 +27,10 @@
LOG = logging.getLogger(__name__)


def parse_genotypes(variant, individuals, individual_positions):
"""Parse the genotype calls for a variant

Args:
variant(cyvcf2.Variant)
individuals: List[dict]
individual_positions(dict)
Returns:
genotypes(list(dict)): A list of genotypes
"""
def parse_genotypes(
variant: cyvcf2.Variant, individuals: List[Dict], individual_positions: Dict
) -> List[Dict]:
"""Parse the genotype calls for a variant"""
genotypes = []
for ind in individuals:
pos = individual_positions[ind["individual_id"]]
Expand Down
16 changes: 5 additions & 11 deletions scout/parse/variant/models.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,13 @@
def parse_genetic_models(models_info, case_id):
def parse_genetic_models(models_info: str, case_id: str) -> list:
"""Parse the genetic models entry of a vcf

Args:
models_info(str): The raw vcf information
case_id(str)

Returns:
genetic_models(list)

Return inheritance patterns.
"""
genetic_models = []
if models_info:
for family_info in models_info.split(","):
splitted_info = family_info.split(":")
if splitted_info[0] == case_id:
genetic_models = splitted_info[1].split("|")
split_info = family_info.split(":")
if split_info[0] == case_id:
genetic_models = split_info[1].split("|")

return genetic_models
18 changes: 5 additions & 13 deletions scout/parse/variant/rank_score.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,9 @@
def parse_rank_score(rank_score_entry, case_id):
"""Parse the rank score

Args:
rank_score_entry(str): The raw rank score entry
case_id(str)

Returns:
rank_score(float)
"""
def parse_rank_score(rank_score_entry: str, case_id: str) -> float:
"""Parse the rank score from the raw rank score entry"""
rank_score = None
if rank_score_entry:
for family_info in rank_score_entry.split(","):
splitted_info = family_info.split(":")
if case_id == splitted_info[0]:
rank_score = float(splitted_info[1])
split_info = family_info.split(":")
if case_id == split_info[0]:
rank_score = float(split_info[1])
return rank_score
169 changes: 68 additions & 101 deletions scout/parse/variant/variant.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

from cyvcf2 import Variant

from scout.constants import CHR_PATTERN
from scout.constants import CHR_PATTERN, INVALID_SAMPLE_TYPES
from scout.exceptions import VcfError
from scout.utils.convert import call_safe
from scout.utils.dict_utils import remove_nonetype
Expand Down Expand Up @@ -57,89 +57,66 @@ def parse_variant(
# Vep information
vep_header = vep_header or []

parsed_variant = {}
# Create the ID for the variant
case_id = case["_id"]
genmod_key = get_genmod_key(case)
chrom_match = CHR_PATTERN.match(variant.CHROM)
chrom = chrom_match.group(2)

# Builds a dictionary with the different ids that are used
alt = get_variant_alternative(variant, category)

parsed_variant["ids"] = parse_ids(
chrom=chrom,
pos=variant.POS,
ref=variant.REF,
alt=alt,
case_id=case_id,
variant_type=variant_type,
)
parsed_variant["case_id"] = case_id
# type can be 'clinical' or 'research'
parsed_variant["variant_type"] = variant_type

category = get_category(category, variant, parsed_variant)
parsed_variant["category"] = category

################# General information #################
parsed_variant["reference"] = variant.REF

### We allways assume splitted and normalized vcfs!!!
### We always assume split and normalized vcfs!!!
if len(variant.ALT) > 1:
raise VcfError("Variants are only allowed to have one alternative")
parsed_variant["alternative"] = alt

# cyvcf2 will set QUAL to None if '.' in vcf
parsed_variant["quality"] = variant.QUAL
# Builds a dictionary with the different ids that are used
alt = get_variant_alternative(variant, category)

coordinates = parse_coordinates(variant, category, case.get("genome_build"))

parsed_variant["filters"] = get_filters(variant)
parsed_variant = {
"ids": parse_ids(
chrom=chrom,
pos=variant.POS,
ref=variant.REF,
alt=alt,
case_id=case_id,
variant_type=variant_type,
),
"case_id": case_id,
"variant_type": variant_type,
"reference": variant.REF,
"quality": variant.QUAL, # cyvcf2 will set QUAL to None if '.' in vcf
"filters": get_filters(variant),
"alternative": alt,
"chromosome": chrom,
"cytoband_end": coordinates["cytoband_end"],
"cytoband_start": coordinates["cytoband_start"],
"end": coordinates["end"],
"end_chrom": coordinates["end_chrom"],
"length": coordinates["length"],
"mate_id": coordinates["mate_id"],
"position": coordinates["position"],
"sub_category": coordinates["sub_category"],
"samples": get_samples(variant, individual_positions, case, category),
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is the main change really: we pass on the category given to this function for checking that the samples are relevant to the variant type.

"compounds": parse_compounds(
compound_info=variant.INFO.get("Compounds"),
case_id=genmod_key,
variant_type=variant_type,
),
"rank_score": parse_rank_score(variant.INFO.get("RankScore", ""), genmod_key) or 0,
"genetic_models": parse_genetic_models(variant.INFO.get("GeneticModels"), genmod_key),
"str_swegen_mean": call_safe(float, variant.INFO.get("SweGenMean")),
"str_swegen_std": call_safe(float, variant.INFO.get("SweGenStd")),
"somatic_score": call_safe(int, variant.INFO.get("SOMATICSCORE")),
"custom": parse_custom_data(variant.INFO.get("SCOUT_CUSTOM")),
}

# Add the dbsnp ids
category = get_and_set_category(parsed_variant, category, variant)
set_dbsnp_id(parsed_variant, variant.ID)

# This is the id of other position in translocations
# (only for specific svs)
parsed_variant["mate_id"] = None

################# Position specific #################
parsed_variant["chromosome"] = chrom

coordinates = parse_coordinates(variant, category, case.get("genome_build"))

parsed_variant["cytoband_end"] = coordinates["cytoband_end"]
parsed_variant["cytoband_start"] = coordinates["cytoband_start"]
parsed_variant["end"] = coordinates["end"]
parsed_variant["end_chrom"] = coordinates["end_chrom"]
parsed_variant["length"] = coordinates["length"]
parsed_variant["mate_id"] = coordinates["mate_id"]
parsed_variant["position"] = coordinates["position"]
parsed_variant["sub_category"] = coordinates["sub_category"]

################# Add rank score #################
# The rank score is central for displaying variants in scout.
# Use RankScore for somatic variations also

rank_score = parse_rank_score(variant.INFO.get("RankScore", ""), genmod_key)
parsed_variant["rank_score"] = rank_score or 0

################# Add gt calls #################
parsed_variant["samples"] = get_samples(variant, individual_positions, case)

################# Add compound information #################
compounds = parse_compounds(
compound_info=variant.INFO.get("Compounds"),
case_id=genmod_key,
variant_type=variant_type,
)

parsed_variant["compounds"] = compounds

################# Add inheritance patterns #################
parsed_variant["genetic_models"] = parse_genetic_models(
variant.INFO.get("GeneticModels"), genmod_key
)

################# Add autozygosity calls if present #################
parsed_variant["azlength"] = call_safe(int, variant.INFO.get("AZLENGTH"))
parsed_variant["azqual"] = call_safe(float, variant.INFO.get("AZQUAL"))
Expand All @@ -148,27 +125,19 @@ def parse_variant(
set_str_info(variant, parsed_variant)
# STR source dict with display string, source type and entry id
set_str_source(parsed_variant, variant)
parsed_variant["str_swegen_mean"] = call_safe(float, variant.INFO.get("SweGenMean"))
parsed_variant["str_swegen_std"] = call_safe(float, variant.INFO.get("SweGenStd"))

# MEI variant info
set_mei_info(variant, parsed_variant)

# Add Fusion info
set_fusion_info(variant, parsed_variant)

################# Add somatic info ##################
parsed_variant["somatic_score"] = call_safe(int, variant.INFO.get("SOMATICSCORE"))

################# Add mitomap info, from HmtNote #################
set_mitomap_associated_diseases(parsed_variant, variant)

################# Add HmtVar variant id, from HmtNote #################
add_hmtvar(parsed_variant, variant)

### Add custom info
parsed_variant["custom"] = parse_custom_data(variant.INFO.get("SCOUT_CUSTOM"))

### Add gene and transcript information
if parsed_variant.get("category") == "fusion":
parsed_transcripts = add_gene_and_transcript_info_for_fusions(parsed_variant)
Expand Down Expand Up @@ -378,42 +347,40 @@ def get_filters(variant):
return ["PASS"]


def get_samples(variant, individual_positions, case):
def get_samples(variant: Variant, individual_positions: dict, case: dict, category: str) -> List:
"""Get samples

Args:
variant(cyvcf2.Variant)
individual_positions(dict):
case(dict)
Return:
variant filter
Add GT calls to individuals.

Do not add individuals if they are not wanted based on the analysis type,
eg a WTS only individual for a DNA SNV variant.
"""
if individual_positions and case["individuals"]:
return parse_genotypes(variant, case["individuals"], individual_positions)
if individual_positions and bool(case["individuals"]):
individuals = [
ind
for ind in case["individuals"]
if ind.get("analysis_type") not in INVALID_SAMPLE_TYPES.get(category, [])
]
return parse_genotypes(variant, individuals, individual_positions)
return []


def get_category(category, variant, parsed_variant):
"""Get category of variant.
Args:
category(str)
variant(cyvcf2.Variant)
parsed_variant(dict)
Return:
category(str)
def get_and_set_category(parsed_variant: dict, category: str, variant: Variant) -> str:
"""Set category of variant. Convenience return of category only.

If category not set, assume it's an SNP or INDEL and set to type "snv".
Warn on old style "mnp", but set "snv" as well - these are "INDEL" effectively.
"""

if category:
parsed_variant["category"] = category
return category

var_type = variant.var_type
if var_type == "indel":
return "snv"
if var_type == "snp":
return "snv"
if var_type == "mnp":
LOG.warning("Category MNP found: {}".format(parsed_variant["ids"]["display_name"]))
return "snv"
return var_type
if variant.var_type == "mnp":
LOG.warning(f"Category MNP found: {parsed_variant['ids']['display_name']}")

parsed_variant["category"] = "snv"
return parsed_variant["category"]


def set_dbsnp_id(parsed_variant, variant_id):
Expand Down
Loading
Loading