Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support short null for exon mode #183

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 13 additions & 8 deletions pyard/data_repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -362,17 +362,12 @@ def generate_alleles_and_xx_codes_and_who(
xx_codes = db.load_dict(db_connection, "xx_codes", ("allele_1d", "allele_list"))
xx_codes = {k: v.split("/") for k, v in xx_codes.items()}

shortnulls = db.load_dict(
db_connection, "shortnulls", ("shortnull", "allele_list")
)
shortnulls = {k: v.split("/") for k, v in shortnulls.items()}

exp_alleles = db.load_dict(
db_connection, "exp_alleles", ("exp_allele", "allele_list")
)
exp_alleles = {k: v.split("/") for k, v in exp_alleles.items()}

return valid_alleles, who_alleles, xx_codes, who_group, shortnulls, exp_alleles
return valid_alleles, who_alleles, xx_codes, who_group, exp_alleles

# Create a Pandas DataFrame from the mac_code list file
# Skip the header (first 6 lines) and use only the Allele column
Expand Down Expand Up @@ -481,6 +476,17 @@ def generate_alleles_and_xx_codes_and_who(
db_connection, "who_group", flat_who_group, columns=("who", "allele_list")
)

return valid_alleles, who_alleles, xx_codes, who_group, exp_alleles


def generate_short_nulls(db_connection, who_group):
if db.table_exists(db_connection, "shortnulls"):
shortnulls = db.load_dict(
db_connection, "shortnulls", ("shortnull", "allele_list")
)
shortnulls = {k: v.split("/") for k, v in shortnulls.items()}
return shortnulls

# shortnulls
# scan WHO alleles for those with expression characters and make shortnull mappings
# DRB4*01:03N | DRB4*01:03:01:02N/DRB4*01:03:01:13N
Expand Down Expand Up @@ -508,8 +514,7 @@ def generate_alleles_and_xx_codes_and_who(

db.save_dict(db_connection, "shortnulls", shortnulls, ("shortnull", "allele_list"))
shortnulls = {k: v.split("/") for k, v in shortnulls.items()}

return valid_alleles, who_alleles, xx_codes, who_group, shortnulls, exp_alleles
return shortnulls


def generate_mac_codes(db_connection: sqlite3.Connection, refresh_mac: bool):
Expand Down
19 changes: 15 additions & 4 deletions pyard/pyard.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
from . import data_repository as dr
from .smart_sort import smart_sort_comparator
from .exceptions import InvalidAlleleError, InvalidMACError, InvalidTypingError
from .misc import get_n_field_allele, get_2field_allele
from .misc import get_n_field_allele, get_2field_allele, expression_chars

HLA_regex = re.compile("^HLA-")

Expand Down Expand Up @@ -109,12 +109,14 @@ def __init__(
self.who_alleles,
self.xx_codes,
self.who_group,
self.shortnulls,
self.exp_alleles,
) = dr.generate_alleles_and_xx_codes_and_who(
self.db_connection, imgt_version, self.ars_mappings
)

# Generate short nulls from WHO mapping
self.shortnulls = dr.generate_short_nulls(self.db_connection, self.who_group)

# Load Serology mappings
dr.generate_serology_mapping(self.db_connection, imgt_version)
# Load V2 to V3 mappings
Expand Down Expand Up @@ -213,7 +215,16 @@ def redux(self, allele: str, redux_type: VALID_REDUCTION_TYPES, reping=True) ->
return allele
elif redux_type == "exon":
if allele in self.ars_mappings.exon_group:
return self.ars_mappings.exon_group[allele]
exon_group_allele = self.ars_mappings.exon_group[allele]
# Check if the 3 field exon allele has 4 field alleles
# that all have the same expression character
last_char = allele[-1]
if last_char in expression_chars:
exon_short_null_allele = exon_group_allele + last_char
if self.is_shortnull(exon_short_null_allele):
return exon_short_null_allele

return exon_group_allele
else:
# for 'exon' return allele with only first 3 fields
return ":".join(allele.split(":")[0:3])
Expand Down Expand Up @@ -360,7 +371,7 @@ def redux_gl(self, glstring: str, redux_type: VALID_REDUCTION_TYPES) -> str:
else:
raise InvalidMACError(f"{glstring} is an invalid MAC.")

# Handle shortnulls
# Handle short nulls
if self._config["reduce_shortnull"] and self.is_shortnull(glstring):
return self.redux_gl("/".join(self.shortnulls[glstring]), redux_type)

Expand Down
27 changes: 17 additions & 10 deletions tests/features/shortnulls.feature
Original file line number Diff line number Diff line change
@@ -1,20 +1,27 @@
Feature: shortnull
Feature: Short Nulls

**Short Nulls**: If a reduced allele with an expression character has the same expression
character in its 4-field expansion, the expression character should be propagated in the
reduced version of the allele.

Scenario Outline:

Given the allele as <Allele>
When reducing on the <Level> level (ambiguous)
Then the reduced allele is found to be <Redux Allele>

Examples: expression characters not propagated
| Allele | Level | Redux Allele |
| DRB4*01:03N | lgx | DRB4*01:01 |
| DRB4*01:03:01N | lgx | DRB4*01:01 |
| DRB5*01:08N | lgx | DRB5*01:02/DRB5*01:08 |

Examples: shortnulls
| Allele | Level | Redux Allele |
| DRB4*01:03N | lgx | DRB4*01:01 |
| DRB4*01:03N | exon | DRB4*01:03:01 |
Examples: expression characters propagated
| Allele | Level | Redux Allele |
| DRB4*01:03N | exon | DRB4*01:03:01N |
| DRB4*01:03N | W | DRB4*01:03:01:02N/DRB4*01:03:01:13N |
| DRB4*01:03:01N | lgx | DRB4*01:01 |
| DRB4*01:03:01N | exon | DRB4*01:03:01 |
| DRB4*01:03:01N | exon | DRB4*01:03:01N |
| DRB4*01:03:01N | W | DRB4*01:03:01:02N/DRB4*01:03:01:13N |
| DRB5*01:08N | lgx | DRB5*01:02/DRB5*01:08 |
| DRB5*01:08N | exon | DRB5*01:08:01N/DRB5*01:08:02N |
| DRB5*01:08N | W | DRB5*01:08:01N/DRB5*01:08:02N |
| DRB5*01:08N | exon | DRB5*01:08:01N/DRB5*01:08:02N |
| DRB5*01:08N | W | DRB5*01:08:01N/DRB5*01:08:02N |
| A*01:04N | exon | A*01:04:01N |