@@ -369,10 +369,36 @@ rule select_strains:
369
369
--output {output.strains}
370
370
"""
371
371
372
+ def _get_lineage_reference(wildcards):
373
+ if wildcards.resolution in {'2y', '6m'}:
374
+ if wildcards.lineage == "h1n1pdm":
375
+ return "A/California/7/2009"
376
+ if wildcards.lineage == "h3n2":
377
+ return "A/Wisconsin/67/2005"
378
+ if wildcards.lineage == "vic":
379
+ return "B/Brisbane/60/2008"
380
+
381
+ return ""
382
+
383
+
384
# Copy the selected-strain list to a new file and, when the build has a
# reference strain, append that strain on its own line.
# `params.reference` is the empty string for builds without a reference; in
# that case the list is copied unchanged instead of gaining blank lines.
rule include_reference_strain:
    message: "Adding reference strain to selected strains (only for 2y and 6m builds)"
    input:
        strains = "results/strains_{center}_{lineage}_{resolution}_{passage}_{assay}.txt"
    output:
        strains = "results/strains_with_reference_{center}_{lineage}_{resolution}_{passage}_{assay}.txt"
    params:
        reference = _get_lineage_reference
    shell:
        # The leading newline guards against an input file whose last line
        # lacks a trailing newline; printf interprets the escaped "\n" itself,
        # so the result does not depend on the shell's `echo` behaviour.
        """
        cp {input.strains} {output.strains}
        if [ -n "{params.reference}" ]; then
            printf '\\n%s\\n' "{params.reference}" >> {output.strains}
        fi
        """
397
+
372
398
rule extract:
373
399
input:
374
400
sequences = rules.filter.output.sequences,
375
- strains = rules.select_strains .output.strains
401
+ strains = rules.include_reference_strain .output.strains
376
402
output:
377
403
sequences = 'results/extracted_{center}_{lineage}_{segment}_{resolution}_{passage}_{assay}.fasta'
378
404
conda: "environment.yaml"
@@ -467,6 +493,9 @@ rule tree:
467
493
--exclude-sites {input.exclude_sites}
468
494
"""
469
495
496
def _get_refine_root(wildcards):
    """Return the value passed to the refine rule's ``--root`` option.

    Uses the build's reference strain when `_get_lineage_reference` returns
    a non-empty name, and falls back to the literal "best" otherwise.
    """
    reference = _get_lineage_reference(wildcards)
    if reference:
        return reference
    return "best"
498
+
470
499
rule refine:
471
500
message:
472
501
"""
@@ -488,7 +517,8 @@ rule refine:
488
517
date_inference = "marginal",
489
518
clock_filter_iqd = 4,
490
519
clock_rate = clock_rate,
491
- clock_std_dev = clock_std_dev
520
+ clock_std_dev = clock_std_dev,
521
+ root = _get_refine_root
492
522
conda: "environment.yaml"
493
523
resources:
494
524
mem_mb=16000
@@ -507,13 +537,30 @@ rule refine:
507
537
--coalescent {params.coalescent} \
508
538
--date-confidence \
509
539
--date-inference {params.date_inference} \
510
- --clock-filter-iqd {params.clock_filter_iqd}
540
+ --clock-filter-iqd {params.clock_filter_iqd} \
541
+ --root {params.root}
511
542
"""
512
543
544
# Remove the reference strain tip from the refined tree and write the result
# to a separate "pruned" tree file.
# `params.reference` is the empty string for builds without a reference, and
# the reference may also be absent from the tree; in both cases the tree is
# written out unchanged rather than failing on a missing clade.
rule prune_reference:
    message: "Pruning reference strain from the tree (only for 2y and 6m builds)"
    input:
        tree = rules.refine.output.tree
    output:
        tree = "results/tree_pruned_{center}_{lineage}_{segment}_{resolution}_{passage}_{assay}.nwk"
    params:
        reference = _get_lineage_reference
    run:
        from Bio import Phylo

        tree = Phylo.read(input.tree, "newick")

        if params.reference:
            # Only terminal clades can be pruned, so search the tips and take
            # the first one whose name matches the reference exactly.
            reference_tip = next(
                (tip for tip in tree.get_terminals() if tip.name == params.reference),
                None,
            )
            if reference_tip is not None:
                tree.prune(reference_tip)

        Phylo.write(tree, output.tree, "newick")
558
+
559
+
513
560
rule ancestral:
514
561
message: "Reconstructing ancestral sequences and mutations"
515
562
input:
516
- tree = rules.refine .output.tree,
563
+ tree = rules.prune_reference .output.tree,
517
564
alignment = rules.align.output
518
565
output:
519
566
node_data = "results/nt-muts_{center}_{lineage}_{segment}_{resolution}_{passage}_{assay}.json"
@@ -534,7 +581,7 @@ rule ancestral:
534
581
rule translate:
535
582
message: "Translating amino acid sequences"
536
583
input:
537
- tree = rules.refine .output.tree,
584
+ tree = rules.prune_reference .output.tree,
538
585
node_data = rules.ancestral.output.node_data,
539
586
reference = files.reference
540
587
output:
@@ -552,7 +599,7 @@ rule translate:
552
599
rule reconstruct_translations:
553
600
message: "Reconstructing translations required for titer models and frequencies"
554
601
input:
555
- tree = rules.refine .output.tree,
602
+ tree = rules.prune_reference .output.tree,
556
603
node_data = "results/aa-muts_{center}_{lineage}_{segment}_{resolution}_{passage}_{assay}.json",
557
604
output:
558
605
aa_alignment = "results/aa-seq_{center}_{lineage}_{segment}_{resolution}_{passage}_{assay}_{gene}.fasta"
@@ -572,7 +619,7 @@ rule reconstruct_translations:
572
619
573
620
rule convert_translations_to_json:
574
621
input:
575
- tree = rules.refine .output.tree,
622
+ tree = rules.prune_reference .output.tree,
576
623
translations = translations
577
624
output:
578
625
translations = "results/aa-seq_{center}_{lineage}_{segment}_{resolution}_{passage}_{assay}.json"
@@ -595,7 +642,7 @@ rule traits:
595
642
Inferring ancestral traits for {params.columns!s}
596
643
"""
597
644
input:
598
- tree = rules.refine .output.tree,
645
+ tree = rules.prune_reference .output.tree,
599
646
metadata = rules.parse.output.metadata
600
647
output:
601
648
node_data = "results/traits_{center}_{lineage}_{segment}_{resolution}_{passage}_{assay}.json",
@@ -617,7 +664,7 @@ rule titers_sub:
617
664
titers = rules.download_titers.output.titers,
618
665
aa_muts = rules.translate.output,
619
666
alignments = translations,
620
- tree = rules.refine .output.tree
667
+ tree = rules.prune_reference .output.tree
621
668
params:
622
669
genes = gene_names
623
670
output:
@@ -637,7 +684,7 @@ rule titers_sub:
637
684
rule titers_tree:
638
685
input:
639
686
titers = rules.download_titers.output.titers,
640
- tree = rules.refine .output.tree
687
+ tree = rules.prune_reference .output.tree
641
688
output:
642
689
titers_model = "results/titers-tree-model_{center}_{lineage}_{segment}_{resolution}_{passage}_{assay}.json",
643
690
conda: "environment.yaml"
@@ -652,7 +699,7 @@ rule titers_tree:
652
699
653
700
rule tip_frequencies:
654
701
input:
655
- tree = rules.refine .output.tree,
702
+ tree = rules.prune_reference .output.tree,
656
703
metadata = rules.parse.output.metadata,
657
704
params:
658
705
narrow_bandwidth = 2 / 12.0,
@@ -681,7 +728,7 @@ rule tip_frequencies:
681
728
682
729
rule tree_frequencies:
683
730
input:
684
- tree = rules.refine .output.tree,
731
+ tree = rules.prune_reference .output.tree,
685
732
metadata = rules.parse.output.metadata,
686
733
params:
687
734
min_date = min_date,
@@ -709,7 +756,7 @@ rule tree_frequencies:
709
756
710
757
rule diffusion_frequencies:
711
758
input:
712
- tree = rules.refine .output.tree,
759
+ tree = rules.prune_reference .output.tree,
713
760
metadata = rules.parse.output.metadata,
714
761
params:
715
762
min_date = min_date,
@@ -735,7 +782,7 @@ rule diffusion_frequencies:
735
782
736
783
rule delta_frequency:
737
784
input:
738
- tree = rules.refine .output.tree,
785
+ tree = rules.prune_reference .output.tree,
739
786
frequencies = rules.diffusion_frequencies.output.frequencies
740
787
output:
741
788
delta_frequency = "results/delta_frequency_{center}_{lineage}_{segment}_{resolution}_{passage}_{assay}.json"
@@ -756,7 +803,7 @@ rule delta_frequency:
756
803
rule clades:
757
804
message: "Annotating clades"
758
805
input:
759
- tree = "results/tree_ {center}_{lineage}_ha_{resolution}_{passage}_{assay}.nwk",
806
+ tree = "results/tree_pruned_ {center}_{lineage}_ha_{resolution}_{passage}_{assay}.nwk",
760
807
nt_muts = rules.ancestral.output,
761
808
aa_muts = rules.translate.output,
762
809
clades = _get_clades_file_for_wildcards
@@ -781,7 +828,7 @@ rule clades:
781
828
782
829
rule antigenic_distances_between_strains:
783
830
input:
784
- tree="results/tree_ {center}_{lineage}_{segment}_{resolution}_{passage}_{assay}.nwk",
831
+ tree="results/tree_pruned_ {center}_{lineage}_{segment}_{resolution}_{passage}_{assay}.nwk",
785
832
clades="results/clades_{center}_{lineage}_{segment}_{resolution}_{passage}_{assay}.json",
786
833
titer_model="results/titers-sub-model_{center}_{lineage}_{segment}_{resolution}_{passage}_{assay}.json",
787
834
titers="data/{center}_{lineage}_{passage}_{assay}_titers.tsv",
@@ -836,7 +883,7 @@ rule plot_antigenic_distances_between_strains:
836
883
837
884
rule distances:
838
885
input:
839
- tree = rules.refine .output.tree,
886
+ tree = rules.prune_reference .output.tree,
840
887
alignments = translations,
841
888
distance_maps = _get_distance_maps_by_lineage_and_segment
842
889
params:
@@ -862,7 +909,7 @@ rule distances:
862
909
863
910
rule pairwise_titer_tree_distances:
864
911
input:
865
- tree = rules.refine .output.tree,
912
+ tree = rules.prune_reference .output.tree,
866
913
frequencies = rules.tip_frequencies.output.tip_freq,
867
914
model = rules.titers_tree.output.titers_model,
868
915
date_annotations = rules.refine.output.node_data
@@ -917,7 +964,7 @@ rule titer_tree_cross_immunities:
917
964
918
965
rule glyc:
919
966
input:
920
- tree = rules.refine .output.tree,
967
+ tree = rules.prune_reference .output.tree,
921
968
alignment = _get_glyc_alignment
922
969
output:
923
970
glyc = "results/glyc_{center}_{lineage}_{segment}_{resolution}_{passage}_{assay}.json"
@@ -933,7 +980,7 @@ rule glyc:
933
980
rule lbi:
934
981
message: "Calculating LBI"
935
982
input:
936
- tree = rules.refine .output.tree,
983
+ tree = rules.prune_reference .output.tree,
937
984
branch_lengths = rules.refine.output.node_data
938
985
params:
939
986
tau = _get_lbi_tau_for_wildcards,
0 commit comments