Skip to content

Commit

Permalink
Merge pull request #203 from helicalAI/evo-2-updates
Browse files Browse the repository at this point in the history
Evo 2 more comprehensive notebook and 40B configs
  • Loading branch information
maxiallard authored Feb 26, 2025
2 parents d888020 + ffcc330 commit 6fb608d
Show file tree
Hide file tree
Showing 3 changed files with 1,478 additions and 377 deletions.
1,723 changes: 1,362 additions & 361 deletions examples/notebooks/Evo-2.ipynb

Large diffs are not rendered by default.

16 changes: 2 additions & 14 deletions helical/models/evo_2/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,25 +4,13 @@

**Model Name:** Evo 2

**Model Versions:** 1B, 7B and 40B (Currently only the 1B and 7B models are available and the 40B coming soon)
**Model Versions:** 1B, 7B and 40B

**Model Description:** Evo 2 is a next-generation genomic model that integrates DNA, RNA, and protein data across all domains of life. It leverages the StripedHyena 2 architecture, combining convolutional, linear attention, and state-space models to efficiently process long sequences and capture complex biological patterns. Evo 2 is trained on a vast dataset encompassing trillions of nucleotides from eukaryotic and prokaryotic genomes, enabling broad cross-species applications and insights into human diseases, agriculture, and environmental science.

## Model Developers

**Arc Institute**

**Stanford University**

**NVIDIA**

**Liquid AI**

**University of California, Berkeley**

**Goodfire**

**Columbia University**
Arc Institute; Stanford University; NVIDIA; Liquid AI; University of California, Berkeley; Goodfire; Columbia University

**Contact Information:**

Expand Down
116 changes: 114 additions & 2 deletions helical/models/evo_2/evo_2_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,14 +94,12 @@ def __init__(
"hcm_layer_idxs": [1, 5, 8, 12, 15, 19, 22, 26, 29],
"hcs_layer_idxs": [0, 4, 7, 11, 14, 18, 21, 25, 28],
"attn_layer_idxs": [3, 10, 17, 24, 31],

"hcm_filter_length": 128,
"hcl_filter_groups": 4096,
"hcm_filter_groups": 256,
"hcs_filter_groups": 256,
"hcs_filter_length": 7,
"num_layers": 32,

"short_filter_length": 3,
"num_attention_heads": 32,
"short_filter_bias": False,
Expand Down Expand Up @@ -205,6 +203,120 @@ def __init__(
"mlp_activation": "gelu",
"print_activations": False,
},
"evo2-40b-base": {
"model_name": "evo2_40b_base",
"model_hf_name": "arcinstitute/evo2_40b_base",
"default_embedding_layer": "blocks.49.mlp.l3",
"vocab_size": 512,
"hidden_size": 8192,
"num_filters": 8192,
"hcl_layer_idxs": [2, 6, 9, 13, 16, 20, 23, 27, 30, 34, 38, 41, 45, 48],
"hcm_layer_idxs": [1, 5, 8, 12, 15, 19, 22, 26, 29, 33, 37, 40, 44, 47],
"hcs_layer_idxs": [0, 4, 7, 11, 14, 18, 21, 25, 28, 32, 36, 39, 43, 46],
"attn_layer_idxs": [3, 10, 17, 24, 31, 35, 42, 49],
"hcm_filter_length": 128,
"hcl_filter_groups": 8192,
"hcm_filter_groups": 512,
"hcs_filter_groups": 512,
"hcs_filter_length": 7,
"num_layers": 50,
"short_filter_length": 3,
"num_attention_heads": 64,
"short_filter_bias": False,
"mlp_init_method": "torch.nn.init.zeros_",
"mlp_output_init_method": "torch.nn.init.zeros_",
"eps": 0.000001,
"state_size": 16,
"rotary_emb_base": 1000000,
"make_vocab_size_divisible_by": 8,
"inner_size_multiple_of": 128,
"inner_mlp_size": 21888,
"log_intermediate_values": False,
"proj_groups": 1,
"hyena_filter_groups": 1,
"column_split_hyena": False,
"column_split": True,
"interleave": True,
"evo2_style_activations": True,
"use_fp8_input_projections": True,
"model_parallel_size": 1,
"pipe_parallel_size": 1,
"tie_embeddings": True,
"mha_out_proj_bias": True,
"hyena_out_proj_bias": True,
"hyena_flip_x1x2": False,
"qkv_proj_bias": False,
"max_seqlen": 8192,
"max_batch_size": 1,
"final_norm": True,
"use_flash_attn": True,
"use_flash_rmsnorm": False,
"use_flash_depthwise": False,
"use_flashfft": False,
"use_laughing_hyena": False,
"inference_mode": True,
"prefill_style": "fft",
"mlp_activation": "gelu",
"print_activations": False,
},
"evo2-40b": {
"model_name": "evo2_40b",
"model_hf_name": "arcinstitute/evo2_40b",
"default_embedding_layer": "blocks.49.mlp.l3",
"vocab_size": 512,
"hidden_size": 8192,
"num_filters": 8192,
"hcl_layer_idxs": [2, 6, 9, 13, 16, 20, 23, 27, 30, 34, 38, 41, 45, 48],
"hcm_layer_idxs": [1, 5, 8, 12, 15, 19, 22, 26, 29, 33, 37, 40, 44, 47],
"hcs_layer_idxs": [0, 4, 7, 11, 14, 18, 21, 25, 28, 32, 36, 39, 43, 46],
"attn_layer_idxs": [3, 10, 17, 24, 31, 35, 42, 49],
"hcm_filter_length": 128,
"hcl_filter_groups": 8192,
"hcm_filter_groups": 512,
"hcs_filter_groups": 512,
"hcs_filter_length": 7,
"num_layers": 50,
"short_filter_length": 3,
"num_attention_heads": 64,
"short_filter_bias": False,
"mlp_init_method": "torch.nn.init.zeros_",
"mlp_output_init_method": "torch.nn.init.zeros_",
"eps": 0.000001,
"state_size": 16,
"rotary_emb_base": 100000000000,
"rotary_emb_scaling_factor": 128,
"use_interpolated_rotary_pos_emb": True,
"make_vocab_size_divisible_by": 8,
                "inner_size_multiple_of": 128,  # force GLU inner_size to be a multiple of this value
"inner_mlp_size": 22528,
"log_intermediate_values": False,
"proj_groups": 1,
"hyena_filter_groups": 1,
"column_split_hyena": False,
"column_split": True,
"interleave": True,
"evo2_style_activations": True,
"use_fp8_input_projections": True,
"model_parallel_size": 1,
"pipe_parallel_size": 1,
"tie_embeddings": True,
"mha_out_proj_bias": True,
"hyena_out_proj_bias": True,
"hyena_flip_x1x2": False,
"qkv_proj_bias": False,
"max_seqlen": 1048576,
"max_batch_size": 1,
"final_norm": True,
"use_flash_attn": True,
"use_flash_rmsnorm": False,
"use_flash_depthwise": False,
"use_flashfft": False,
"use_laughing_hyena": False,
"inference_mode": True,
"prefill_style": "fft",
"mlp_activation": "gelu",
"print_activations": False,
},
}
if model_name not in self.model_map:
raise ValueError(
Expand Down

0 comments on commit 6fb608d

Please sign in to comment.