Change field names
valeriy42 committed Nov 2, 2023
1 parent c6dd202 commit f618672
Showing 1 changed file with 24 additions and 16 deletions.
eland/ml/pytorch/transformers.py (24 additions, 16 deletions)
@@ -21,6 +21,7 @@
 """
 
 import json
+import logging
 import os.path
 import random
 import re
@@ -63,6 +64,8 @@
 )
 from eland.ml.pytorch.traceable_model import TraceableModel
 
+logger = logging.getLogger(__name__)
+
 DEFAULT_OUTPUT_KEY = "sentence_embedding"
 SUPPORTED_TASK_TYPES = {
     "fill_mask",
@@ -772,35 +775,37 @@ def _create_config(
         )
 
         # add static and dynamic memory state size to metadata
-        static_memory_size = self._get_model_memory()
+        per_deployment_memory_bytes = self._get_model_memory()
 
-        transient_memory_size = self._get_transient_memory(
+        per_allocation_memory_bytes = self._get_transient_memory(
             tokenization_config.max_sequence_length, 1
         )
-        peak_memory_size = self._get_peak_memory(
-            static_memory_size, transient_memory_size
+        peak_memory_bytes = self._get_peak_memory(
+            per_deployment_memory_bytes, per_allocation_memory_bytes
         )
-        # TODO: final field names are subject to change
         metadata = {
-            "static_memory_size": static_memory_size,
-            "transient_memory_size": transient_memory_size,
-            "peak_memory_size": peak_memory_size,
+            "per_deployment_memory_bytes": per_deployment_memory_bytes,
+            "per_allocation_memory_bytes": per_allocation_memory_bytes,
+            "peak_memory_bytes": peak_memory_bytes,
         }
 
+        # TODO: remove this once memory metadata is supported by ES
+        logger.info("Model metadata: %s", metadata)
+
         return NlpTrainedModelConfig(
             description=f"Model {self._model_id} for task type '{self._task_type}'",
             model_type="pytorch",
             inference_config=inference_config,
             input=TrainedModelInput(
                 field_names=["text_field"],
             ),
-            # TODO: uncomment this line once memory metadata is supported by ES
-            # metadata=metadata,
+            metadata=metadata,
         )
 
     def _get_model_memory(self) -> float:
         """
-        Returns the static memory size of the model in MB.
+        Returns the static memory size of the model in bytes.
         """
         psize = sum(
             param.nelement() * param.element_size()
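Taken together, the three renamed fields describe the memory a deployment of the model is expected to need, and this commit now both logs them and ships them in the trained model config. A rough, self-contained sketch of what the new log line would print; the byte counts are made up, and the logger name simply follows from logging.getLogger(__name__) in this module:

import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("eland.ml.pytorch.transformers")

# Hypothetical sizes in bytes; peak follows max(2 * static, static + transient).
metadata = {
    "per_deployment_memory_bytes": 438_509_568,
    "per_allocation_memory_bytes": 268_435_456,
    "peak_memory_bytes": 877_019_136,
}
logger.info("Model metadata: %s", metadata)
# INFO:eland.ml.pytorch.transformers:Model metadata: {'per_deployment_memory_bytes': 438509568, ...}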
@@ -810,11 +815,11 @@
             buffer.nelement() * buffer.element_size()
             for buffer in self._traceable_model.model.buffers()
         )
-        return (psize + bsize) / 1024**2  # in MB
+        return (psize + bsize)
 
     def _get_transient_memory(self, max_seq_length: int, batch_size: int) -> float:
         """
-        Returns the transient memory size of the model in MB.
+        Returns the transient memory size of the model in bytes.
 
         Parameters
         ----------
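The byte accounting in _get_model_memory carries over to any torch module: sum element count times element size over parameters and buffers. A minimal sketch with a toy model standing in for the traced transformer (torch.nn.Linear here is illustrative only, not eland's model):

import torch

model = torch.nn.Linear(768, 768)  # toy stand-in for the traced model

param_bytes = sum(p.nelement() * p.element_size() for p in model.parameters())
buffer_bytes = sum(b.nelement() * b.element_size() for b in model.buffers())

# (768 * 768 weights + 768 biases) * 4 bytes per float32 element
print(param_bytes + buffer_bytes)  # 2362368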
@@ -829,21 +834,24 @@ def _get_transient_memory(self, max_seq_length: int, batch_size: int) -> float:
         inputs_1 = self._get_model_inputs(max_seq_length, 1)
         with profile(activities=activities, profile_memory=True) as prof:
             self._traceable_model.model(**inputs_1)
-        mem1 = prof.key_averages().total_average().cpu_memory_usage / 1024**2
+        mem1 = prof.key_averages().total_average().cpu_memory_usage
 
         # This is measuring memory usage of the model with a batch size of 2 and
         # then linearly extrapolating it to get the memory usage of the model for
         # a batch size of batch_size.
         if batch_size == 1:
-            return mem1  # in MB
+            return mem1
         else:
             inputs_2 = self._get_model_inputs(max_seq_length, 2)
             with profile(activities=activities, profile_memory=True) as prof:
                 self._traceable_model.model(**inputs_2)
-            mem2 = prof.key_averages().total_average().cpu_memory_usage / 1024**2
-            return mem1 + (mem2 - mem1) * (batch_size - 1)  # in MB
+            mem2 = prof.key_averages().total_average().cpu_memory_usage
+            return mem1 + (mem2 - mem1) * (batch_size - 1)
 
     def _get_peak_memory(self, size_model_mb: float, transient: float) -> float:
+        """
+        Returns the peak memory size of the model in bytes.
+        """
         return max(2 * size_model_mb, size_model_mb + transient)
 
     def _get_model_inputs(
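The measurement strategy in _get_transient_memory also generalizes: profile one forward pass at batch size 1, another at batch size 2, and extrapolate linearly to the requested batch size. A minimal sketch under the same assumptions, with a toy model and random inputs where eland instead builds tokenizer-shaped inputs via _get_model_inputs:

import torch
from torch.profiler import ProfilerActivity, profile

model = torch.nn.Linear(512, 512)  # toy stand-in for the traced model

def measure_cpu_bytes(batch_size: int) -> int:
    inputs = torch.randn(batch_size, 512)
    with profile(activities=[ProfilerActivity.CPU], profile_memory=True) as prof:
        model(inputs)
    return prof.key_averages().total_average().cpu_memory_usage

mem1 = measure_cpu_bytes(1)
mem2 = measure_cpu_bytes(2)

# Assume each additional sequence in a batch costs (mem2 - mem1) bytes
# on top of the batch-of-one baseline.
def transient_bytes(batch_size: int) -> int:
    return mem1 + (mem2 - mem1) * (batch_size - 1)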

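With both inputs in bytes, _get_peak_memory reads as: the peak is whichever is larger, twice the static model size (plausibly headroom for a second resident copy of the model) or one copy plus the transient inference overhead. A worked example with round numbers:

def peak_memory_bytes(static_bytes: int, transient_bytes: int) -> int:
    # Same formula as _get_peak_memory.
    return max(2 * static_bytes, static_bytes + transient_bytes)

static = 400 * 1024**2  # a 400 MiB model
print(peak_memory_bytes(static, 100 * 1024**2))  # 838860800: 2 * static wins
print(peak_memory_bytes(static, 500 * 1024**2))  # 943718400: static + transient wins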