Skip to content

Commit 0bd6e67

Browse files
committed
format
1 parent fb327b4 commit 0bd6e67

File tree

3 files changed

+25
-24
lines changed

3 files changed

+25
-24
lines changed

src/oaklib/cli.py

-1
Original file line number | Diff line number | Diff line change
@@ -36,7 +36,6 @@
3636
from prefixmaps.io.parser import load_multi_context
3737
from pydantic import BaseModel
3838
from sssom.parsers import parse_sssom_table, to_mapping_set_document
39-
from tornado.gen import multi
4039

4140
import oaklib.datamodels.taxon_constraints as tcdm
4241
from oaklib import datamodels

src/oaklib/implementations/cx/cx_implementation.py

-2
Original file line number | Diff line number | Diff line change
@@ -73,5 +73,3 @@ def __post_init__(self):
7373
locator = path
7474
cx = ndex2.create_nice_cx_from_file(path)
7575
self.obograph_document = from_cx(cx)
76-
77-

src/oaklib/implementations/llm_implementation.py

+25 -21
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,7 @@
66
import re
77
import time
88
from dataclasses import dataclass
9-
from typing import TYPE_CHECKING, Dict, Iterable, Iterator, List, Optional, Tuple, Any
9+
from typing import TYPE_CHECKING, Any, Dict, Iterable, Iterator, List, Optional, Tuple
1010

1111
import pystow
1212
from linkml_runtime.dumpers import yaml_dumper
@@ -236,7 +236,9 @@ def __post_init__(self):
236236
def _embeddings_collection_name(self) -> str:
237237
name = self.wrapped_adapter.resource.slug
238238
if not name:
239-
raise ValueError(f"Wrapped adapter must have a slug: {self.wrapped_adapter} // {self.wrapped_adapter.resource}")
239+
raise ValueError(
240+
f"Wrapped adapter must have a slug: {self.wrapped_adapter} // {self.wrapped_adapter.resource}"
241+
)
240242
return name
241243

242244
def entities(self, **kwargs) -> Iterator[CURIE]:
@@ -281,7 +283,6 @@ def _parse_response(self, json_str: str) -> Any:
281283
json_str = json_str[4:].strip()
282284
return json.loads(json_str)
283285

284-
285286
def get_model(self):
286287
model = self.model
287288
if not self.model:
@@ -297,6 +298,7 @@ def get_model(self):
297298
def _embed_terms(self):
298299
import llm
299300
import sqlite_utils
301+
300302
adapter = self.wrapped_adapter
301303
name = self._embeddings_collection_name
302304
path_to_db = pystow.join("oaklib", "llm", "embeddings")
@@ -308,14 +310,13 @@ def _embed_terms(self):
308310

309311
def _term_embedding(self, id: CURIE) -> Optional[tuple]:
310312
import llm
313+
311314
db = self._embeddings_collection.db
312315
name = self._embeddings_collection_name
313316
collection_ids = list(db["collections"].rows_where("name = ?", (name,)))
314317
collection_id = collection_ids[0]["id"]
315318
matches = list(
316-
db["embeddings"].rows_where(
317-
"collection_id = ? and id = ?", (collection_id, id)
318-
)
319+
db["embeddings"].rows_where("collection_id = ? and id = ?", (collection_id, id))
319320
)
320321
if not matches:
321322
logger.debug(f"ID not found: {id} in {collection_id} ({name})")
@@ -324,18 +325,18 @@ def _term_embedding(self, id: CURIE) -> Optional[tuple]:
324325
comparison_vector = llm.decode(embedding)
325326
return comparison_vector
326327

327-
328328
def pairwise_similarity(
329-
self,
330-
subject: CURIE,
331-
object: CURIE,
332-
predicates: List[PRED_CURIE] = None,
333-
subject_ancestors: List[CURIE] = None,
334-
object_ancestors: List[CURIE] = None,
335-
min_jaccard_similarity: Optional[float] = None,
336-
min_ancestor_information_content: Optional[float] = None,
329+
self,
330+
subject: CURIE,
331+
object: CURIE,
332+
predicates: List[PRED_CURIE] = None,
333+
subject_ancestors: List[CURIE] = None,
334+
object_ancestors: List[CURIE] = None,
335+
min_jaccard_similarity: Optional[float] = None,
336+
min_ancestor_information_content: Optional[float] = None,
337337
) -> Optional[TermPairwiseSimilarity]:
338338
import llm
339+
339340
self._embed_terms()
340341
subject_embedding = self._term_embedding(subject)
341342
if not subject_embedding:
@@ -351,7 +352,9 @@ def pairwise_similarity(
351352
)
352353
return sim
353354

354-
def _ground_term(self, term: str, categories: Optional[List[str]] = None) -> Optional[Tuple[str, float]]:
355+
def _ground_term(
356+
self, term: str, categories: Optional[List[str]] = None
357+
) -> Optional[Tuple[str, float]]:
355358
matches = list(self._match_terms(term))
356359
system = """
357360
Given a list of ontology terms, find the one that best matches the given term.
@@ -361,7 +364,7 @@ def _ground_term(self, term: str, categories: Optional[List[str]] = None) -> Opt
361364
- ANAT:002 pericardium
362365
Then a valid response is {"id": "ANAT:001", "confidence": 0.8}.
363366
"""
364-
prompt = f"Find the best match for the term: \"{term}\".\n"
367+
prompt = f'Find the best match for the term: "{term}".\n'
365368
if categories:
366369
if len(categories) == 1:
367370
prompt += f"Term Category: {categories[0]}.\n"
@@ -401,7 +404,11 @@ def annotate_text(
401404
grounded, _confidence = self._ground_term(text, configuration.categories)
402405
logger.info(f"Grounded {text} to {grounded}")
403406
if grounded:
404-
yield TextAnnotation(subject_label=text, object_id=grounded, object_label=self.wrapped_adapter.label(grounded))
407+
yield TextAnnotation(
408+
subject_label=text,
409+
object_id=grounded,
410+
object_label=self.wrapped_adapter.label(grounded),
411+
)
405412
return
406413
else:
407414
logging.info("Delegating directly to grounder, bypassing LLM")
@@ -495,9 +502,6 @@ def _match_terms(self, text: str) -> Iterator[Tuple[str, float]]:
495502
logger.debug(f"Similar: {entry}")
496503
yield entry.id, entry.score
497504

498-
499-
500-
501505
def _suggest_aliases(
502506
self,
503507
term: str,

0 commit comments

Comments
 (0)