Skip to content

Commit

Permalink
maint: pylint
Browse files Browse the repository at this point in the history
  • Loading branch information
arachez committed Mar 4, 2025
1 parent df2d457 commit 2d79ed6
Show file tree
Hide file tree
Showing 7 changed files with 48 additions and 28 deletions.
3 changes: 2 additions & 1 deletion wsd/annotate/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
from .components import *
"""A collection of mesop components to use in the annotation ui."""
from .components import *
28 changes: 15 additions & 13 deletions wsd/annotate/app.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,19 @@
"""Annotation UI for Japanese Word Sense Disambiguation."""
# pylint: disable=unused-argument,no-member,bare-except,no-name-in-module
import os
from typing import List, Dict
from typing import List
from dataclasses import asdict, field

import mesop as me
from mesop.server.wsgi_app import create_app

from fugashi import Tagger

from linalgo.hub.client import LinalgoClient
from linalgo.annotate.models import Annotation, Document, Target, Task
from linalgo.annotate.serializers import AnnotationSerializer, DocumentSerializer
from linalgo.annotate.models import Annotation, Target

from wsd.parsers.jmdict import Entry
from wsd.annotate.lindict import LinDictAPI
from wsd.annotate import LinDoc, LinEntry, Token
from wsd.annotate import lin_doc, lin_entry, Token


LINHUB_TOKEN = os.getenv('LINHUB_TOKEN')
Expand All @@ -32,9 +32,10 @@
tagger = Tagger('-Owakati')



@me.stateclass
class State:
"""Application state class."""
# pylint: disable=invalid-field-call,too-few-public-methods
tokens: List[Token] = field(default_factory=list)
entries: List[Entry] = field(default_factory=list)
cur: int = 0
Expand All @@ -43,15 +44,18 @@ class State:


def get_next_document():
    """Get the next document for annotation.

    Asks the hub client for the next document of ``LINHUB_TASK`` and stores
    it on ``linhub.document``.

    Returns
    -------
    The document stored on ``linhub.document`` on success, or ``None`` when
    fetching it raised. In the failure case ``State.done`` is set to ``True``.
    """
    try:
        document = linhub.get_next_document(LINHUB_TASK)
    except Exception:  # pylint: disable=broad-exception-caught
        # Deliberately best-effort: any failure to fetch a document is
        # treated as "annotation is finished". Unlike a bare ``except:``,
        # this lets KeyboardInterrupt and SystemExit propagate.
        # NOTE(review): linhub.document keeps its previous value on this
        # path -- confirm that callers expect that.
        state = me.state(State)
        state.done = True
        return None
    linhub.document = document
    return linhub.document


def get_tokens():
"""Tokenize the current document."""
doc = linhub.document
tokens = []
for word in tagger(doc.content):
Expand All @@ -65,12 +69,14 @@ def get_tokens():


def get_entries():
    """Search the dictionary for the lemma of the current token.

    The current token is ``state.tokens[state.cur]``; its ``lemma`` is passed
    to ``lindict.search`` and the result is returned unchanged.
    """
    app_state = me.state(State)
    current_token = app_state.tokens[app_state.cur]
    return lindict.search(current_token.lemma)


def on_load(e):
"""Prepare application state"""
state = me.state(State)
get_next_document()
if linhub.document is not None:
Expand Down Expand Up @@ -116,6 +122,7 @@ def on_load(e):
),
)
def app():
"""Japanese Word Sense Disambiguation UI"""
state = me.state(State)

with me.box(style=header):
Expand All @@ -127,13 +134,13 @@ def app():
else:
with me.box(style=body):
tokens = [asdict(t) for t in state.tokens]
LinDoc(tokens=tokens, on_pop=_on_pop, cur=state.cur)
lin_doc(tokens=tokens, on_pop=_on_pop, cur=state.cur)

me.divider()

with me.box(style=entries):
for entry in state.entries:
LinEntry(
lin_entry(
entry=asdict(entry),
selected=state.selected == entry.ent_seq,
on_chosen=_on_chosen
Expand Down Expand Up @@ -193,8 +200,3 @@ def _next(event):
state.entries = get_entries()
except:
state.done = True


if __name__ == "__main__":
app = create_app(prod_mode=True)
app._flask_app.run(host="localhost", port=8080, use_reloader=True)
3 changes: 2 additions & 1 deletion wsd/annotate/components/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
from .pop import *
"""A collection of python wrappers for LIT components"""
from .pop import *
10 changes: 7 additions & 3 deletions wsd/annotate/components/pop.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
"""Mesop wrapper for LIT components"""
from dataclasses import dataclass
from typing import Any, Callable, List, Dict

Expand All @@ -6,19 +7,21 @@

@dataclass
class Token:
    """Token record with ``text``, ``lemma`` and ``pos`` string fields.

    Every field defaults to the empty string, so ``Token()`` is valid.
    """
    text: str = ""
    lemma: str = ""
    pos: str = ""  # presumably a part-of-speech tag -- confirm against the tokenizer


@mel.web_component(path="../lit/dist/linpop.js")
def LinDoc(
def lin_doc(
*,
tokens: List[Dict],
cur: int,
on_pop: Callable[[mel.WebEvent], Any],
key: str | None = None,
):
"""Wrapper for the LIT LinDoc component."""
return mel.insert_web_component(
name="lin-doc",
key=key,
Expand All @@ -28,13 +31,14 @@ def LinDoc(


@mel.web_component(path="../lit/dist/linpop.js")
def LinEntry(
def lin_entry(
*,
entry: Dict,
selected: bool,
on_chosen: Callable[[mel.WebEvent], Any],
key: str | None = None
):
"""Wrapper for the LIT LinEntry component."""
return mel.insert_web_component(
name="lin-entry",
key=key,
Expand All @@ -43,4 +47,4 @@ def LinEntry(
)


__all__ = ["LinDoc", "LinEntry", "Token"]
__all__ = ["lin_doc", "lin_entry", "Token"]
24 changes: 17 additions & 7 deletions wsd/annotate/lindict.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,27 @@
# pylint: disable=too-few-public-methods, missing-timeout
"""A simple interface for the LinDict API"""
from typing import List
import requests

from wsd.parsers import Entry


class LinDictAPI:
"""A simple interface for the LinDict API"""

def search(self, query) -> Entry:
def search(self, query) -> List[Entry]:
"""Search the dictionary for the given query.
Parameters
----------
query : str
The query string.
Returns
-------
entries : List[Entry]
A list of entries.
"""
url = f"https://lindict.api.linalgo.com/v1/ja/search/?query={query}"
response = requests.get(url)
response.raise_for_status()
Expand All @@ -16,9 +32,3 @@ def search(self, query) -> Entry:
for entry in data['results']:
entries.append(Entry.from_dict(entry))
return entries

if __name__ == "__main__":
lindict = LinDictAPI()
entries = lindict.search('馬酔木')
for entry in entries:
print(entry)
6 changes: 4 additions & 2 deletions wsd/parsers/jmdict.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,9 +94,10 @@ def from_node(cls, node):
dial = [d.text for d in node.findall('dial')]
gloss = [Gloss.from_node(g) for g in node.findall('gloss')]
return cls(stagk, stagr, pos, xref, ant, field_, misc, s_inf, lsource, dial, gloss)

@classmethod
def from_dict(cls, data):
    """Create a Sense object from a dictionary.

    Parameters
    ----------
    data : dict
        Mapping with a ``'field'`` key, a ``'glosses'`` key holding a list
        of keyword dictionaries for ``Gloss``, and any further keys that
        are passed to the constructor as-is. The mapping is not modified.

    Returns
    -------
    An instance of ``cls`` built from the remapped keys.
    """
    # Work on a shallow copy so the caller's dict is left untouched and can
    # be passed in again without a KeyError on the popped keys.
    kwargs = dict(data)
    # The constructor takes 'field_' and 'gloss' rather than the
    # 'field' / 'glosses' keys used in the input mapping.
    kwargs['field_'] = kwargs.pop('field')
    kwargs['gloss'] = [Gloss(**g) for g in kwargs.pop('glosses')]
    return cls(**kwargs)
Expand All @@ -119,9 +120,10 @@ def from_node(cls, node):
r_ele = [Reading.from_node(r) for r in node.iter('r_ele')]
senses = [Sense.from_node(s) for s in node.iter('sense')]
return cls(ent_seq, k_ele, r_ele, senses)

@classmethod
def from_dict(cls, data):
"""Create an Entry object from a dictionary"""
k_ele = [Kanji(**k) for k in data['kanjis']]
r_ele = [Reading(**r) for r in data['readings']]
senses = [Sense.from_dict(s) for s in data['senses']]
Expand Down
2 changes: 1 addition & 1 deletion wsd/parsers/xlwsd.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ def parse(lang):
current_dir = os.path.dirname(__file__)
filepath = os.path.join(current_dir, '../../data/xl-wsd-data.zip')
zf = zipfile.ZipFile(filepath, 'r')

base_dir = f'xl-wsd/training_datasets/semcor_{lang}'
labels = f'{base_dir}/semcor_{lang}.gold.key.txt'
corpus = f'{base_dir}/semcor_{lang}.data.xml'
Expand Down

0 comments on commit 2d79ed6

Please sign in to comment.