maint: pylint

linalgo · Mar 4, 2025 · 2d79ed6 · 2d79ed6
1 parent df2d457
commit 2d79ed6
Show file tree

Hide file tree

Showing 7 changed files with 48 additions and 28 deletions.
diff --git a/wsd/annotate/__init__.py b/wsd/annotate/__init__.py
@@ -1 +1,2 @@
-from .components import *
+"""A collection of Mesop components to use in the annotation UI."""
+from .components import *
diff --git a/wsd/annotate/app.py b/wsd/annotate/app.py
@@ -1,19 +1,19 @@
+"""Annotation UI for Japanese Word Sense Disambiguation."""
+# pylint: disable=unused-argument,no-member,bare-except,no-name-in-module
 import os
-from typing import List, Dict
+from typing import List
 from dataclasses import asdict, field
 
 import mesop as me
-from mesop.server.wsgi_app import create_app
 
 from fugashi import Tagger
 
 from linalgo.hub.client import LinalgoClient
-from linalgo.annotate.models import Annotation, Document, Target, Task
-from linalgo.annotate.serializers import AnnotationSerializer, DocumentSerializer
+from linalgo.annotate.models import Annotation, Target
 
 from wsd.parsers.jmdict import Entry
 from wsd.annotate.lindict import LinDictAPI
-from wsd.annotate import LinDoc, LinEntry, Token
+from wsd.annotate import lin_doc, lin_entry, Token
 
 
 LINHUB_TOKEN = os.getenv('LINHUB_TOKEN')
@@ -32,9 +32,10 @@
 tagger = Tagger('-Owakati')
 
 
-
 @me.stateclass
 class State:
+    """Application state class."""
+    # pylint: disable=invalid-field-call,too-few-public-methods
     tokens: List[Token] = field(default_factory=list)
     entries: List[Entry] = field(default_factory=list)
     cur: int = 0
@@ -43,15 +44,18 @@ class State:
 
 
 def get_next_document():
+    """Get the next document for annotation."""
     try:
         linhub.document = linhub.get_next_document(LINHUB_TASK)
         return linhub.document
     except:
         state = me.state(State)
         state.done = True
+        return None
 
 
 def get_tokens():
+    """Tokenize the current document."""
     doc = linhub.document
     tokens = []
     for word in tagger(doc.content):
@@ -65,12 +69,14 @@ def get_tokens():
 
 
 def get_entries():
+    """Retrieve dictionary entries for the current token."""
     state = me.state(State)
     lemma = state.tokens[state.cur].lemma
     return lindict.search(lemma)
 
 
 def on_load(e):
+    """Prepare application state"""
     state = me.state(State)
     get_next_document()
     if linhub.document is not None:
@@ -116,6 +122,7 @@ def on_load(e):
     ),
 )
 def app():
+    """Japanese Word Sense Disambiguation UI"""
     state = me.state(State)
 
     with me.box(style=header):
@@ -127,13 +134,13 @@ def app():
     else:
         with me.box(style=body):
             tokens = [asdict(t) for t in state.tokens]
-            LinDoc(tokens=tokens, on_pop=_on_pop, cur=state.cur)
+            lin_doc(tokens=tokens, on_pop=_on_pop, cur=state.cur)
 
         me.divider()
 
         with me.box(style=entries):
             for entry in state.entries:
-                LinEntry(
+                lin_entry(
                     entry=asdict(entry),
                     selected=state.selected == entry.ent_seq,
                     on_chosen=_on_chosen
@@ -193,8 +200,3 @@ def _next(event):
             state.entries = get_entries()
         except:
             state.done = True
-
-
-if __name__ == "__main__":
-    app = create_app(prod_mode=True)
-    app._flask_app.run(host="localhost", port=8080, use_reloader=True)
diff --git a/wsd/annotate/components/__init__.py b/wsd/annotate/components/__init__.py
@@ -1 +1,2 @@
-from .pop import *
+"""A collection of python wrappers for LIT components"""
+from .pop import *
diff --git a/wsd/annotate/components/pop.py b/wsd/annotate/components/pop.py
@@ -1,3 +1,4 @@
+"""Mesop wrapper for LIT components"""
 from dataclasses import dataclass
 from typing import Any, Callable, List, Dict
 
@@ -6,19 +7,21 @@
 
 @dataclass
 class Token:
+    """Token dataclass"""
     text: str = ''
     lemma: str = ''
     pos: str = ''
 
 
 @mel.web_component(path="../lit/dist/linpop.js")
-def LinDoc(
+def lin_doc(
     *,
     tokens: List[Dict],
     cur: int,
     on_pop: Callable[[mel.WebEvent], Any],
     key: str | None = None,
 ):
+    """Wrapper for the LIT LinDoc component."""
     return mel.insert_web_component(
         name="lin-doc",
         key=key,
@@ -28,13 +31,14 @@ def LinDoc(
 
 
 @mel.web_component(path="../lit/dist/linpop.js")
-def LinEntry(
+def lin_entry(
     *,
     entry: Dict,
     selected: bool,
     on_chosen: Callable[[mel.WebEvent], Any],
     key: str | None = None
 ):
+    """Wrapper for the LIT LinEntry component."""
     return mel.insert_web_component(
         name="lin-entry",
         key=key,
@@ -43,4 +47,4 @@ def LinEntry(
     )
 
 
-__all__ = ["LinDoc", "LinEntry", "Token"]
+__all__ = ["lin_doc", "lin_entry", "Token"]
diff --git a/wsd/annotate/lindict.py b/wsd/annotate/lindict.py
@@ -1,11 +1,27 @@
+# pylint: disable=too-few-public-methods, missing-timeout
+"""A simple interface for the LinDict API"""
+from typing import List
 import requests
 
 from wsd.parsers import Entry
 
+
 class LinDictAPI:
     """A simple interface for the LinDict API"""
 
-    def search(self, query) -> Entry:
+    def search(self, query) -> List[Entry]:
+        """Search the dictionary for the given query.
+
+        Parameters
+        ----------
+        query : str
+            The query string.
+
+        Returns
+        -------
+        entries : List[Entry]
+            A list of entries.
+        """
         url = f"https://lindict.api.linalgo.com/v1/ja/search/?query={query}"
         response = requests.get(url)
         response.raise_for_status()
@@ -16,9 +32,3 @@ def search(self, query) -> Entry:
         for entry in data['results']:
             entries.append(Entry.from_dict(entry))
         return entries
-
-if __name__ ==  "__main__":
-    lindict = LinDictAPI()
-    entries = lindict.search('馬酔木')
-    for entry in entries:
-        print(entry)
diff --git a/wsd/parsers/jmdict.py b/wsd/parsers/jmdict.py
@@ -94,9 +94,10 @@ def from_node(cls, node):
         dial = [d.text for d in node.findall('dial')]
         gloss = [Gloss.from_node(g) for g in node.findall('gloss')]
         return cls(stagk, stagr, pos, xref, ant, field_, misc, s_inf, lsource, dial, gloss)
-    
+
     @classmethod
     def from_dict(cls, data):
+        """Create a Sense object from a dictionary"""
         data['field_'] = data.pop('field')
         data['gloss'] = [Gloss(**g) for g in data.pop('glosses')]
         return cls(**data)
@@ -119,9 +120,10 @@ def from_node(cls, node):
         r_ele = [Reading.from_node(r) for r in node.iter('r_ele')]
         senses = [Sense.from_node(s) for s in node.iter('sense')]
         return cls(ent_seq, k_ele, r_ele, senses)
-    
+
     @classmethod
     def from_dict(cls, data):
+        """Create an Entry object from a dictionary"""
         k_ele = [Kanji(**k) for k in data['kanjis']]
         r_ele = [Reading(**r) for r in data['readings']]
         senses = [Sense.from_dict(s) for s in data['senses']]

diff --git a/wsd/parsers/xlwsd.py b/wsd/parsers/xlwsd.py
@@ -33,7 +33,7 @@ def parse(lang):
         current_dir = os.path.dirname(__file__)
         filepath = os.path.join(current_dir, '../../data/xl-wsd-data.zip')
         zf = zipfile.ZipFile(filepath, 'r')
-        
+
         base_dir = f'xl-wsd/training_datasets/semcor_{lang}'
         labels = f'{base_dir}/semcor_{lang}.gold.key.txt'
         corpus = f'{base_dir}/semcor_{lang}.data.xml'