From 88be7e45bbebaecccaa553df05a33ff3f11caaf8 Mon Sep 17 00:00:00 2001
From: Arnaud Rachez <arnaud@linalgo.com>
Date: Sun, 2 Mar 2025 22:51:47 +0900
Subject: [PATCH] feat: draft annotation ui

---
 README.md                           |  4 ++
 wsd/annotate/__init__.py            |  0
 wsd/annotate/app.py                 | 64 +++++++++++++++++++++++++----
 wsd/annotate/components/__init__.py |  1 +
 wsd/annotate/components/pop.js      | 36 ++++++++++++++++
 wsd/annotate/components/pop.py      | 26 ++++++++++++
 wsd/models/baseline.py              | 21 ++++++++--
 wsd/parsers/jmdict.py               | 12 +++---
 8 files changed, 146 insertions(+), 18 deletions(-)
 delete mode 100644 wsd/annotate/__init__.py
 create mode 100644 wsd/annotate/components/__init__.py
 create mode 100644 wsd/annotate/components/pop.js
 create mode 100644 wsd/annotate/components/pop.py

diff --git a/README.md b/README.md
index 6c84462..0cb5fb9 100644
--- a/README.md
+++ b/README.md
@@ -11,6 +11,10 @@ Let's build a state-of-the-art multi-lingual Word Sense Disambiguation model.
 - Download the data with `git lfs fetch --all`
 - See `examples/data.ipynb`.
 
+## Annotate new data
+
+Run `mesop wsd/annotate/app.py` from the repository root to start the annotation UI.
+
 ## Attribution and LICENSE
 - [The JMDict Project](https://www.edrdg.org/jmdict/j_jmdict.html)
 - [XL-WSD](https://sapienzanlp.github.io/xl-wsd/docs/data/)
diff --git a/wsd/annotate/__init__.py b/wsd/annotate/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/wsd/annotate/app.py b/wsd/annotate/app.py
index 8f734bf..2831edd 100644
--- a/wsd/annotate/app.py
+++ b/wsd/annotate/app.py
@@ -1,10 +1,18 @@
+from dataclasses import field
+from typing import List
 import mesop as me
+import mesop.labs as mel
 from mesop.server.wsgi_app import create_app
 
 from wsd.parsers import XLWSDParser
+from wsd.parsers.jmdict import Entry
+from wsd.models import JMDict
+from wsd.annotate.components import linpop_component
 
-parser = XLWSDParser()
-X, y = parser.parse("ja")
+xlwsd = XLWSDParser()
+X, y = xlwsd.parse("ja")
+
+jmdict = JMDict()
 
 style_grid = me.Style(
     display="grid",
@@ -19,23 +27,61 @@
     padding=me.Padding.all(24),
     overflow_y="auto"
 )
-style_footer = me.Style(
-    background="#f0f0f0",
-    padding=me.Padding.all(24)
+style_card = me.Style(
+    padding=me.Padding.all(24),
+    overflow_y="auto",
+    z_index=100,
+    box_shadow="0 0 10px rgba(0, 0, 0, 0.1)"
+)
+style_group = me.Style(
+    display="flex",
+    gap=8
 )
 
+@me.stateclass
+class State:
+  candidates: List[Entry] = field(default_factory=list)  # set by on_pop
 
-@me.page(path="/")
+@me.page(
+    path="/",
+    # pop.js loads Lit from jsDelivr, so that origin must be allowed.
+    security_policy=me.SecurityPolicy(
+        allowed_script_srcs=["https://cdn.jsdelivr.net"],
+    ),
+)
 def app():
+    """Render the tokens of `X[0]` as clickable items, then the JMDict hits."""
     with me.box(style=style_grid):
         with me.box(style=style_header):
             me.text("SEMCOR WSD")
 
         with me.box(style=style_body):
-            me.text(''.join(X[0]))
+            # One web component per token so each token is clickable.
+            with me.box(style=style_card):
+                for token in X[0]:
+                    linpop_component(text=token, on_pop=on_pop)
+
+            # One card per entry stored in state by on_pop.
+            for entry in me.state(State).candidates:
+                with me.box(style=style_card):
+                    me.text(entry.ent_seq)
+                    with me.box(style=style_group):
+                        for k_ele in entry.k_ele:
+                            me.text(k_ele.keb)
+                    with me.box(style=style_group):
+                        for r_ele in entry.r_ele:
+                            me.text(r_ele.reb)
+                    # Every gloss of every sense, one text node each.
+                    with me.box():
+                        for sense in entry.sense:
+                            for gloss in sense.gloss:
+                                me.text(gloss.text)
+
 
-        with me.box(style=style_footer):
-            me.text("Footer")
+def on_pop(event: mel.WebEvent):
+    """Search JMDict for the clicked token's text and store the hits in state."""
+    clicked_text = event.value['text']
+    me.state(State).candidates = jmdict.search(clicked_text)
 
 
 if __name__ == "__main__":
diff --git a/wsd/annotate/components/__init__.py b/wsd/annotate/components/__init__.py
new file mode 100644
index 0000000..6ec6c9b
--- /dev/null
+++ b/wsd/annotate/components/__init__.py
@@ -0,0 +1 @@
+from .pop import linpop_component
\ No newline at end of file
diff --git a/wsd/annotate/components/pop.js b/wsd/annotate/components/pop.js
new file mode 100644
index 0000000..a6df0d3
--- /dev/null
+++ b/wsd/annotate/components/pop.js
@@ -0,0 +1,36 @@
+import {
+    LitElement,
+    html,
+    css
+} from 'https://cdn.jsdelivr.net/gh/lit/dist@3/all/lit-all.min.js';
+
+/** Clickable token that reports its text to Mesop when clicked. */
+class LinPopComponent extends LitElement {
+    static properties = {
+        text: { type: String },
+        popEvent: { type: String },
+    };
+    static styles = css`
+        .pop:hover {
+            background-color:rgb(181, 181, 181);
+            cursor: pointer;
+        }
+    `;
+
+    render() {
+        return html`
+        <span class="pop" @click="${this._pop}">
+          ${this.text}
+        </span>
+      `;
+    }
+
+    /** Dispatch the Mesop event named by `popEvent` with the token text. */
+    _pop() {
+        const payload = { text: this.text };
+        const popped = new MesopEvent(this.popEvent, payload);
+        this.dispatchEvent(popped);
+    }
+}
+
+customElements.define('linpop-component', LinPopComponent);
\ No newline at end of file
diff --git a/wsd/annotate/components/pop.py b/wsd/annotate/components/pop.py
new file mode 100644
index 0000000..511a3e2
--- /dev/null
+++ b/wsd/annotate/components/pop.py
@@ -0,0 +1,26 @@
+from typing import Any, Callable
+
+import mesop.labs as mel
+
+
+@mel.web_component(path="./pop.js")
+def linpop_component(
+  *,
+  text: str,
+  on_pop: Callable[[mel.WebEvent], Any],
+  key: str | None = None,
+):
+  """Insert a `linpop-component` element that displays `text`.
+
+  `on_pop` is bound to the `popEvent` event of that element.
+  """
+  return mel.insert_web_component(
+    name="linpop-component",
+    key=key,
+    events={"popEvent": on_pop},
+    properties={"text": text},
+  )
+
+__all__ = [
+  "linpop_component",
+]
\ No newline at end of file
diff --git a/wsd/models/baseline.py b/wsd/models/baseline.py
index 712f1d8..5036a66 100644
--- a/wsd/models/baseline.py
+++ b/wsd/models/baseline.py
@@ -1,14 +1,19 @@
 """A simple dictionary interface for JMDict."""
+import os
+from typing import List, Optional
 from wsd.parsers import JMDictParser
+from wsd.parsers.jmdict import Entry
 
+data_dir = os.path.join(os.path.dirname(__file__), '../../data')
 
 class JMDict:
     """A simple dictionary interface for JMDict"""
 
     def __init__(self):
-        self.entries = JMDictParser().parse('../data/JMdict_en.gz')
+        gz_path = os.path.join(data_dir, 'JMdict_en.gz')  # via __file__, not CWD
+        self.entries = JMDictParser().parse(gz_path)
 
-    def search(self, text):
+    def search(self, text: str) -> List[Entry]:
         """Search for an entry by text.
 
         Currently returns all entries that contain the text in either the kanji 
@@ -18,6 +23,11 @@ def search(self, text):
         ----------
         text : str
             The text to search for
+        
+        Returns
+        -------
+        List[Entry]
+            A list of entries that contain the query.
         """
         res = []
         for entry in self.entries:
@@ -29,7 +39,7 @@ def search(self, text):
                     res.append(entry)
         return res
 
-    def feeling_lucky(self, text):
+    def feeling_lucky(self, text: str) -> Optional[Entry]:
         """Return the first entry found.
 
         Currently returns the first entry that contains the text in either the
@@ -39,6 +49,11 @@ def feeling_lucky(self, text):
         ----------
         text : str
             The text to search for
+
+        Returns
+        -------
+        Optional[Entry]
+            The first entry that contains the query, or None if none matches.
         """
         entries = self.search(text)
         return entries[0] if entries else None
diff --git a/wsd/parsers/jmdict.py b/wsd/parsers/jmdict.py
index e2df5a6..f7efbc6 100644
--- a/wsd/parsers/jmdict.py
+++ b/wsd/parsers/jmdict.py
@@ -11,7 +11,7 @@
 @dataclass
 class Kanji:
     """Kanji element"""
-    keb: str
+    keb: str = field(default_factory=str)
     ke_inf: List[str] = field(default_factory=list)
     ke_pri: List[str] = field(default_factory=list)
 
@@ -27,7 +27,7 @@ def from_node(cls, node):
 @dataclass
 class Reading:
     """Reading element"""
-    reb: str
+    reb: str = field(default_factory=str)
     re_nokanji: bool = False
     re_restr: List[str] = field(default_factory=list)
     re_inf: List[str] = field(default_factory=list)
@@ -47,8 +47,8 @@ def from_node(cls, node):
 @dataclass
 class Gloss:
     """A gloss element"""
-    text: str
-    lang: str = None
+    text: str = field(default_factory=str)
+    lang: str = field(default_factory=str)
 
     @classmethod
     def from_node(cls, node):
@@ -69,7 +69,7 @@ class Sense:
     ant: List[str] = field(default_factory=list)
     field_: List[str] = field(default_factory=list)
     misc: List[str] = field(default_factory=list)
-    s_inf: str = None
+    s_inf: str = field(default_factory=str)
     lsource: List[str] = field(default_factory=list)
     dial: List[str] = field(default_factory=list)
     gloss: List[Gloss] = field(default_factory=list)
@@ -95,7 +95,7 @@ def from_node(cls, node):
 @dataclass
 class Entry:
     """A dictionary entry"""
-    ent_seq: str
+    ent_seq: str = field(default_factory=str)
     k_ele: List[Kanji] = field(default_factory=list)
     r_ele: List[Reading] = field(default_factory=list)
     sense: List[Sense] = field(default_factory=list)