Skip to content

Commit

Permalink
feat: draft annotation ui
Browse files Browse the repository at this point in the history
  • Loading branch information
arachez committed Mar 2, 2025
1 parent 6e6aa87 commit 88be7e4
Show file tree
Hide file tree
Showing 8 changed files with 146 additions and 18 deletions.
4 changes: 4 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,10 @@ Let's build a state-of-the-art multi-lingual Word Sense Disambiguation model.
- Download the data with `git lfs fetch --all`
- See `examples/data.ipynb`.

## Annotate new data

Run `mesop wsd/annotate/app.py` to start the annotation UI.

## Attribution and LICENSE
- [The JMDict Project](https://www.edrdg.org/jmdict/j_jmdict.html)
- [XL-WSD](https://sapienzanlp.github.io/xl-wsd/docs/data/)
Empty file removed wsd/annotate/__init__.py
Empty file.
64 changes: 55 additions & 9 deletions wsd/annotate/app.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,18 @@
from dataclasses import field
from typing import List
import mesop as me
import mesop.labs as mel
from mesop.server.wsgi_app import create_app

from wsd.parsers import XLWSDParser
from wsd.parsers.jmdict import Entry
from wsd.models import JMDict
from wsd.annotate.components import linpop_component

parser = XLWSDParser()
X, y = parser.parse("ja")
xlwsd = XLWSDParser()
X, y = xlwsd.parse("ja")

jmdict = JMDict()

style_grid = me.Style(
display="grid",
Expand All @@ -19,23 +27,61 @@
padding=me.Padding.all(24),
overflow_y="auto"
)
style_footer = me.Style(
background="#f0f0f0",
padding=me.Padding.all(24)
style_card = me.Style(
padding=me.Padding.all(24),
overflow_y="auto",
z_index=100,
box_shadow="0 0 10px rgba(0, 0, 0, 0.1)"
)
style_group = me.Style(
display="flex",
gap=8
)

@me.stateclass
class State:
    """Mesop state for the annotation page."""
    # Dictionary entries to display as candidate cards; starts empty and is
    # replaced by the `on_pop` handler with the result of a JMDict search.
    candidates: List[Entry] = field(default_factory=list)

@me.page(path="/")
@me.page(
path="/",
security_policy=me.SecurityPolicy(
allowed_script_srcs=[
"https://cdn.jsdelivr.net",
]
),
)
def app():
with me.box(style=style_grid):
with me.box(style=style_header):
me.text("SEMCOR WSD")

with me.box(style=style_body):
me.text(''.join(X[0]))
with me.box(style=style_card):
for tok in X[0]:
linpop_component(
text=tok,
on_pop=on_pop
)
state = me.state(State)
for candidate in state.candidates:
with me.box(style=style_card):
me.text(candidate.ent_seq)
with me.box(style=style_group):
for kanji in candidate.k_ele:
me.text(kanji.keb)
with me.box(style=style_group):
for reading in candidate.r_ele:
me.text(reading.reb)
with me.box():
for sense in candidate.sense:
for gloss in sense.gloss:
me.text(gloss.text)


with me.box(style=style_footer):
me.text("Footer")
def on_pop(event: mel.WebEvent):
    """Handle a click on a token and look up its dictionary candidates.

    Parameters
    ----------
    event : mel.WebEvent
        Event dispatched by the `linpop-component` web component. Its
        ``value`` is read as a mapping with a ``'text'`` key holding the
        clicked token.
    """
    # Fetch the state first, then search, so the evaluation order matches
    # the step-by-step form: state lookup -> payload read -> search -> store.
    page_state = me.state(State)
    page_state.candidates = jmdict.search(event.value['text'])


if __name__ == "__main__":
Expand Down
1 change: 1 addition & 0 deletions wsd/annotate/components/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from .pop import linpop_component
36 changes: 36 additions & 0 deletions wsd/annotate/components/pop.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
import {
LitElement,
html,
css
} from 'https://cdn.jsdelivr.net/gh/lit/dist@3/all/lit-all.min.js';

/**
 * Inline piece of text that highlights on hover and notifies Mesop when
 * clicked.
 *
 * Properties:
 *   - text:     the string rendered inside the span and echoed back in the
 *               event payload.
 *   - popEvent: string passed as the first argument to `MesopEvent` when the
 *               span is clicked.
 */
class LinPopComponent extends LitElement {
  /** Reactive property declarations read by Lit. */
  static get properties() {
    return {
      text: { type: String },
      popEvent: { type: String },
    };
  }

  /** Component-scoped styles: grey background and pointer cursor on hover. */
  static get styles() {
    return css`
      .pop:hover {
        background-color:rgb(181, 181, 181);
        cursor: pointer;
      }
    `;
  }

  /** Render the text inside a clickable span. */
  render() {
    return html`
      <span class="pop" @click="${this._pop}">
        ${this.text}
      </span>
    `;
  }

  /** Click handler: dispatch a MesopEvent carrying the displayed text. */
  _pop() {
    const payload = { text: this.text };
    this.dispatchEvent(new MesopEvent(this.popEvent, payload));
  }
}

// Register the element under the tag name used by the Python wrapper.
customElements.define('linpop-component', LinPopComponent);
26 changes: 26 additions & 0 deletions wsd/annotate/components/pop.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
from typing import Any, Callable

import mesop.labs as mel


@mel.web_component(path="./pop.js")
def linpop_component(
    *,
    text: str,
    on_pop: Callable[[mel.WebEvent], Any],
    key: str | None = None,
):
    """Insert a `linpop-component` web component showing ``text``.

    Parameters
    ----------
    text : str
        Value bound to the component's ``text`` property.
    on_pop : Callable[[mel.WebEvent], Any]
        Handler registered for the component's ``popEvent`` event.
    key : str | None
        Optional key forwarded to ``mel.insert_web_component``.

    Returns
    -------
    Whatever ``mel.insert_web_component`` returns.
    """
    event_handlers = {"popEvent": on_pop}
    component_props = {"text": text}
    return mel.insert_web_component(
        name="linpop-component",
        key=key,
        events=event_handlers,
        properties=component_props,
    )

# Public API of this module: only the component wrapper is exported.
__all__ = [
    "linpop_component",
]
21 changes: 18 additions & 3 deletions wsd/models/baseline.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,19 @@
"""A simple dictionary interface for JMDict."""
import os
from typing import List
from wsd.parsers import JMDictParser
from wsd.parsers.jmdict import Entry

data_dir = os.path.join(os.path.dirname(__file__), '../../data')

class JMDict:
"""A simple dictionary interface for JMDict"""

def __init__(self):
self.entries = JMDictParser().parse('../data/JMdict_en.gz')
jmdict_file = os.path.join(data_dir, 'JMdict_en.gz')
self.entries = JMDictParser().parse(jmdict_file)

def search(self, text):
def search(self, text: str) -> List[Entry]:
"""Search for an entry by text.
Currently returns all entries that contain the text in either the kanji
Expand All @@ -18,6 +23,11 @@ def search(self, text):
----------
text : str
The text to search for
Returns
-------
List[Entry]
A list of entries that contain the query.
"""
res = []
for entry in self.entries:
Expand All @@ -29,7 +39,7 @@ def search(self, text):
res.append(entry)
return res

def feeling_lucky(self, text):
def feeling_lucky(self, text: str) -> Entry:
"""Return the first entry found.
Currently returns the first entry that contains the text in either the
Expand All @@ -39,6 +49,11 @@ def feeling_lucky(self, text):
----------
text : str
The text to search for
Returns
-------
Entry or None
The first entry that contains the query, or None if no entry matches.
"""
entries = self.search(text)
return entries[0] if entries else None
Expand Down
12 changes: 6 additions & 6 deletions wsd/parsers/jmdict.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
@dataclass
class Kanji:
"""Kanji element"""
keb: str
keb: str = field(default_factory=str)
ke_inf: List[str] = field(default_factory=list)
ke_pri: List[str] = field(default_factory=list)

Expand All @@ -27,7 +27,7 @@ def from_node(cls, node):
@dataclass
class Reading:
"""Reading element"""
reb: str
reb: str = field(default_factory=str)
re_nokanji: bool = False
re_restr: List[str] = field(default_factory=list)
re_inf: List[str] = field(default_factory=list)
Expand All @@ -47,8 +47,8 @@ def from_node(cls, node):
@dataclass
class Gloss:
"""A gloss element"""
text: str
lang: str = None
text: str = field(default_factory=str)
lang: str = field(default_factory=str)

@classmethod
def from_node(cls, node):
Expand All @@ -69,7 +69,7 @@ class Sense:
ant: List[str] = field(default_factory=list)
field_: List[str] = field(default_factory=list)
misc: List[str] = field(default_factory=list)
s_inf: str = None
s_inf: str = field(default_factory=str)
lsource: List[str] = field(default_factory=list)
dial: List[str] = field(default_factory=list)
gloss: List[Gloss] = field(default_factory=list)
Expand All @@ -95,7 +95,7 @@ def from_node(cls, node):
@dataclass
class Entry:
"""A dictionary entry"""
ent_seq: str
ent_seq: str = field(default_factory=str)
k_ele: List[Kanji] = field(default_factory=list)
r_ele: List[Reading] = field(default_factory=list)
sense: List[Sense] = field(default_factory=list)
Expand Down

0 comments on commit 88be7e4

Please sign in to comment.