Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

n-best search #259

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions tokenizer/lattice/lattice.go
Original file line number Diff line number Diff line change
Expand Up @@ -212,7 +212,7 @@ func additionalCost(n *Node) int {
return 0
}

// Forward runs forward algorithm of the Viterbi.
// Forward is the forward algorithm of the Viterbi.
func (la *Lattice) Forward(m TokenizeMode) {
for i, size := 1, len(la.list); i < size; i++ {
currentList := la.list[i]
Expand Down Expand Up @@ -243,7 +243,7 @@ func (la *Lattice) Forward(m TokenizeMode) {
}
}

// Backward runs backward algorithm of the Viterbi.
// Backward is the backward algorithm of the Viterbi.
func (la *Lattice) Backward(m TokenizeMode) {
const bufferExpandRatio = 2
size := len(la.list)
Expand Down Expand Up @@ -279,6 +279,10 @@ func (la *Lattice) Backward(m TokenizeMode) {
}
}

// NBestBackward is a placeholder for the backward pass of the n-best search
// (presumably the n-best counterpart of Backward; confirm once implemented).
// It is not implemented yet: the body is empty, so calling it has no effect
// and the mode m is ignored.
func (la *Lattice) NBestBackward(m TokenizeMode) {

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🚫 [golangci] reported by reviewdog 🐶
exported: exported method Lattice.NBestBackward should have comment or be unexported (revive)

/** TODO **/
}

func posFeature(d *dict.Dict, u *dict.UserDict, t *Node) string {
var ret []string
switch t.Class {
Expand Down
58 changes: 58 additions & 0 deletions tokenizer/lattice/node.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,3 +50,61 @@ var nodePool = sync.Pool{
return new(Node)
},
}

// NodeHeap is a binary heap of *Node values kept in a slice. The ordering is
// supplied by the less function: Pop returns a node that less ranks before
// (or equal to) every other node in the heap. The less field must be set
// before Push or Pop is called on a heap holding more than one node.
type NodeHeap struct {

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🚫 [golangci] reported by reviewdog 🐶
exported: exported type NodeHeap should have comment or be unexported (revive)

list []*Node // heap-ordered backing slice; index 0 is the node Pop returns next
less func(x, y *Node) bool // reports whether x has higher priority than y
}

// Push adds a node to the heap.
//
// The node is appended to the backing slice and then sifted up towards the
// root. Sifting stops as soon as the parent is ordered strictly before the
// node, so a push costs at most one comparison per level actually climbed
// instead of always walking the whole path to the root.
func (h *NodeHeap) Push(n *Node) {
	h.list = append(h.list, n)
	for i := len(h.list) - 1; i > 0; {
		parent := (i - 1) / 2
		// Heap order already holds here, and therefore for every ancestor too.
		if h.less(h.list[parent], h.list[i]) {
			break
		}
		h.list[parent], h.list[i] = h.list[i], h.list[parent]
		i = parent
	}
}

// Pop removes and returns the node at the root of the heap, i.e. the one the
// less function ranks first. It returns nil when the heap holds no nodes.
func (h *NodeHeap) Pop() *Node {
	last := len(h.list) - 1
	if last < 0 {
		return nil
	}
	top := h.list[0]

	// Move the final element to the root (a self-assignment when it is the
	// only element), clear the vacated slot so the slice does not keep the
	// node alive, and shrink the slice by one.
	h.list[0] = h.list[last]
	h.list[last] = nil
	h.list = h.list[:last]

	// Sift the new root down until neither child should come before it.
	for cur := 0; ; {
		next := cur
		left, right := 2*cur+1, 2*cur+2
		if left < last && !h.less(h.list[next], h.list[left]) {
			next = left
		}
		if right < last && !h.less(h.list[next], h.list[right]) {
			next = right
		}
		if next == cur {
			return top
		}
		h.list[cur], h.list[next] = h.list[next], h.list[cur]
		cur = next
	}
}

// Empty reports whether the heap currently holds no nodes.
func (h NodeHeap) Empty() bool {
	return len(h.list) < 1
}

// Size reports how many nodes the heap currently holds.
func (h NodeHeap) Size() int {
	count := len(h.list)
	return count
}
52 changes: 52 additions & 0 deletions tokenizer/lattice/node_test.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package lattice

import (
"reflect"
"testing"
)

Expand All @@ -21,3 +22,54 @@ func Test_NodeClassString(t *testing.T) {
}
}
}

// TestNodeHeap_PushPop checks that nodes pushed in any order come back out of
// the heap in ascending ID order, including when IDs repeat.
//
// Each case runs as a named subtest with its own heap, so a failure reports
// which input caused it and cannot leak leftover nodes into later cases.
func TestNodeHeap_PushPop(t *testing.T) {
	byID := func(x, y *Node) bool {
		return x.ID < y.ID
	}
	testdata := []struct {
		name string
		ids  []int
		want []int
	}{
		{
			name: "ascending order",
			ids:  []int{1, 2, 3, 4, 5, 6, 7},
			want: []int{1, 2, 3, 4, 5, 6, 7},
		},
		{
			name: "descending order",
			ids:  []int{7, 6, 5, 4, 3, 2, 1},
			want: []int{1, 2, 3, 4, 5, 6, 7},
		},
		{
			name: "random order",
			ids:  []int{3, 6, 4, 1, 7, 5, 2},
			want: []int{1, 2, 3, 4, 5, 6, 7},
		},
		{
			name: "list /w duplicate items",
			ids:  []int{3, 6, 3, 4, 1, 3, 6, 2, 7, 5, 2},
			want: []int{1, 2, 2, 3, 3, 3, 4, 5, 6, 6, 7},
		},
	}
	for _, data := range testdata {
		t.Run(data.name, func(t *testing.T) {
			heap := NodeHeap{less: byID}
			for _, v := range data.ids {
				heap.Push(&Node{ID: v})
			}
			if got, want := heap.Size(), len(data.ids); got != want {
				t.Fatalf("size after pushes: got %d, want %d", got, want)
			}

			got := make([]int, 0, heap.Size())
			for !heap.Empty() {
				n := heap.Pop()
				if n == nil {
					t.Fatalf("unexpected nil node, heap=%+v", heap)
				}
				got = append(got, n.ID)
			}
			if !reflect.DeepEqual(got, data.want) {
				t.Errorf("got %+v, want %+v", got, data.want)
			}
			// Pop documents a nil result once the heap has been drained.
			if n := heap.Pop(); n != nil {
				t.Errorf("Pop on empty heap: got %+v, want nil", n)
			}
		})
	}
}