Skip to content

Commit 3591f5e

Browse files
Noriyuki Takahashiyukawa
Noriyuki Takahashi
authored and committed
Introduce select0/1 cache in LOUDS.
The common usage of LOUDS includes: i) traversal downward from the root (select0 is computed), ii) traversal upward to the root (select1 is computed). In both cases, nodes close to the root are accessed. Therefore, caching select0/1 values for such nodes improves traversal speed. BUG= TEST=unittest REF_BUG=21859420 REF_CL=96271561
1 parent 11cc541 commit 3591f5e

11 files changed

+342
-23
lines changed

src/dictionary/system/system_dictionary.cc

+11-2
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,11 @@ namespace {
8080

8181
const int kMinTokenArrayBlobSize = 4;
8282

83+
// Select0/select1 cache sizes handed to key_trie_.Open() and
// value_trie_.Open() in OpenDictionaryFile().
// NOTE(review): presumably these count cached entries rather than bytes --
// confirm against the trie's Open() implementation.
const size_t kKeyTrieSelect0CacheSize = 4 * 1024;
84+
const size_t kKeyTrieSelect1CacheSize = 4 * 1024;
85+
const size_t kValueTrieSelect0CacheSize = 1 * 1024;
86+
const size_t kValueTrieSelect1CacheSize = 16 * 1024;
87+
8388
// Expansion table format:
8489
// "<Character to expand>[<Expanded character 1><Expanded character 2>...]"
8590
//
@@ -500,7 +505,9 @@ bool SystemDictionary::OpenDictionaryFile(bool enable_reverse_lookup_index) {
500505

501506
const uint8 *key_image = reinterpret_cast<const uint8 *>(
502507
dictionary_file_->GetSection(codec_->GetSectionNameForKey(), &len));
503-
if (!key_trie_.Open(key_image)) {
508+
if (!key_trie_.Open(key_image,
509+
kKeyTrieSelect0CacheSize,
510+
kKeyTrieSelect1CacheSize)) {
504511
LOG(ERROR) << "cannot open key trie";
505512
return false;
506513
}
@@ -509,7 +516,9 @@ bool SystemDictionary::OpenDictionaryFile(bool enable_reverse_lookup_index) {
509516

510517
const uint8 *value_image = reinterpret_cast<const uint8 *>(
511518
dictionary_file_->GetSection(codec_->GetSectionNameForValue(), &len));
512-
if (!value_trie_.Open(value_image)) {
519+
if (!value_trie_.Open(value_image,
520+
kValueTrieSelect0CacheSize,
521+
kValueTrieSelect1CacheSize)) {
513522
LOG(ERROR) << "can not open value trie";
514523
return false;
515524
}

src/mozc_version_template.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
MAJOR=2
22
MINOR=17
3-
BUILD=2259
3+
BUILD=2261
44
REVISION=102
55
# NACL_DICTIONARY_VERSION is the target version of the system dictionary to be
66
# downloaded by NaCl Mozc.

src/storage/louds/louds.cc

+92
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
// Copyright 2010-2015, Google Inc.
2+
// All rights reserved.
3+
//
4+
// Redistribution and use in source and binary forms, with or without
5+
// modification, are permitted provided that the following conditions are
6+
// met:
7+
//
8+
// * Redistributions of source code must retain the above copyright
9+
// notice, this list of conditions and the following disclaimer.
10+
// * Redistributions in binary form must reproduce the above
11+
// copyright notice, this list of conditions and the following disclaimer
12+
// in the documentation and/or other materials provided with the
13+
// distribution.
14+
// * Neither the name of Google Inc. nor the names of its
15+
// contributors may be used to endorse or promote products derived from
16+
// this software without specific prior written permission.
17+
//
18+
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19+
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20+
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21+
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22+
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23+
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24+
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25+
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26+
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27+
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28+
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29+
30+
#include "storage/louds/louds.h"
31+
32+
namespace mozc {
33+
namespace storage {
34+
namespace louds {
35+
36+
// Constructs an empty LOUDS with both select caches disabled.
// |select1_cache_ptr_| is a raw pointer, so it is explicitly nulled here to
// avoid leaving the member with an indeterminate value before Init() runs.
Louds::Louds()
    : select0_cache_size_(0),
      select1_cache_size_(0),
      select1_cache_ptr_(nullptr) {}
37+
38+
// Nothing to release by hand: the members clean up after themselves.
Louds::~Louds() = default;
39+
40+
void Louds::Init(const uint8 *image, int length,
41+
size_t select0_cache_size, size_t select1_cache_size) {
42+
index_.Init(image, length);
43+
44+
// Cap the cache sizes.
45+
if (select0_cache_size > index_.GetNum0Bits()) {
46+
select0_cache_size = index_.GetNum0Bits();
47+
}
48+
if (select1_cache_size > index_.GetNum1Bits()) {
49+
select1_cache_size = index_.GetNum1Bits();
50+
}
51+
52+
// Initialize Select0 and Select1 cache for speed. In LOUDS traversal, nodes
53+
// close to the root are frequently accessed. Thus, we precompute select0 and
54+
// select1 values for such nodes. Since node IDs are assigned in BFS order,
55+
// the nodes close to the root are assigned smaller IDs. Hence, a simple
56+
// array can be used for the mapping from ID to cached value.
57+
select0_cache_size_ = select0_cache_size;
58+
select1_cache_size_ = select1_cache_size;
59+
const size_t cache_size = select0_cache_size + select1_cache_size;
60+
if (cache_size == 0) {
61+
return;
62+
}
63+
select_cache_.reset(new int[cache_size]);
64+
65+
if (select0_cache_size > 0) {
66+
// Precompute Select0(i) + 1 for i in (0, select0_cache_size).
67+
select_cache_[0] = 0;
68+
for (size_t i = 1; i < select0_cache_size; ++i) {
69+
select_cache_[i] = index_.Select0(i) + 1;
70+
}
71+
}
72+
73+
if (select1_cache_size > 0) {
74+
// Precompute Select1(i) for i in (0, select1_cache_size).
75+
select1_cache_ptr_ = select_cache_.get() + select0_cache_size;
76+
select1_cache_ptr_[0] = 0;
77+
for (size_t i = 1; i < select1_cache_size; ++i) {
78+
select1_cache_ptr_[i] = index_.Select1(i);
79+
}
80+
}
81+
}
82+
83+
void Louds::Reset() {
84+
index_.Reset();
85+
select_cache_.reset(nullptr);
86+
select0_cache_size_ = 0;
87+
select1_cache_size_ = 0;
88+
}
89+
90+
} // namespace louds
91+
} // namespace storage
92+
} // namespace mozc

src/storage/louds/louds.gyp

+6-2
Original file line numberDiff line numberDiff line change
@@ -32,10 +32,14 @@
3232
# Implementation of a Trie data structure based on LOUDS and its builder.
3333
{
3434
'target_name': 'louds',
35-
'type': 'none',
35+
'type': 'static_library',
3636
'toolsets': ['target', 'host'],
3737
'sources': [
38-
'louds.h',
38+
'louds.cc',
39+
],
40+
'dependencies': [
41+
'../../base/base.gyp:base',
42+
'simple_succinct_bit_vector_index',
3943
],
4044
},
4145
{

src/storage/louds/louds.h

+30-11
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,8 @@
3030
#ifndef MOZC_STORAGE_LOUDS_LOUDS_H_
3131
#define MOZC_STORAGE_LOUDS_LOUDS_H_
3232

33+
#include <memory>
34+
3335
#include "base/port.h"
3436
#include "storage/louds/simple_succinct_bit_vector_index.h"
3537

@@ -57,7 +59,8 @@ namespace louds {
5759
// Node: 0 1 2 3 4 5
5860
// LOUDS: 10 110 0 110 0 0
5961
//
60-
// This class provides basic APIs to traverse a tree structure.
62+
// This class provides basic APIs to traverse a tree structure. Performance
63+
// critical methods are inlined.
6164
class Louds {
6265
public:
6366
// Represents and stores location (tree node) for tree traversal. By storing
@@ -86,26 +89,34 @@ class Louds {
8689
friend class Louds;
8790
};
8891

89-
Louds() {}
90-
~Louds() {}
92+
Louds();
93+
~Louds();
94+
95+
// Initializes this LOUDS from bit array. To improve the performance of
96+
// downward traversal (i.e., from root to leaves), set |select0_cache_size| to
97+
// a larger value. On the other hand, to improve the performance of upward
98+
// traversal (i.e., from leaves to the root), set |select1_cache_size| to a
99+
// larger value.
100+
void Init(const uint8 *image, int length,
101+
size_t select0_cache_size, size_t select1_cache_size);
91102

92-
// Initializes this LOUDS from bit array.
103+
// Initializes this LOUDS from bit array without cache.
93104
void Init(const uint8 *image, int length) {
94-
index_.Init(image, length);
105+
Init(image, length, 0, 0);
95106
}
96107

97108
// Explicitly clears the internal bit array.
98-
void Reset() {
99-
index_.Reset();
100-
}
109+
void Reset();
101110

102111
// APIs for traversal (all the methods are inline for performance).
103112

104113
// Initializes a Node instance from node ID.
105114
// Note: to get the root node, just allocate a default Node instance.
106115
void InitNodeFromNodeId(int node_id, Node *node) const {
107116
node->node_id_ = node_id;
108-
node->edge_index_ = index_.Select1(node->node_id_);
117+
node->edge_index_ = node_id < select1_cache_size_
118+
? select1_cache_ptr_[node_id]
119+
: index_.Select1(node_id);
109120
}
110121

111122
// Returns true if the given node is the root.
@@ -121,7 +132,9 @@ class Louds {
121132
// * node 4 -> invalid node
122133
// REQUIRES: |node| is valid.
123134
void MoveToFirstChild(Node *node) const {
124-
node->edge_index_ = index_.Select0(node->node_id_) + 1;
135+
node->edge_index_ = node->node_id_ < select0_cache_size_
136+
? select_cache_[node->node_id_]
137+
: index_.Select0(node->node_id_) + 1;
125138
node->node_id_ = node->edge_index_ - node->node_id_ + 1;
126139
}
127140

@@ -145,7 +158,9 @@ class Louds {
145158
// REQUIRES: |node| is valid and not root.
146159
void MoveToParent(Node *node) const {
147160
node->node_id_ = node->edge_index_ - node->node_id_ + 1;
148-
node->edge_index_ = index_.Select1(node->node_id_);
161+
node->edge_index_ = node->node_id_ < select1_cache_size_
162+
? select1_cache_ptr_[node->node_id_]
163+
: index_.Select1(node->node_id_);
149164
}
150165

151166
// Returns true if |node| is in a valid state.
@@ -155,6 +170,10 @@ class Louds {
155170

156171
private:
157172
SimpleSuccinctBitVectorIndex index_;
173+
size_t select0_cache_size_;
174+
size_t select1_cache_size_;
175+
std::unique_ptr<int[]> select_cache_;
176+
int* select1_cache_ptr_; // = select_cache_.get() + select0_cache_size_
158177

159178
DISALLOW_COPY_AND_ASSIGN(Louds);
160179
};

0 commit comments

Comments
 (0)