Make ICU tokenizer the default #2707

Merged: 6 commits, May 11, 2022
Changes from 1 commit
switch tests to ICU tokenizer as default
lonvia committed May 10, 2022
commit adeebec32a5e75318eb8714a38087055e3b2e757
test/bdd/environment.py: 1 addition & 1 deletion

@@ -59,5 +59,5 @@ def after_scenario(context, scenario):
 
 def before_tag(context, tag):
     if tag == 'fail-legacy':
-        if context.config.userdata['TOKENIZER'] in (None, 'legacy'):
+        if context.config.userdata['TOKENIZER'] == 'legacy':
             context.scenario.skip("Not implemented in legacy tokenizer")
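
With ICU as the default, a scenario tagged fail-legacy is now skipped only when the tokenizer is explicitly set to 'legacy'; an unset TOKENIZER no longer implies legacy. A minimal sketch of the new rule (illustrative only: a plain dict stands in for behave's context.config.userdata):

# Illustrative only: `userdata` is a plain dict standing in for
# behave's context.config.userdata.
def should_skip_fail_legacy(userdata):
    # Previously both None (unset) and 'legacy' triggered the skip,
    # because legacy was the default tokenizer. Now only an explicit
    # 'legacy' does; unset means ICU.
    return userdata.get('TOKENIZER') == 'legacy'

assert should_skip_fail_legacy({}) is False                     # unset: ICU, scenario runs
assert should_skip_fail_legacy({'TOKENIZER': 'icu'}) is False
assert should_skip_fail_legacy({'TOKENIZER': 'legacy'}) is True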
test/bdd/steps/nominatim_environment.py: 1 addition & 1 deletion

@@ -207,7 +207,7 @@ def setup_api_db(self):
             self.run_nominatim('add-data', '--tiger-data', str((testdata / 'tiger').resolve()))
             self.run_nominatim('freeze')
 
-            if self.tokenizer != 'icu':
+            if self.tokenizer == 'legacy':
                 phrase_file = str((testdata / 'specialphrases_testdb.sql').resolve())
                 run_script(['psql', '-d', self.api_test_db, '-f', phrase_file])
             else:
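
The condition is inverted so that only the legacy tokenizer keeps the psql import of the pre-built special-phrase dump; ICU, now the default, and any future tokenizer take the else branch, which this hunk cuts off. A hedged sketch of the selection, with the non-legacy path left as a placeholder since it is not shown above:

# Sketch only: run_psql and import_phrases_for_tokenizer are placeholder
# callables, because the else branch is cut off in the hunk above.
def load_special_phrases(tokenizer, run_psql, import_phrases_for_tokenizer):
    if tokenizer == 'legacy':
        # Legacy test DB: load the pre-built SQL dump directly.
        run_psql('specialphrases_testdb.sql')
    else:
        # ICU (the new default) and any other tokenizer use the regular
        # special-phrase import path instead.
        import_phrases_for_tokenizer()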
test/bdd/steps/steps_db_ops.py: 1 addition & 1 deletion

@@ -266,7 +266,7 @@ def check_word_table_for_postcodes(context, exclude, postcodes):
     plist.sort()
 
     with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
-        if nctx.tokenizer == 'icu':
+        if nctx.tokenizer != 'legacy':
             cur.execute("SELECT word FROM word WHERE type = 'P' and word = any(%s)",
                         (plist,))
         else:
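
As above, the ICU-style check becomes the default for every tokenizer except legacy. A self-contained sketch of that branch, assuming an open psycopg2 connection and the ICU word table layout used in the query (postcodes stored as rows with type = 'P' and the value in the word column); the function name is hypothetical:

import psycopg2.extras

def postcodes_in_word_table(conn, postcodes):
    # Sketch only: assumes the ICU-style word table schema shown in the
    # query above (type = 'P', postcode value in the `word` column).
    with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
        cur.execute("SELECT word FROM word WHERE type = 'P' and word = any(%s)",
                    (list(postcodes),))
        return {row[0] for row in cur}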
test/python/conftest.py: 0 additions & 5 deletions

@@ -211,11 +211,6 @@ def osmline_table(temp_db_with_extensions, table_factory):
                      country_code VARCHAR(2)""")
 
 
-@pytest.fixture
-def word_table(temp_db_conn):
-    return mocks.MockWordTable(temp_db_conn)
-
-
 @pytest.fixture
 def sql_preprocessor_cfg(tmp_path, table_factory, temp_db_with_extensions):
     table_factory('country_name', 'partition INT', ((0, ), (1, ), (2, )))
test/python/mocks.py: 1 addition & 1 deletion

@@ -14,7 +14,7 @@
 from nominatim.db import properties
 
 # This must always point to the mock word table for the default tokenizer.
-from mock_legacy_word_table import MockLegacyWordTable as MockWordTable
+from mock_icu_word_table import MockIcuWordTable as MockWordTable
 
 class MockPlacexTable:
     """ A placex table for testing.
test/python/tools/test_database_import.py: 1 addition & 1 deletion

@@ -179,7 +179,7 @@ def test_truncate_database_tables(temp_db_conn, temp_db_cursor, table_factory, w
 
 @pytest.mark.parametrize("threads", (1, 5))
 def test_load_data(dsn, place_row, placex_table, osmline_table,
-                   word_table, temp_db_cursor, threads):
+                   temp_db_cursor, threads):
     for func in ('precompute_words', 'getorcreate_housenumber_id', 'make_standard_name'):
         temp_db_cursor.execute(f"""CREATE FUNCTION {func} (src TEXT)
                                    RETURNS TEXT AS $$ SELECT 'a'::TEXT $$ LANGUAGE SQL
test/python/tools/test_migration.py: 7 additions & 1 deletion

@@ -14,6 +14,8 @@
 from nominatim.errors import UsageError
 import nominatim.version
 
+from mock_legacy_word_table import MockLegacyWordTable
+
 class DummyTokenizer:
 
     def update_sql_functions(self, config):
@@ -26,6 +28,10 @@ def postprocess_mock(monkeypatch):
     monkeypatch.setattr(migration.tokenizer_factory, 'get_tokenizer_for_db',
                         lambda *args: DummyTokenizer())
 
+@pytest.fixture
+def legacy_word_table(temp_db_conn):
+    return MockLegacyWordTable(temp_db_conn)
+
 
 def test_no_migration_old_versions(temp_db_with_extensions, table_factory, def_config):
     table_factory('country_name', 'name HSTORE, country_code TEXT')
@@ -156,7 +162,7 @@ def test_add_nominatim_property_table_repeat(temp_db_conn, temp_db_cursor,
 
 
 def test_change_housenumber_transliteration(temp_db_conn, temp_db_cursor,
-                                            word_table, placex_table):
+                                            legacy_word_table, placex_table):
     placex_table.add(housenumber='3A')
 
     temp_db_cursor.execute("""CREATE OR REPLACE FUNCTION make_standard_name(name TEXT)
test/python/tools/test_postcodes.py: 1 addition & 1 deletion

@@ -65,7 +65,7 @@ def tokenizer():
     return dummy_tokenizer.DummyTokenizer(None, None)
 
 @pytest.fixture
-def postcode_table(temp_db_conn, placex_table, word_table):
+def postcode_table(temp_db_conn, placex_table):
     return MockPostcodeTable(temp_db_conn)