Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix CI tests #9

Merged
merged 2 commits into from
Jul 30, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@ option(BUILD_WITH_USEARCH "Build with usearch as hnsw provider" ON)
option(BUILD_LIBHNSW "Build libhnsw as hnsw provider" OFF)

# options passed into lanterndb sourcecode
option(LANTERNDB_COPYNODES "Copy postgres index tuples for external retriever during scan instead of pinning" ON)
# todo:: tests for copynodes=ON are broken
option(LANTERNDB_COPYNODES "Copy postgres index tuples for external retriever during scan instead of pinning" OFF)

set(CMAKE_EXPORT_COMPILE_COMMANDS ON)

Expand Down
5 changes: 5 additions & 0 deletions scripts/run_all_tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,11 @@ do
${PSQL} postgres -c "drop database if exists ${TESTDB};"
${PSQL} postgres -c "create database ${TESTDB};"
base=$(basename $testfile .sql)

# psql options
# -e: echo commands
# -E: (passed manually, for debugging) echo hidden magic commands (\d, \di+, etc)
${PSQL} testdb --quiet -f test/sql/test_helpers/common.sql > /dev/null
${PSQL} testdb -ef test/sql/$base.sql > $TMP_OUTDIR/$base.out 2>&1 || true
DIFF=$(diff test/expected/$base.out $TMP_OUTDIR/$base.out || true)
# diff has non-zero exit code if files differ. ||true gets rid of error value
Expand Down
13 changes: 5 additions & 8 deletions test/expected/debug_helpers.out
Original file line number Diff line number Diff line change
Expand Up @@ -28,14 +28,11 @@ psql:test/sql/debug_helpers.sql:8: INFO: done init usearch index
psql:test/sql/debug_helpers.sql:8: INFO: inserted 8 elements
psql:test/sql/debug_helpers.sql:8: INFO: done saving 8 vectors
CREATE INDEX
Table "public.small_world"
Column | Type | Collation | Nullable | Default | Storage | Compression | Stats target | Description
--------+----------------------+-----------+----------+---------+----------+-------------+--------------+-------------
id | character varying(3) | | | | extended | | |
vector | vector(3) | | | | extended | | |
Indexes:
"small_world_vector_idx" hnsw (vector)
Access method: heap
SELECT * FROM ldb_get_indexes('small_world');
indexname | size | indexdef
------------------------+--------+-------------------------------------------------------------------------------
small_world_vector_idx | 176 kB | CREATE INDEX small_world_vector_idx ON public.small_world USING hnsw (vector)
(1 row)

SHOW hnsw.init_k;
hnsw.init_k
Expand Down
39 changes: 15 additions & 24 deletions test/expected/hnsw.out
Original file line number Diff line number Diff line change
Expand Up @@ -53,14 +53,11 @@ psql:test/sql/hnsw.sql:43: INFO: done init usearch index
psql:test/sql/hnsw.sql:43: INFO: inserted 8 elements
psql:test/sql/hnsw.sql:43: INFO: done saving 8 vectors
CREATE INDEX
Table "public.small_world"
Column | Type | Collation | Nullable | Default | Storage | Compression | Stats target | Description
--------+----------------------+-----------+----------+---------+----------+-------------+--------------+-------------
id | character varying(3) | | | | extended | | |
vector | vector(3) | | | | extended | | |
Indexes:
"small_world_vector_idx" hnsw (vector)
Access method: heap
SELECT * FROM ldb_get_indexes('small_world');
indexname | size | indexdef
------------------------+--------+-------------------------------------------------------------------------------
small_world_vector_idx | 176 kB | CREATE INDEX small_world_vector_idx ON public.small_world USING hnsw (vector)
(1 row)

SELECT * FROM (
SELECT id, ROUND( (vector <-> '[0,0,0]')::numeric, 2) as dist
Expand Down Expand Up @@ -113,14 +110,11 @@ psql:test/sql/hnsw.sql:59: INFO: done init usearch index
psql:test/sql/hnsw.sql:59: INFO: inserted 8 elements
psql:test/sql/hnsw.sql:59: INFO: done saving 8 vectors
CREATE INDEX
Table "public.small_world"
Column | Type | Collation | Nullable | Default | Storage | Compression | Stats target | Description
--------+----------------------+-----------+----------+---------+----------+-------------+--------------+-------------
id | character varying(3) | | | | extended | | |
vector | vector(3) | | | | extended | | |
Indexes:
"small_world_vector_idx" hnsw (vector) WITH (m='2', ef='11', ef_construction='12')
Access method: heap
SELECT * FROM ldb_get_indexes('small_world');
indexname | size | indexdef
------------------------+--------+---------------------------------------------------------------------------------------------------------------------------
small_world_vector_idx | 176 kB | CREATE INDEX small_world_vector_idx ON public.small_world USING hnsw (vector) WITH (m='2', ef='11', ef_construction='12')
(1 row)

SELECT ROUND( (vector <-> '[0,0,0]')::numeric, 2) as dist
FROM small_world
Expand Down Expand Up @@ -192,14 +186,11 @@ psql:test/sql/hnsw.sql:85: INFO: done init usearch index
psql:test/sql/hnsw.sql:85: INFO: inserted 8 elements
psql:test/sql/hnsw.sql:85: INFO: done saving 8 vectors
CREATE INDEX
Table "public.small_world"
Column | Type | Collation | Nullable | Default | Storage | Compression | Stats target | Description
--------+----------------------+-----------+----------+---------+----------+-------------+--------------+-------------
id | character varying(3) | | | | extended | | |
vector | vector(3) | | | | extended | | |
Indexes:
"small_world_vector_idx" hnsw (vector) WITH (m='11', ef='2', ef_construction='2')
Access method: heap
SELECT * FROM ldb_get_indexes('small_world');
indexname | size | indexdef
------------------------+--------+--------------------------------------------------------------------------------------------------------------------------
small_world_vector_idx | 176 kB | CREATE INDEX small_world_vector_idx ON public.small_world USING hnsw (vector) WITH (m='11', ef='2', ef_construction='2')
(1 row)

SELECT * FROM (
SELECT id, ROUND( (vector <-> '[0,0,0]')::numeric, 2) as dist
Expand Down
36 changes: 16 additions & 20 deletions test/expected/hnsw_insert.out
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,9 @@ psql:test/sql/hnsw_insert.sql:8: INFO: done saving 1000 vectors
CREATE INDEX
SET enable_seqscan = off;
SET
insert into small_world (id, vector) values ('xxx', '[0,0,0]');
INSERT INTO small_world (id, vector) VALUES ('xxx', '[0,0,0]');
INSERT 0 1
insert into small_world (id, vector) values ('x11', '[0,0,110]');
INSERT INTO small_world (id, vector) VALUES ('x11', '[0,0,110]');
INSERT 0 1
INSERT INTO small_world (id, vector) VALUES
('000', '[0,0,0]'),
Expand Down Expand Up @@ -217,38 +217,34 @@ psql:test/sql/hnsw_insert.sql:77: INFO: usearch index initialized
010 | 1.00
(10 rows)

select count(*) from sift_base1k;
SELECT count(*) from sift_base1k;
psql:test/sql/hnsw_insert.sql:79: INFO: cost estimate
count
-------
1000
(1 row)

List of relations
Schema | Name | Type | Owner | Table | Persistence | Access method | Size | Description
--------+----------------------------+-------+-----------+-----------------+-------------+---------------+--------+-------------
public | new_small_world_vector_idx | index | ngalstyan | new_small_world | permanent | hnsw | 176 kB |
public | sift_base1k_pkey | index | ngalstyan | sift_base1k | permanent | btree | 40 kB |
public | sift_base1k_v_idx | index | ngalstyan | sift_base1k | permanent | hnsw | 872 kB |
public | small_world_vector_idx | index | ngalstyan | small_world | permanent | hnsw | 176 kB |
(4 rows)
SELECT * from ldb_get_indexes('sift_base1k');
indexname | size | indexdef
-------------------+--------+-----------------------------------------------------------------------------
sift_base1k_pkey | 40 kB | CREATE UNIQUE INDEX sift_base1k_pkey ON public.sift_base1k USING btree (id)
sift_base1k_v_idx | 872 kB | CREATE INDEX sift_base1k_v_idx ON public.sift_base1k USING hnsw (v)
(2 rows)

INSERT INTO sift_base1k(v)
SELECT v FROM sift_base1k WHERE id <= 444 AND v IS NOT NULL;
INSERT 0 444
select count(*) from sift_base1k;
SELECT count(*) from sift_base1k;
psql:test/sql/hnsw_insert.sql:83: INFO: cost estimate
count
-------
1444
(1 row)

List of relations
Schema | Name | Type | Owner | Table | Persistence | Access method | Size | Description
--------+----------------------------+-------+-----------+-----------------+-------------+---------------+---------+-------------
public | new_small_world_vector_idx | index | ngalstyan | new_small_world | permanent | hnsw | 176 kB |
public | sift_base1k_pkey | index | ngalstyan | sift_base1k | permanent | btree | 48 kB |
public | sift_base1k_v_idx | index | ngalstyan | sift_base1k | permanent | hnsw | 1168 kB |
public | small_world_vector_idx | index | ngalstyan | small_world | permanent | hnsw | 176 kB |
(4 rows)
SELECT * from ldb_get_indexes('sift_base1k');
indexname | size | indexdef
-------------------+---------+-----------------------------------------------------------------------------
sift_base1k_pkey | 48 kB | CREATE UNIQUE INDEX sift_base1k_pkey ON public.sift_base1k USING btree (id)
sift_base1k_v_idx | 1168 kB | CREATE INDEX sift_base1k_v_idx ON public.sift_base1k USING hnsw (v)
(2 rows)

31 changes: 6 additions & 25 deletions test/expected/wiki.out
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,6 @@ SET row_security = off;
SET
SET default_tablespace = '';
SET
SET default_table_access_method = heap;
SET
CREATE TABLE tsv_data (
language text,
page_url text,
Expand Down Expand Up @@ -87,34 +85,17 @@ with t as (select id, page_title, context_page_description_ai <-> (select conte
(10 rows)

CREATE INDEX index1 ON tsv_data USING hnsw (context_page_description_ai vector_l2_ops);
psql:test/sql/wiki.sql:66: INFO: done init usearch index
psql:test/sql/wiki.sql:66: INFO: inserted 100 elements
psql:test/sql/wiki.sql:66: INFO: done saving 100 vectors
psql:test/sql/wiki.sql:64: INFO: done init usearch index
psql:test/sql/wiki.sql:64: INFO: inserted 100 elements
psql:test/sql/wiki.sql:64: INFO: done saving 100 vectors
CREATE INDEX
CREATE INDEX ON tsv_data USING hnsw (context_page_description_ai) with (ef = 100, ef_construction=150 , M=11, alg="hnswlib");
psql:test/sql/wiki.sql:67: INFO: done init usearch index
psql:test/sql/wiki.sql:67: INFO: inserted 100 elements
psql:test/sql/wiki.sql:67: INFO: done saving 100 vectors
psql:test/sql/wiki.sql:65: INFO: done init usearch index
psql:test/sql/wiki.sql:65: INFO: inserted 100 elements
psql:test/sql/wiki.sql:65: INFO: done saving 100 vectors
CREATE INDEX
set enable_seqscan=false;
SET
explain with t as (select id, page_title, context_page_description_ai <-> (select context_page_description_ai from tsv_data where id = 81386) as dist
from tsv_data order by dist limit 10) select id, page_title, ROUND( dist::numeric, 2) from t;
psql:test/sql/wiki.sql:71: INFO: cost estimate
psql:test/sql/wiki.sql:71: INFO: returning small cost to always use the index
psql:test/sql/wiki.sql:71: INFO: cost estimate
psql:test/sql/wiki.sql:71: INFO: returning small cost to always use the index
QUERY PLAN
------------------------------------------------------------------------------------------------------------------------
Subquery Scan on t (cost=8.16..8.74 rows=10 width=68)
-> Limit (cost=8.16..8.59 rows=10 width=44)
InitPlan 1 (returns $0)
-> Index Scan using tsv_data_pkey on tsv_data tsv_data_1 (cost=0.14..8.16 rows=1 width=32)
Index Cond: (id = 81386)
-> Index Scan using tsv_data_context_page_description_ai_idx on tsv_data (cost=0.00..4.26 rows=100 width=44)
Order By: (context_page_description_ai <-> $0)
(7 rows)

with t as (select id, page_title, context_page_description_ai <-> (select context_page_description_ai from tsv_data where id = 81386) as dist
from tsv_data order by dist limit 10) select id, page_title, ROUND( dist::numeric, 2) from t;
psql:test/sql/wiki.sql:75: INFO: cost estimate
Expand Down
3 changes: 2 additions & 1 deletion test/sql/debug_helpers.sql
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@ CREATE EXTENSION IF NOT EXISTS lanterndb;
SHOW hnsw.init_k;

CREATE INDEX ON small_world USING hnsw (vector);
\d+ small_world
-- verify that the index was created
SELECT * FROM ldb_get_indexes('small_world');

-- it exists after we create an index
SHOW hnsw.init_k;
Expand Down
6 changes: 3 additions & 3 deletions test/sql/hnsw.sql
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ SET enable_seqscan = off;

begin;
CREATE INDEX ON small_world USING hnsw (vector);
\d+ small_world
SELECT * FROM ldb_get_indexes('small_world');
SELECT * FROM (
SELECT id, ROUND( (vector <-> '[0,0,0]')::numeric, 2) as dist
FROM small_world
Expand All @@ -57,7 +57,7 @@ rollback;

begin;
CREATE INDEX ON small_world USING hnsw (vector) WITH (M=2, ef=11, ef_construction=12);
\d+ small_world
SELECT * FROM ldb_get_indexes('small_world');
-- Equidistant points from the given vector appear in different order in the output of the inner query
-- depending on postgres version and platform. The outer query forces a deterministic order.
-- Unfortunately, outer query resorts distances as well so if the index sorted them in a wrong order,
Expand All @@ -83,7 +83,7 @@ rollback;

begin;
CREATE INDEX ON small_world USING hnsw (vector) WITH (M=11, ef=2, ef_construction=2);
\d+ small_world
SELECT * FROM ldb_get_indexes('small_world');
SELECT * FROM (
SELECT id, ROUND( (vector <-> '[0,0,0]')::numeric, 2) as dist
FROM small_world
Expand Down
12 changes: 6 additions & 6 deletions test/sql/hnsw_insert.sql
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@ CREATE INDEX ON sift_base1k USING hnsw (v);

SET enable_seqscan = off;

insert into small_world (id, vector) values ('xxx', '[0,0,0]');
insert into small_world (id, vector) values ('x11', '[0,0,110]');
INSERT INTO small_world (id, vector) VALUES ('xxx', '[0,0,0]');
INSERT INTO small_world (id, vector) VALUES ('x11', '[0,0,110]');
INSERT INTO small_world (id, vector) VALUES
('000', '[0,0,0]'),
('001', '[0,0,1]'),
Expand Down Expand Up @@ -76,10 +76,10 @@ SELECT '[0,0,0]'::vector as v42 \gset
EXPLAIN SELECT id, ROUND((vector <-> :'v42')::numeric, 2) FROM new_small_world ORDER BY vector <-> :'v42' LIMIT 10;
SELECT id, ROUND((vector <-> :'v42')::numeric, 2) FROM new_small_world ORDER BY vector <-> :'v42' LIMIT 10;

select count(*) from sift_base1k;
\di+
SELECT count(*) from sift_base1k;
SELECT * from ldb_get_indexes('sift_base1k');
INSERT INTO sift_base1k(v)
SELECT v FROM sift_base1k WHERE id <= 444 AND v IS NOT NULL;
select count(*) from sift_base1k;
\di+
SELECT count(*) from sift_base1k;
SELECT * from ldb_get_indexes('sift_base1k');

20 changes: 20 additions & 0 deletions test/sql/test_helpers/common.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
--test helper functions that should exist in all test runs live here
-- there is no need to explicitly include this file in other tests as the test runner will
-- run this before running the actual test

CREATE EXTENSION pageinspect;

--todo:: add columns to this function that return the number of used DB pages and the total index size
\set ON_ERROR_STOP on
-- ldb_get_indexes(tblname): list the indexes defined on every table named `tblname`.
--   tblname   unqualified table name, matched against pg_indexes.tablename
--   returns   one row per index: its name, the pretty-printed on-disk size of the
--             index relation, and the CREATE INDEX statement that defines it
-- Rows are ordered by index name so that expected test output stays stable.
CREATE OR REPLACE FUNCTION ldb_get_indexes(tblname text)
RETURNS TABLE(indexname name, size text, indexdef text) AS
$BODY$
BEGIN
    RETURN QUERY
    SELECT pg_indexes.indexname,
           -- Schema-qualify and quote the index name before the regclass cast so the
           -- lookup does not depend on search_path and survives mixed-case names.
           pg_size_pretty(
               pg_relation_size(
                   format('%I.%I', pg_indexes.schemaname, pg_indexes.indexname)::REGCLASS
               )
           ) AS size,
           pg_indexes.indexdef
    FROM pg_indexes
    -- Column references are table-qualified: the RETURNS TABLE column names are also
    -- PL/pgSQL variables, so bare names could be ambiguous.
    WHERE pg_indexes.tablename = tblname
    ORDER BY pg_indexes.indexname;
END;
$BODY$ LANGUAGE plpgsql;
8 changes: 4 additions & 4 deletions test/sql/wiki.sql
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,6 @@ SET row_security = off;

SET default_tablespace = '';

SET default_table_access_method = heap;

CREATE TABLE tsv_data (
language text,
page_url text,
Expand Down Expand Up @@ -67,8 +65,10 @@ CREATE INDEX index1 ON tsv_data USING hnsw (context_page_description_ai vector_l
CREATE INDEX ON tsv_data USING hnsw (context_page_description_ai) with (ef = 100, ef_construction=150 , M=11, alg="hnswlib");
set enable_seqscan=false;

explain with t as (select id, page_title, context_page_description_ai <-> (select context_page_description_ai from tsv_data where id = 81386) as dist
from tsv_data order by dist limit 10) select id, page_title, ROUND( dist::numeric, 2) from t;
-- todo:: find a different way to ensure that the index is used. "set enable_seqscan=false;" is not enough
-- and, the following produces a different output on pg11
-- explain with t as (select id, page_title, context_page_description_ai <-> (select context_page_description_ai from tsv_data where id = 81386) as dist
-- from tsv_data order by dist limit 10) select id, page_title, ROUND( dist::numeric, 2) from t;

-- introduce a WITH statement to round returned distances AFTER a lookup so the index can be used
with t as (select id, page_title, context_page_description_ai <-> (select context_page_description_ai from tsv_data where id = 81386) as dist
Expand Down