Skip to content

Commit e7895a6

Browse files
committed
Fix missing handling for operator usage in Materialized queries
- Add comment about default - Add success case and a more complex error case
1 parent 7a4b58d commit e7895a6

File tree

3 files changed

+157
-0
lines changed

3 files changed

+157
-0
lines changed

src/hooks/plan_tree_walker.c

+29
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,31 @@
88
#include <nodes/parsenodes.h>
99
#include <nodes/plannodes.h>
1010

11+
#include "../hnsw/utils.h"
12+
1113
bool base_plan_walker(Plan *plan, bool (*walker_func)(Node *plan, void *context), void *context)
1214
{
15+
/*
16+
If there is a need to debug this function, follow the steps below:
17+
0. Add the following as the default branch in plan_tree_walker
18+
default:
19+
{
20+
ldb_dlog("plan_tree_walker: unsupported plan node type: %d", nodeTag(plan));
21+
return false;
22+
}
23+
This will print all nodes that are not explicitly handled by the walker.
24+
Currently there are several such nodes which probably means there are more
25+
latent issues here.
26+
1. Attach gdb to the postgres process
27+
2. Set a breakpoint at the function entry
28+
3. navitate through relevant paths via gdb
29+
4. debug print Plan* nodes via
30+
p (char*) nodeToString(plan);
31+
32+
Note: for non-trivial Plan* nodes you may need to run:
33+
set print elements 0
34+
in gdb to make sure the node string is not truncated.
35+
*/
1336
if(walker_func((Node *)plan->targetlist, context)) return true;
1437
if(walker_func((Node *)plan->qual, context)) return true;
1538
if(walker_func((Node *)plan->lefttree, context)) return true;
@@ -124,6 +147,12 @@ bool plan_tree_walker(Plan *plan, bool (*walker_func)(Node *plan, void *context)
124147
if(walker_func((Node *)append->appendplans, context)) return true;
125148
break;
126149
}
150+
case T_Material:
151+
{
152+
Material *material = (Material *)plan;
153+
if(base_plan_walker(&(material->plan), walker_func, context)) return true;
154+
break;
155+
}
127156
default:
128157
return false;
129158
}

test/expected/hnsw_dist_func.out

+70
Original file line numberDiff line numberDiff line change
@@ -222,7 +222,77 @@ WITH t AS (SELECT id FROM test1 ORDER BY v <-> '{1,2}' LIMIT 1) SELECT id, COUNT
222222
ERROR: Operator <-> can only be used inside of an index
223223
WITH t AS (SELECT id FROM test1 ORDER BY v <-> '{1,2}') SELECT id FROM t UNION SELECT id FROM t;
224224
ERROR: Operator <-> can only be used inside of an index
225+
-- issue #227
226+
SELECT * from test2 JOIN LATERAL (SELECT * FROM (SELECT id FROM test2 ORDER BY v <-> '{1,2}') as forall) haha on TRUE;
227+
ERROR: Operator <-> can only be used inside of an index
228+
-- more complex setup of the above
229+
SELECT forall.id, nearest_per_id.* FROM
230+
(SELECT * FROM
231+
test2) AS forall
232+
JOIN LATERAL (
233+
SELECT
234+
ARRAY_AGG(id ORDER BY id) AS near_ids,
235+
ARRAY_AGG(dist ORDER BY id) AS near_dists
236+
FROM
237+
(
238+
SELECT
239+
id,
240+
l2sq_dist(v, forall.v) as dist
241+
FROM
242+
test2
243+
ORDER BY
244+
v <-> forall.v
245+
LIMIT
246+
5
247+
) as __unused_name
248+
) nearest_per_id on TRUE
249+
ORDER BY
250+
forall.id
251+
LIMIT
252+
9;
253+
ERROR: Operator <-> can only be used inside of an index
225254
\set ON_ERROR_STOP on
255+
-- cross-lateral joins work as expected when appropriate index exists
256+
-- nearest element for each vector
257+
-- Note: The limit below is 4 to make sure all neighbors with distance 1 are included
258+
-- and none of distance 2 are included. if we include some of distance 2, then we need
259+
-- further sorting to make sure ties among nodes with distance 2 are broken consistently
260+
SELECT forall.id, nearest_per_id.* FROM
261+
(SELECT * FROM
262+
small_world_l2) AS forall
263+
JOIN LATERAL (
264+
SELECT
265+
ARRAY_AGG(id ORDER BY dist, id) AS near_ids,
266+
ARRAY_AGG(dist ORDER BY dist, id) AS near_dists
267+
FROM
268+
(
269+
SELECT
270+
id,
271+
l2sq_dist(v, forall.v) as dist
272+
FROM
273+
small_world_l2
274+
ORDER BY
275+
v <-> forall.v
276+
LIMIT
277+
4
278+
) as __unused_name
279+
) nearest_per_id on TRUE
280+
ORDER BY
281+
forall.id
282+
LIMIT
283+
9;
284+
id | near_ids | near_dists
285+
-----+-------------------+------------
286+
000 | {000,001,010,100} | {0,1,1,1}
287+
001 | {001,000,011,101} | {0,1,1,1}
288+
010 | {010,000,011,110} | {0,1,1,1}
289+
011 | {011,001,010,111} | {0,1,1,1}
290+
100 | {100,000,101,110} | {0,1,1,1}
291+
101 | {101,001,100,111} | {0,1,1,1}
292+
110 | {110,010,100,111} | {0,1,1,1}
293+
111 | {111,011,101,110} | {0,1,1,1}
294+
(8 rows)
295+
226296
-- Check that hamming distance query results are sorted correctly
227297
CREATE TABLE extra_small_world_ham (
228298
id SERIAL PRIMARY KEY,

test/sql/hnsw_dist_func.sql

+58
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,65 @@ SELECT t2_results.id FROM test1 t1 JOIN LATERAL (SELECT t2.id FROM test2 t2 ORDE
9292
WITH t AS (SELECT id FROM test1 ORDER BY v <-> '{1,2}' LIMIT 1) SELECT DISTINCT id FROM t;
9393
WITH t AS (SELECT id FROM test1 ORDER BY v <-> '{1,2}' LIMIT 1) SELECT id, COUNT(*) FROM t GROUP BY 1;
9494
WITH t AS (SELECT id FROM test1 ORDER BY v <-> '{1,2}') SELECT id FROM t UNION SELECT id FROM t;
95+
96+
-- issue #227
97+
SELECT * from test2 JOIN LATERAL (SELECT * FROM (SELECT id FROM test2 ORDER BY v <-> '{1,2}') as forall) haha on TRUE;
98+
-- more complex setup of the above
99+
SELECT forall.id, nearest_per_id.* FROM
100+
(SELECT * FROM
101+
test2) AS forall
102+
JOIN LATERAL (
103+
SELECT
104+
ARRAY_AGG(id ORDER BY id) AS near_ids,
105+
ARRAY_AGG(dist ORDER BY id) AS near_dists
106+
FROM
107+
(
108+
SELECT
109+
id,
110+
l2sq_dist(v, forall.v) as dist
111+
FROM
112+
test2
113+
ORDER BY
114+
v <-> forall.v
115+
LIMIT
116+
5
117+
) as __unused_name
118+
) nearest_per_id on TRUE
119+
ORDER BY
120+
forall.id
121+
LIMIT
122+
9;
123+
95124
\set ON_ERROR_STOP on
125+
-- cross-lateral joins work as expected when appropriate index exists
126+
-- nearest element for each vector
127+
-- Note: The limit below is 4 to make sure all neighbors with distance 1 are included
128+
-- and none of distance 2 are included. if we include some of distance 2, then we need
129+
-- further sorting to make sure ties among nodes with distance 2 are broken consistently
130+
SELECT forall.id, nearest_per_id.* FROM
131+
(SELECT * FROM
132+
small_world_l2) AS forall
133+
JOIN LATERAL (
134+
SELECT
135+
ARRAY_AGG(id ORDER BY dist, id) AS near_ids,
136+
ARRAY_AGG(dist ORDER BY dist, id) AS near_dists
137+
FROM
138+
(
139+
SELECT
140+
id,
141+
l2sq_dist(v, forall.v) as dist
142+
FROM
143+
small_world_l2
144+
ORDER BY
145+
v <-> forall.v
146+
LIMIT
147+
4
148+
) as __unused_name
149+
) nearest_per_id on TRUE
150+
ORDER BY
151+
forall.id
152+
LIMIT
153+
9;
96154

97155
-- Check that hamming distance query results are sorted correctly
98156
CREATE TABLE extra_small_world_ham (

0 commit comments

Comments
 (0)