Skip to content
This repository was archived by the owner on Feb 15, 2023. It is now read-only.

Commit c5ab442

Browse files
committed
Helping PG planner with some stats
1 parent 72492cd commit c5ab442

File tree

1 file changed

+39
-16
lines changed

1 file changed

+39
-16
lines changed

neo4jPg/neo4jfdw.py

+39-16
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@ class Neo4jForeignDataWrapper(ForeignDataWrapper):
1010
Neo4j FDW (foreign data wrapper) for PostgreSQL
1111
"""
1212

13+
_startup_cost = 20
14+
1315
def __init__(self, options, columns):
1416

1517
# Calling super constructor
@@ -39,7 +41,28 @@ def __init__(self, options, columns):
3941
self.driver = GraphDatabase.driver( self.url, auth=basic_auth(self.user, self.password))
4042

4143
self.columns_stat = self.compute_columns_stat()
44+
self.table_stat = self.compute_table_stat();
45+
46+
47+
def get_rel_size(self, quals, columns):
    """
    Give the PostgreSQL planner a size estimate for this foreign table.

    Returns a tuple (expected_number_of_rows, expected_mean_row_width_in_bytes).
    The row count is the table-level estimate stored in self.table_stat;
    the width is a flat 100 bytes for each requested column.

    quals is currently ignored (see the TODO below).
    """
    log_to_postgres('get_rel_size is called', DEBUG)
    # TODO: take the min of the columns stat based on the quals ?
    estimated_rows = self.table_stat
    estimated_width = 100 * len(columns)
    return (estimated_rows, estimated_width)
4256

57+
def get_path_keys(self):
    """
    Tell the PostgreSQL planner how selective an equality filter on each column is.

    Returns the list stored in self.columns_stat. Each entry is a tuple
    ((column_name,), expected_number_of_rows), where the row count is the
    estimate for a `WHERE column_name = some_value` filter.

    The planner uses these figures to cost parameterized paths: knowing that
    a filter yields a handful of rows rather than the whole table can make
    it prefer a nested loop over a full scan.
    """
    log_to_postgres('get_path_keys is called', DEBUG)
    path_keys = self.columns_stat
    return path_keys
4366

4467
def execute(self, quals, columns, sortkeys=None):
4568

@@ -69,7 +92,6 @@ def execute(self, quals, columns, sortkeys=None):
6992
finally:
7093
session.close()
7194

72-
7395
def make_cypher(self, quals, columns, sortkeys):
7496
"""
7597
Override cypher query to add search criteria
@@ -195,12 +217,6 @@ def generate_condition(self, field_name, operator, value, cypher_variable):
195217
return condition
196218

197219
def compute_columns_stat(self):
198-
"""
199-
This method must return a list of tuple of the form (column_name, expected_number_of_row).
200-
The expected_number_of_row must be computed as if a where column_name = some_value filter were applied.
201-
This helps the planner to estimate parameterized paths cost, and change the plan accordingly.
202-
For example, informing the planner that a filter on a column may return exactly one row, instead of the full billion, may help it on deciding to use a nested-loop instead of a full sequential scan.
203-
"""
204220
result = list();
205221

206222
session = self.driver.session()
@@ -213,9 +229,9 @@ def compute_columns_stat(self):
213229
stats = explain_summary['EstimatedRows']
214230

215231
log_to_postgres('Explain query for column ' + unicode(column_name) + ' is : ' + unicode(query), DEBUG)
216-
log_to_postgres('Explain for column ' + unicode(column_name) + ' is : ' + unicode(explain_summary['EstimatedRows']), DEBUG)
232+
log_to_postgres('Stat for column ' + unicode(column_name) + ' is : ' + unicode(explain_summary['EstimatedRows']), DEBUG)
217233

218-
result.append((column_name, stats))
234+
result.append(((column_name,), int(stats)))
219235

220236
except CypherError:
221237
raise RuntimeError("Bad cypher query : " + query)
@@ -225,12 +241,19 @@ def compute_columns_stat(self):
225241
log_to_postgres('Columns stats are :' + unicode(result), DEBUG)
226242
return result
227243

228-
def get_path_keys(self):
229-
return self.columns_stat
230-
244+
def compute_table_stat(self):
    """
    Estimate how many rows the configured Cypher query returns.

    Runs `EXPLAIN <self.cypher>` on a fresh driver session and reads the
    'EstimatedRows' entry from the returned plan summary.

    Returns the estimated row count as reported by Neo4j.
    Raises RuntimeError if Neo4j rejects the query (CypherError).
    """
    # Fallback value; only survives if the EXPLAIN path leaves it untouched.
    stats = 100000000
    session = self.driver.session()
    try:
        rs = session.run('EXPLAIN ' + self.cypher, {})
        # NOTE(review): plan[2] is presumably the arguments mapping of the
        # root plan operator -- confirm against the driver version in use.
        explain_summary = rs.summary().plan[2]
        stats = explain_summary['EstimatedRows']
        log_to_postgres('Stat for table is ' + unicode(explain_summary['EstimatedRows']), DEBUG)
    except CypherError:
        # Report the query that was actually sent. The bare name `cypher`
        # is not defined here and would raise NameError instead.
        raise RuntimeError("Bad cypher query : " + self.cypher)
    finally:
        # Always release the session, whether EXPLAIN succeeded or not.
        session.close()

    log_to_postgres('Table stat is :' + unicode(stats), DEBUG)
    return stats
233259

234-
# def insert(self, new_values):
235-
# def update(self, old_values, new_values):
236-
# def delete(self, old_values):

0 commit comments

Comments
 (0)