Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

View render optimization #6

Closed
wants to merge 19 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion alembic/versions/6898afa765ca_create_view_schema.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""Create view schema

Revision ID: 6898afa765ca
Revises: 8dd630f55d05
Revises: 8dd630f55d05
Create Date: 2023-03-21 16:54:56.498038

"""
Expand Down
2 changes: 1 addition & 1 deletion alembic/versions/7367a058533d_create_authn_authz.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""Create authentication/authorization models

Revision ID: 7367a058533d
Revises:
Revises:
Create Date: 2023-03-21 17:25:22.277920

"""
Expand Down
31 changes: 25 additions & 6 deletions gerrydb_meta/crud/column.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,14 @@
from datetime import datetime, timezone
from typing import Any, Collection, Tuple

from sqlalchemy import exc, insert, update
from sqlalchemy import exc, insert, update, text
from sqlalchemy.orm import Session

from gerrydb_meta import models, schemas
from gerrydb_meta.crud.base import NamespacedCRBase, normalize_path
from gerrydb_meta.enums import ColumnType
from gerrydb_meta.exceptions import ColumnValueTypeError, CreateValueError
from gerrydb_meta.utils import create_column_value_partition_text

log = logging.getLogger()

Expand Down Expand Up @@ -80,6 +81,9 @@ def create(
canonical_ref.col_id = col.col_id
db.flush()

# create partition
db.execute(create_column_value_partition_text(column_id=col.col_id))

# Create additional aliases (non-canonical references) to the column.
if obj_in.aliases:
self._add_aliases(
Expand Down Expand Up @@ -202,8 +206,16 @@ def set_values(

# Add the new column values and invalidate the old ones where present.
geo_ids = [geo.geo_id for geo, _ in values]
with_values = (
db.query(models.ColumnValue.val_id)

# make sure partition exists for column
db.execute(create_column_value_partition_text(column_id=col.col_id))

with_tuples = (
db.query(
models.ColumnValue.col_id,
models.ColumnValue.geo_id,
models.ColumnValue.valid_from,
)
.filter(
models.ColumnValue.col_id == col.col_id,
models.ColumnValue.geo_id.in_(geo_ids),
Expand All @@ -212,6 +224,8 @@ def set_values(
.all()
)

with_values = ["_".join([str(val) for val in tup]) for tup in with_tuples]

with db.begin(nested=True):
db.execute(insert(models.ColumnValue), rows)
# Optimization: most column values are only set once, so we don't
Expand All @@ -220,9 +234,14 @@ def set_values(
db.execute(
update(models.ColumnValue)
.where(
models.ColumnValue.val_id.in_(
val.val_id for val in with_values
),
"_".join(
[
str(models.ColumnValue.col_id),
str(models.ColumnValue.geo_id),
str(models.ColumnValue.valid_from),
]
)
in with_values
)
.values(valid_to=now)
)
Expand Down
71 changes: 38 additions & 33 deletions gerrydb_meta/crud/view.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,10 @@

from geoalchemy2 import Geometry
from geoalchemy2 import func as geo_func
from sqlalchemy import Sequence, cast, exc, func, label, or_, select, union
from sqlalchemy import Sequence, cast, exc, func, label, or_, select, union
from sqlalchemy.dialects import postgresql
from sqlalchemy.orm import Session
from sqlalchemy.sql import text, column

from gerrydb_meta import models, schemas
from gerrydb_meta.crud.base import NamespacedCRBase, normalize_path
Expand Down Expand Up @@ -358,7 +359,7 @@ def get_cached_render(
.order_by(models.ViewRender.created_at.desc())
.first()
)

def render(self, db: Session, *, view: models.View) -> ViewRenderContext:
"""Generates queries to retrieve view data.

Expand All @@ -371,32 +372,25 @@ def render(self, db: Session, *, view: models.View) -> ViewRenderContext:
models.GeoSetMember.geo_id,
)
.filter(models.GeoSetMember.set_version_id == view.set_version_id)
.subquery()
.subquery("members_sub")
)
geo_sub = select(models.Geography.geo_id, models.Geography.path).subquery()

# Generate subqueries for joining tabular data.
column_subs = []
column_labels = []
for alias, col in columns.items():
value_col = COLUMN_TYPE_TO_VALUE_COLUMN[col.type]
column_sub = (
select(
models.ColumnValue.geo_id,
getattr(models.ColumnValue, value_col).label(alias),
)
.filter(
models.ColumnValue.col_id == col.col_id,
models.ColumnValue.valid_from <= view.at,
or_(
models.ColumnValue.valid_to.is_(None),
models.ColumnValue.valid_to >= view.at,
),
)
.subquery()
)
column_subs.append(column_sub)
column_labels.append(column_sub.c[col.canonical_ref.path])
geo_sub = select(models.Geography.geo_id, models.Geography.path).subquery("geo_sub")

agg_selects=[]
column_labels=[]
col_ids=[]
for _, col in columns.items():
agg_selects.append(func.max(column(COLUMN_TYPE_TO_VALUE_COLUMN[col.type])).filter(models.ColumnValue.col_id==col.col_id).label(col.canonical_ref.path))
column_labels.append(column(col.canonical_ref.path))
col_ids.append(col.col_id)

column_sub= select(models.ColumnValue.geo_id,*agg_selects).where(
models.ColumnValue.col_id.in_(col_ids),
models.ColumnValue.valid_from <= view.at,
or_(
models.ColumnValue.valid_to.is_(None),
models.ColumnValue.valid_to >= view.at,
)).group_by(models.ColumnValue.geo_id).subquery("column_value")

timestamp_clauses = [
models.GeoVersion.valid_from <= view.at,
Expand All @@ -406,6 +400,18 @@ def render(self, db: Session, *, view: models.View) -> ViewRenderContext:
),
]

## included for reference: a version without subqueries.
# geo_query=(
# select(models.Geography.path,
# models.GeoVersion.geography,
# *column_labels,
# ).join(models.GeoSetMember, models.GeoSetMember.geo_id==models.Geography.geo_id)
# .join(models.GeoVersion, models.GeoSetMember.geo_id==models.GeoVersion.geo_id)
# .join(column_sub, column_sub.c.geo_id==models.Geography.geo_id)
# .where(models.GeoSetMember.set_version_id == view.set_version_id, *timestamp_clauses)
# )


geo_query = (
select(
geo_sub.c.path,
Expand All @@ -418,12 +424,10 @@ def render(self, db: Session, *, view: models.View) -> ViewRenderContext:
)
.join(geo_sub, geo_sub.c.geo_id == models.GeoVersion.geo_id)
)

# Join tabular data.
for column_sub in column_subs:
geo_query = geo_query.join(
column_sub, column_sub.c.geo_id == models.GeoVersion.geo_id
)

geo_query = geo_query.join(
column_sub, column_sub.c.geo_id == models.GeoVersion.geo_id
)
geo_query = geo_query.where(*timestamp_clauses)

internal_point_query = (
Expand Down Expand Up @@ -478,6 +482,7 @@ def render(self, db: Session, *, view: models.View) -> ViewRenderContext:
),
)


def _geo_meta(
self, db: Session, view: models.View
) -> tuple[dict[str, int], dict[int, models.ObjectMeta]]:
Expand Down
18 changes: 12 additions & 6 deletions gerrydb_meta/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from uuid import UUID, uuid4

from geoalchemy2 import Geography as SqlGeography
from sqlalchemy import JSON, BigInteger, Boolean, CheckConstraint, DateTime
from sqlalchemy import JSON, BigInteger, Boolean, CheckConstraint, DateTime, text, event
from sqlalchemy import Enum as SqlEnum
from sqlalchemy import (
ForeignKey,
Expand All @@ -17,7 +17,7 @@
UniqueConstraint,
)
from sqlalchemy.dialects import postgresql
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column, relationship
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column, relationship, Session
from sqlalchemy.sql import func

from gerrydb_meta.enums import (
Expand All @@ -27,8 +27,10 @@
ScopeType,
ViewRenderStatus,
)
from gerrydb_meta.utils import create_column_value_partition_text

metadata_obj = MetaData(schema="gerrydb")
SCHEMA = "gerrydb"
metadata_obj = MetaData(schema=SCHEMA)


class Base(DeclarativeBase):
Expand Down Expand Up @@ -547,33 +549,37 @@ class ColumnSetMember(Base):

class ColumnValue(Base):
__tablename__ = "column_value"
__table_args__ = (UniqueConstraint("col_id", "geo_id", "valid_from"),)
__table_args__ = (
UniqueConstraint("col_id", "geo_id", "valid_from"),
{"postgresql_partition_by": "LIST (col_id)"},
)

val_id: Mapped[int] = mapped_column(BigInteger, primary_key=True)
col_id: Mapped[int] = mapped_column(
Integer,
ForeignKey("column.col_id"),
nullable=False,
primary_key=True,
)
geo_id: Mapped[int] = mapped_column(
Integer,
ForeignKey("geography.geo_id"),
nullable=False,
primary_key=True,
)
meta_id: Mapped[int] = mapped_column(
Integer, ForeignKey("meta.meta_id"), nullable=False
)
valid_from: Mapped[datetime] = mapped_column(
DateTime(timezone=True),
nullable=False,
primary_key=True,
)
valid_to: Mapped[datetime] = mapped_column(DateTime(timezone=True), nullable=True)

val_float: Mapped[float] = mapped_column(postgresql.DOUBLE_PRECISION, nullable=True)
val_int: Mapped[int] = mapped_column(BigInteger, nullable=True)
val_str: Mapped[str] = mapped_column(Text, nullable=True)
val_bool: Mapped[bool] = mapped_column(Boolean, nullable=True)
val_json: Mapped[Any] = mapped_column(postgresql.JSONB, nullable=True)

meta: Mapped[ObjectMeta] = relationship("ObjectMeta")

Expand Down
8 changes: 8 additions & 0 deletions gerrydb_meta/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
from sqlalchemy import text
from gerrydb_meta import models


def create_column_value_partition_text(column_id: int):
table_name = models.ColumnValue.__table__.name
sql = f"CREATE TABLE IF NOT EXISTS {models.SCHEMA}.{table_name}_{column_id} PARTITION OF {models.SCHEMA}.{table_name} FOR VALUES IN ({column_id})"
return text(sql)
4 changes: 2 additions & 2 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ networkx = "^3.3.0"

[tool.poetry.dev-dependencies]
pytest = "^7.2.1"
black = "^22.6.0"
black = "^25.1.0"
isort = "^5.10.1"
alembic = "^1.9.2"
networkx = "^3.3.0"
Expand Down
3 changes: 1 addition & 2 deletions tests/api/test_column_value.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,8 @@
(ColumnType.FLOAT, 1.0, float("inf")),
(ColumnType.BOOL, True, False),
(ColumnType.STR, "", "abc"),
(ColumnType.JSON, {"key": "value"}, [1, 2, "3"]),
],
ids=("int", "float", "bool", "str", "json"),
ids=("int", "float", "bool", "str"),
)
def test_api_column_value_set__two_geos(ctx_public_namespace_read_write, typed_vals):
col_type, val1, val2 = typed_vals
Expand Down
3 changes: 0 additions & 3 deletions tests/crud/test_column.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,9 +204,6 @@ def test_crud_column_set_values(db_with_meta):
assert cols_list[0].val_bool is None
assert cols_list[1].val_bool is None

assert cols_list[0].val_json is None
assert cols_list[1].val_json is None


def test_crud_column_patch(db_with_meta):
db, meta = db_with_meta
Expand Down
12 changes: 12 additions & 0 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
from gerrydb_meta.utils import create_column_value_partition_text
from sqlalchemy import text


def test_create_column_value_partition_text():
column_id = 42
got = create_column_value_partition_text(column_id=column_id)
wanted = text(
"CREATE TABLE IF NOT EXISTS gerrydb.column_value_42 PARTITION OF gerrydb.column_value FOR VALUES IN (42)"
)
# different object instances, so compare string form
assert str(got) == str(wanted)
Loading