Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add check_columns_are_all_in_sync #338

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions dbt-bouncer-example.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@ catalog_checks:
test_name: not_null
- name: check_columns_are_all_documented
include: ^models/marts
- name: check_columns_are_all_in_sync
include: ^models/marts
- name: check_columns_are_documented_in_public_models
- name: check_source_columns_are_all_documented
exclude: ^models/staging/crm # Not a good idea, here for demonstration purposes only
Expand Down
39 changes: 39 additions & 0 deletions src/dbt_bouncer/checks/catalog/check_columns.py
Original file line number Diff line number Diff line change
Expand Up @@ -231,6 +231,45 @@ def execute(self) -> None:
)


class CheckColumnsAreAllInSync(BaseCheck):
    """All columns in a model should be included in the model's properties file, i.e. `.yml` file, and vice versa.

    Receives:
        catalog_node (CatalogNodes): The CatalogNodes object to check.
        models (List[DbtBouncerModelBase]): List of DbtBouncerModelBase objects parsed from `manifest.json`.

    Other Parameters:
        exclude (Optional[str]): Regex pattern to match the model path. Model paths that match the pattern will not be checked.
        include (Optional[str]): Regex pattern to match the model path. Only model paths that match the pattern will be checked.
        severity (Optional[Literal["error", "warn"]]): Severity level of the check. Default: `error`.

    Raises:
        AssertionError: If the set of column names in the catalog differs from the set of column names in the properties file.
        StopIteration: If no entry in `models` has the same `unique_id` as `catalog_node`.

    Example(s):
        ```yaml
        catalog_checks:
            - name: check_columns_are_all_in_sync
        ```
        ```yaml
        catalog_checks:
            - name: check_columns_are_all_in_sync
              include: ^models/marts
        ```

    """

    catalog_node: "CatalogNodes" = Field(default=None)
    models: List["DbtBouncerModelBase"] = Field(default=[])
    name: Literal["check_columns_are_all_in_sync"]

    def execute(self) -> None:
        """Execute the check."""
        # Only catalog entries whose unique_id starts with "model." are compared;
        # every other node type passes without being inspected.
        if self.catalog_node.unique_id.split(".")[0] != "model":
            return

        model = next(
            m for m in self.models if m.unique_id == self.catalog_node.unique_id
        )
        properties_column_names = {column.name for column in model.columns.values()}
        model_column_names = {
            column.name for column in self.catalog_node.columns.values()
        }

        # Sort the symmetric difference so the failure message is identical from
        # run to run (set iteration order depends on string hash randomisation).
        out_of_sync_columns = sorted(properties_column_names ^ model_column_names)
        assert not out_of_sync_columns, (
            f"`{self.catalog_node.unique_id.split('.')[-1]}` has columns that are out of sync between the SQL model and the `.yaml` properties file: {out_of_sync_columns}"
        )


class CheckColumnsAreDocumentedInPublicModels(BaseCheck):
"""Columns should have a populated description in public models.

Expand Down
171 changes: 169 additions & 2 deletions tests/unit/checks/catalog/test_columns.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,15 +19,16 @@
CheckColumnHasSpecifiedTest,
CheckColumnNameCompliesToColumnType,
CheckColumnsAreAllDocumented,
CheckColumnsAreAllInSync,
CheckColumnsAreDocumentedInPublicModels,
)

# Rebuild every imported check exactly once, in the same order as the import
# list. NOTE(review): presumably needed so pydantic can resolve the string
# (forward-reference) annotations used by the checks, e.g. "CatalogNodes".
CheckColumnDescriptionPopulated.model_rebuild()
CheckColumnHasSpecifiedTest.model_rebuild()
CheckColumnNameCompliesToColumnType.model_rebuild()
CheckColumnsAreAllDocumented.model_rebuild()
CheckColumnsAreAllInSync.model_rebuild()
CheckColumnsAreDocumentedInPublicModels.model_rebuild()


@pytest.mark.parametrize(
Expand Down Expand Up @@ -610,3 +611,169 @@ def test_check_column_name_complies_to_column_type(
name="check_column_name_complies_to_column_type",
types=types,
).execute()


@pytest.mark.parametrize(
    ("catalog_node", "models", "expectation"),
    [
        # Catalog and properties file list exactly the same columns.
        pytest.param(
            CatalogNodes(
                **{
                    "columns": {
                        "col_1": {
                            "index": 1,
                            "name": "col_1",
                            "type": "INTEGER",
                        },
                        "col_2": {
                            "index": 2,
                            "name": "col_2",
                            "type": "INTEGER",
                        },
                    },
                    "metadata": {
                        "name": "table_1",
                        "schema": "main",
                        "type": "VIEW",
                    },
                    "stats": {},
                    "unique_id": "model.package_name.model_1",
                },
            ),
            [
                Nodes4(
                    **{
                        "alias": "model_1",
                        "checksum": {"name": "sha256", "checksum": ""},
                        "columns": {
                            "col_1": {
                                "index": 1,
                                "name": "col_1",
                                "type": "INTEGER",
                            },
                            "col_2": {
                                "index": 2,
                                "name": "col_2",
                                "type": "INTEGER",
                            },
                        },
                        "fqn": ["package_name", "model_1"],
                        "name": "model_1",
                        "original_file_path": "model_1.sql",
                        "package_name": "package_name",
                        "path": "model_1.sql",
                        "resource_type": "model",
                        "schema": "main",
                        "unique_id": "model.package_name.model_1",
                    },
                ),
            ],
            does_not_raise(),
            id="columns_in_sync",
        ),
        # The catalog has a column (`col_2`) that the properties file lacks.
        pytest.param(
            CatalogNodes(
                **{
                    "columns": {
                        "col_1": {
                            "index": 1,
                            "name": "col_1",
                            "type": "INTEGER",
                        },
                        "col_2": {
                            "index": 2,
                            "name": "col_2",
                            "type": "INTEGER",
                        },
                    },
                    "metadata": {
                        "name": "table_1",
                        "schema": "main",
                        "type": "VIEW",
                    },
                    "stats": {},
                    "unique_id": "model.package_name.model_2",
                },
            ),
            [
                Nodes4(
                    **{
                        "alias": "model_2",
                        "checksum": {"name": "sha256", "checksum": ""},
                        "columns": {
                            "col_1": {
                                "index": 1,
                                "name": "col_1",
                                "type": "INTEGER",
                            },
                        },
                        "fqn": ["package_name", "model_2"],
                        "name": "model_2",
                        "original_file_path": "model_2.sql",
                        "package_name": "package_name",
                        "path": "model_2.sql",
                        "resource_type": "model",
                        "schema": "main",
                        "unique_id": "model.package_name.model_2",
                    },
                ),
            ],
            pytest.raises(AssertionError),
            id="column_missing_from_properties_file",
        ),
        # The properties file has a column (`col_2`) that the catalog lacks.
        pytest.param(
            CatalogNodes(
                **{
                    "columns": {
                        "col_1": {
                            "index": 1,
                            "name": "col_1",
                            "type": "INTEGER",
                        },
                    },
                    "metadata": {
                        "name": "table_1",
                        "schema": "main",
                        "type": "VIEW",
                    },
                    "stats": {},
                    "unique_id": "model.package_name.model_2",
                },
            ),
            [
                Nodes4(
                    **{
                        "alias": "model_2",
                        "checksum": {"name": "sha256", "checksum": ""},
                        "columns": {
                            "col_1": {
                                "index": 1,
                                "name": "col_1",
                                "type": "INTEGER",
                            },
                            "col_2": {
                                "index": 2,
                                "name": "col_2",
                                "type": "INTEGER",
                            },
                        },
                        "fqn": ["package_name", "model_2"],
                        "name": "model_2",
                        "original_file_path": "model_2.sql",
                        "package_name": "package_name",
                        "path": "model_2.sql",
                        "resource_type": "model",
                        "schema": "main",
                        "unique_id": "model.package_name.model_2",
                    },
                ),
            ],
            pytest.raises(AssertionError),
            id="column_missing_from_catalog",
        ),
        # A catalog node whose unique_id does not start with "model." is skipped,
        # so no matching entry in `models` is required.
        pytest.param(
            CatalogNodes(
                **{
                    "columns": {
                        "col_1": {
                            "index": 1,
                            "name": "col_1",
                            "type": "INTEGER",
                        },
                    },
                    "metadata": {
                        "name": "seed_1",
                        "schema": "main",
                        "type": "BASE TABLE",
                    },
                    "stats": {},
                    "unique_id": "seed.package_name.seed_1",
                },
            ),
            [],
            does_not_raise(),
            id="non_model_node_is_skipped",
        ),
    ],
)
def test_check_columns_are_in_sync(catalog_node, models, expectation):
    """Run `CheckColumnsAreAllInSync` against each fixture and check whether it raises."""
    with expectation:
        CheckColumnsAreAllInSync(
            catalog_node=catalog_node,
            models=models,
            name="check_columns_are_all_in_sync",
        ).execute()