Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(ingest) tableau use luid filter #12799

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 11 additions & 16 deletions metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py
Original file line number Diff line number Diff line change
Expand Up @@ -1565,6 +1565,7 @@
query_filter: dict = {},
) -> Iterable[dict]:
query_filter = optimize_query_filter(query_filter)
logger.debug(f"Query filter {query_filter}")

Check warning on line 1568 in metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py

View check run for this annotation

Codecov / codecov/patch

metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py#L1568

Added line #L1568 was not covered by tests

# Calls the get_connection_object_page function to get the objects,
# and automatically handles pagination.
Expand All @@ -1574,6 +1575,7 @@
self.report.num_filter_queries_by_connection_type[connection_type] += len(
filter_pages
)
logger.debug(f"Filter pages {filter_pages}")

Check warning on line 1578 in metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py

View check run for this annotation

Codecov / codecov/patch

metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py#L1578

Added line #L1578 was not covered by tests

for filter_page in filter_pages:
has_next_page = 1
Expand Down Expand Up @@ -1609,31 +1611,24 @@
project_names: List[str] = [
project.name for project in self.tableau_project_registry.values()
]
projects = {c.PROJECT_NAME_WITH_IN: project_names}
logger.debug(f"Workbook emit: Project names: {project_names}")

Check warning on line 1614 in metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py

View check run for this annotation

Codecov / codecov/patch

metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py#L1614

Added line #L1614 was not covered by tests

for workbook in self.get_connection_objects(
query=workbook_graphql_query,
connection_type=c.WORKBOOKS_CONNECTION,
query_filter=projects,
page_size=self.config.effective_workbook_page_size,
):
# This check is needed because we filter with projectNameWithin, which matches projects by name only. If the
# user wants to ingest only the nested project C from A->B->C, Tableau may return more than one project
# when multiple projects are named C. The ideal solution is to use projectLuidWithin to avoid duplicate
# projects; however, Tableau supports projectLuidWithin only in Tableau Cloud June 2022 / Server 2022.3 and later.
project_luid: Optional[str] = self._get_workbook_project_luid(workbook)
if project_luid not in self.tableau_project_registry.keys():
wrk_name: Optional[str] = workbook.get(c.NAME)
wrk_id: Optional[str] = workbook.get(c.ID)
prj_name: Optional[str] = workbook.get(c.PROJECT_NAME)

self.report.warning(
title="Skipping Missing Workbook",
message="Skipping workbook as its project is not present in project registry",
context=f"workbook={wrk_name}({wrk_id}), project={prj_name}({project_luid})",
logger.debug(

Check warning on line 1621 in metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py

View check run for this annotation

Codecov / codecov/patch

metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py#L1621

Added line #L1621 was not covered by tests
f"Evaluating project: {workbook.get(c.PROJECT_NAME)} workbook {workbook.get(c.NAME)}"
)
if workbook.get(c.PROJECT_NAME) not in project_names:
logger.debug(

Check warning on line 1625 in metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py

View check run for this annotation

Codecov / codecov/patch

metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py#L1624-L1625

Added lines #L1624 - L1625 were not covered by tests
f"Skipping workbook {workbook.get(c.NAME)} as project {workbook.get(c.PROJECT_NAME)} "
f"is not in the project registry"
)
continue

logger.debug(f"Adding workbook {workbook.get(c.NAME)}")

Check warning on line 1631 in metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py

View check run for this annotation

Codecov / codecov/patch

metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py#L1631

Added line #L1631 was not covered by tests
yield from self.emit_workbook_as_container(workbook)

for sheet in workbook.get(c.SHEETS, []):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -946,12 +946,15 @@
after: Optional[str],
qry_filter: str = "",
) -> dict:
if qry_filter:
qry_filter = f", filter: {{ {qry_filter} }}"
logger.debug(f"qry_filter: {qry_filter}")

Check warning on line 951 in metadata-ingestion/src/datahub/ingestion/source/tableau/tableau_common.py

View check run for this annotation

Codecov / codecov/patch

metadata-ingestion/src/datahub/ingestion/source/tableau/tableau_common.py#L949-L951

Added lines #L949 - L951 were not covered by tests
query = f"""
query GetItems(
$first: Int,
$after: String
) {{
{connection_name} ( first: $first, after: $after, filter:{{ {qry_filter} }})
{connection_name} ( first: $first, after: $after{qry_filter})
{{
nodes {main_query}
pageInfo {{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
ERRORS = "errors"
NODES = "nodes"
PROJECT_NAME_WITH_IN = "projectNameWithin"
PROJECT_LUID_WITH_IN = "projectLuidWithin"
WORKBOOKS_CONNECTION = "workbooksConnection"
PROJECT_LUID = "projectLuid"
NAME = "name"
Expand Down
Loading