|
49 | 49 | DataprocWorkflowLink,
|
50 | 50 | DataprocWorkflowTemplateLink,
|
51 | 51 | )
|
52 |
| -from airflow.providers.google.cloud.openlineage.utils import ( |
53 |
| - inject_openlineage_properties_into_dataproc_batch, |
54 |
| - inject_openlineage_properties_into_dataproc_job, |
55 |
| - inject_openlineage_properties_into_dataproc_workflow_template, |
56 |
| -) |
57 | 52 | from airflow.providers.google.cloud.operators.cloud_base import GoogleCloudBaseOperator
|
58 | 53 | from airflow.providers.google.cloud.triggers.dataproc import (
|
59 | 54 | DataprocBatchTrigger,
|
@@ -1858,12 +1853,7 @@ def execute(self, context: Context):
|
1858 | 1853 | project_id = self.project_id or hook.project_id
|
1859 | 1854 | if self.openlineage_inject_parent_job_info or self.openlineage_inject_transport_info:
|
1860 | 1855 | self.log.info("Automatic injection of OpenLineage information into Spark properties is enabled.")
|
1861 |
| - self.template = inject_openlineage_properties_into_dataproc_workflow_template( |
1862 |
| - template=self.template, |
1863 |
| - context=context, |
1864 |
| - inject_parent_job_info=self.openlineage_inject_parent_job_info, |
1865 |
| - inject_transport_info=self.openlineage_inject_transport_info, |
1866 |
| - ) |
| 1856 | + self._inject_openlineage_properties_into_dataproc_workflow_template(context) |
1867 | 1857 |
|
1868 | 1858 | operation = hook.instantiate_inline_workflow_template(
|
1869 | 1859 | template=self.template,
|
@@ -1920,6 +1910,25 @@ def on_kill(self) -> None:
|
1920 | 1910 | hook = DataprocHook(gcp_conn_id=self.gcp_conn_id, impersonation_chain=self.impersonation_chain)
|
1921 | 1911 | hook.get_operations_client(region=self.region).cancel_operation(name=self.operation_name)
|
1922 | 1912 |
|
| 1913 | + def _inject_openlineage_properties_into_dataproc_workflow_template(self, context: Context) -> None: |
| 1914 | + try: |
| 1915 | + from airflow.providers.google.cloud.openlineage.utils import ( |
| 1916 | + inject_openlineage_properties_into_dataproc_workflow_template, |
| 1917 | + ) |
| 1918 | + |
| 1919 | + self.template = inject_openlineage_properties_into_dataproc_workflow_template( |
| 1920 | + template=self.template, |
| 1921 | + context=context, |
| 1922 | + inject_parent_job_info=self.openlineage_inject_parent_job_info, |
| 1923 | + inject_transport_info=self.openlineage_inject_transport_info, |
| 1924 | + ) |
| 1925 | + except Exception as e: |
| 1926 | + self.log.warning( |
| 1927 | + "An error occurred while trying to inject OpenLineage information. " |
| 1928 | + "Dataproc template has not been modified by OpenLineage.", |
| 1929 | + exc_info=e, |
| 1930 | + ) |
| 1931 | + |
1923 | 1932 |
|
1924 | 1933 | class DataprocSubmitJobOperator(GoogleCloudBaseOperator):
|
1925 | 1934 | """
|
@@ -2017,12 +2026,8 @@ def execute(self, context: Context):
|
2017 | 2026 | self.hook = DataprocHook(gcp_conn_id=self.gcp_conn_id, impersonation_chain=self.impersonation_chain)
|
2018 | 2027 | if self.openlineage_inject_parent_job_info or self.openlineage_inject_transport_info:
|
2019 | 2028 | self.log.info("Automatic injection of OpenLineage information into Spark properties is enabled.")
|
2020 |
| - self.job = inject_openlineage_properties_into_dataproc_job( |
2021 |
| - job=self.job, |
2022 |
| - context=context, |
2023 |
| - inject_parent_job_info=self.openlineage_inject_parent_job_info, |
2024 |
| - inject_transport_info=self.openlineage_inject_transport_info, |
2025 |
| - ) |
| 2029 | + self._inject_openlineage_properties_into_dataproc_job(context) |
| 2030 | + |
2026 | 2031 | job_object = self.hook.submit_job(
|
2027 | 2032 | project_id=self.project_id,
|
2028 | 2033 | region=self.region,
|
@@ -2096,6 +2101,25 @@ def on_kill(self):
|
2096 | 2101 | if self.job_id and self.cancel_on_kill:
|
2097 | 2102 | self.hook.cancel_job(job_id=self.job_id, project_id=self.project_id, region=self.region)
|
2098 | 2103 |
|
| 2104 | + def _inject_openlineage_properties_into_dataproc_job(self, context: Context) -> None: |
| 2105 | + try: |
| 2106 | + from airflow.providers.google.cloud.openlineage.utils import ( |
| 2107 | + inject_openlineage_properties_into_dataproc_job, |
| 2108 | + ) |
| 2109 | + |
| 2110 | + self.job = inject_openlineage_properties_into_dataproc_job( |
| 2111 | + job=self.job, |
| 2112 | + context=context, |
| 2113 | + inject_parent_job_info=self.openlineage_inject_parent_job_info, |
| 2114 | + inject_transport_info=self.openlineage_inject_transport_info, |
| 2115 | + ) |
| 2116 | + except Exception as e: |
| 2117 | + self.log.warning( |
| 2118 | + "An error occurred while trying to inject OpenLineage information. " |
| 2119 | + "Dataproc job has not been modified by OpenLineage.", |
| 2120 | + exc_info=e, |
| 2121 | + ) |
| 2122 | + |
2099 | 2123 |
|
2100 | 2124 | class DataprocUpdateClusterOperator(GoogleCloudBaseOperator):
|
2101 | 2125 | """
|
@@ -2502,12 +2526,7 @@ def execute(self, context: Context):
|
2502 | 2526 |
|
2503 | 2527 | if self.openlineage_inject_parent_job_info or self.openlineage_inject_transport_info:
|
2504 | 2528 | self.log.info("Automatic injection of OpenLineage information into Spark properties is enabled.")
|
2505 |
| - self.batch = inject_openlineage_properties_into_dataproc_batch( |
2506 |
| - batch=self.batch, |
2507 |
| - context=context, |
2508 |
| - inject_parent_job_info=self.openlineage_inject_parent_job_info, |
2509 |
| - inject_transport_info=self.openlineage_inject_transport_info, |
2510 |
| - ) |
| 2529 | + self._inject_openlineage_properties_into_dataproc_batch(context) |
2511 | 2530 |
|
2512 | 2531 | try:
|
2513 | 2532 | self.operation = self.hook.create_batch(
|
@@ -2670,6 +2689,25 @@ def retry_batch_creation(
|
2670 | 2689 | )
|
2671 | 2690 | return batch, batch_id
|
2672 | 2691 |
|
| 2692 | + def _inject_openlineage_properties_into_dataproc_batch(self, context: Context) -> None: |
| 2693 | + try: |
| 2694 | + from airflow.providers.google.cloud.openlineage.utils import ( |
| 2695 | + inject_openlineage_properties_into_dataproc_batch, |
| 2696 | + ) |
| 2697 | + |
| 2698 | + self.batch = inject_openlineage_properties_into_dataproc_batch( |
| 2699 | + batch=self.batch, |
| 2700 | + context=context, |
| 2701 | + inject_parent_job_info=self.openlineage_inject_parent_job_info, |
| 2702 | + inject_transport_info=self.openlineage_inject_transport_info, |
| 2703 | + ) |
| 2704 | + except Exception as e: |
| 2705 | + self.log.warning( |
| 2706 | + "An error occurred while trying to inject OpenLineage information. " |
| 2707 | + "Dataproc batch has not been modified by OpenLineage.", |
| 2708 | + exc_info=e, |
| 2709 | + ) |
| 2710 | + |
2673 | 2711 |
|
2674 | 2712 | class DataprocDeleteBatchOperator(GoogleCloudBaseOperator):
|
2675 | 2713 | """
|
|
0 commit comments