Skip to content

Commit

Permalink
Fix thrown exception on Utils.get_location() (apache#102)
Browse files Browse the repository at this point in the history
* Add check for file path when extracting task location

Signed-off-by: wslulciuc <[email protected]>

* continued: Add check for file path when extracting task location

Signed-off-by: wslulciuc <[email protected]>

* Update message for task location error

Signed-off-by: wslulciuc <[email protected]>

* continued: Update message for task location error

Signed-off-by: wslulciuc <[email protected]>

* Add check for url building git url

Signed-off-by: wslulciuc <[email protected]>
  • Loading branch information
wslulciuc authored Nov 30, 2020
1 parent 5077995 commit 5570016
Show file tree
Hide file tree
Showing 3 changed files with 29 additions and 5 deletions.
5 changes: 3 additions & 2 deletions marquez_airflow/dag.py
Original file line number Diff line number Diff line change
Expand Up @@ -449,8 +449,9 @@ def _get_location(task):
return get_location(task.file_path)
else:
return get_location(task.dag.fileloc)
except Exception as e:
log.warning(f'Unable to fetch the location. {e}', exc_info=True)
except Exception:
log.warning(f"Failed to get location for task '{task.task_id}'.",
exc_info=True)
return None

@staticmethod
Expand Down
17 changes: 14 additions & 3 deletions marquez_airflow/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,11 @@ def make_key(job_name, run_id):
return "marquez_id_mapping-{}-{}".format(job_name, run_id)


def url_to_https(url):
def url_to_https(url) -> str:
# Ensure URL exists
if not url:
return None

base_url = None
if url.startswith('git@'):
part = url.split('git@')[1:2]
Expand All @@ -75,14 +79,18 @@ def url_to_https(url):
base_url = url

if not base_url:
raise ValueError(f'Unable to extract location from: {url}')
raise ValueError(f"Unable to extract location from: {url}")

if base_url.endswith('.git'):
base_url = base_url[:-4]
return base_url


def get_location(file_path):
def get_location(file_path) -> str:
# Ensure file path exists
if not file_path:
return None

# move to the file directory
abs_path = os.path.abspath(file_path)
file_name = os.path.basename(file_path)
Expand All @@ -99,6 +107,9 @@ def get_location(file_path):

# build the URL
base_url = url_to_https(repo_url)
if not base_url:
return None

return f'{base_url}/blob/{commit_id}/{repo_relative_path}{file_name}'


Expand Down
12 changes: 12 additions & 0 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@
from marquez_airflow.extractors import StepMetadata
from marquez_airflow.version import VERSION as MARQUEZ_AIRFLOW_VERSION
from marquez_airflow.utils import (
url_to_https,
get_location,
get_connection_uri,
add_airflow_info_to
)
Expand Down Expand Up @@ -56,3 +58,13 @@ def test_add_airflow_info_to():
assert step_metadata.context['airflow.task_info'] is not None
assert step_metadata.context['marquez_airflow.version'] == \
MARQUEZ_AIRFLOW_VERSION


def test_get_location_no_file_path():
assert get_location(None) is None
assert get_location("") is None


def test_url_to_https_no_url():
assert url_to_https(None) is None
assert url_to_https("") is None

0 comments on commit 5570016

Please sign in to comment.