Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix modified date handling; add env.var support for version. #148

Merged
merged 2 commits into from
Aug 16, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,17 @@
# Change log

## Release 0.6.x (2023-08-dd)

New features:

- Support for two environment variables was added. Both changes enable achieving consistent version/tag info in the gh-based vocabulary management (related [voc4cat-template #11](https://github.com/nfdi4cat/voc4cat-template/issues/11)). #148
- `VOC4CAT_MODIFIED` - If set, its value is written as the modified date (`xsd:date`) of the concept scheme; the date in the source is ignored.
- `VOC4CAT_VERSION` - version string; if present this version info has highest precedence. The value must start with the letter "v", otherwise an error is raised.

Bug fixes:

- Modified date of concept scheme was not transferred from xlsx to rdf. #147, #148

## Release 0.6.2 (2023-08-10)

New features:
Expand Down
8 changes: 8 additions & 0 deletions src/voc4cat/docs.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,14 @@ def run_pylode(turtle_file: Path, output_path: Path) -> None:
'<section id="overview">',
'<section id="overview" style="display: none;">',
)
content = content.replace(
"<dt>Ontology RDF</dt>",
"<dt>Vocabulary RDF</dt>",
)
content = content.replace(
f'<dd><a href="{filename.stem}.ttl">RDF (turtle)</a></dd>',
f'<dd><a href="../{filename.stem}.ttl">RDF (turtle)</a></dd>',
)
with open(outfile, "w") as html_file:
html_file.write(content)

Expand Down
40 changes: 22 additions & 18 deletions src/voc4cat/models.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import datetime
import logging
import os
from itertools import chain
from typing import List, Union

Expand Down Expand Up @@ -169,13 +170,31 @@ def creator_must_be_from_list(cls, v):
raise ValueError(msg)
return v

@validator("modified")
def set_modified_date_if_missing(cls, v):
    """Discard the modified date when running in a CI environment.

    The modified date is not tracked on GitHub: if the ``CI`` environment
    variable is defined (even as an empty string) the validated value
    becomes ``None``; otherwise ``v`` is passed through unchanged.
    """
    running_in_ci = os.getenv("CI") is not None
    return None if running_in_ci else v

@validator("publisher")
def publisher_must_be_from_list(cls, v):
    """Accept only a publisher that is contained in ``ORGANISATIONS``.

    Returns the value unchanged when it is a known organisation and raises
    ``ValueError`` listing the allowed names otherwise.
    """
    if v in ORGANISATIONS:
        return v
    msg = f"Organisations must be selected from the Organisations list: {', '.join(ORGANISATIONS)}"
    raise ValueError(msg)

@validator("version")
def version_from_env(cls, v):
    """Resolve the version, giving the environment precedence over the source.

    - If ``VOC4CAT_VERSION`` is defined it must start with "v" and is
      returned as the version; any other value raises ``ValueError``.
    - Otherwise, if ``CI`` is defined, the version is not tracked and
      ``None`` is returned.
    - Otherwise ``v`` is returned unchanged.
    """
    env_version = os.getenv("VOC4CAT_VERSION")
    if env_version is None:
        # No override requested: drop the version in CI, keep it locally.
        return None if os.getenv("CI") is not None else v
    if not env_version.startswith("v"):
        msg = f'Invalid environment variable VOC4CAT_VERSION "{env_version}". Version must start with letter "v".'
        raise ValueError(msg)
    return env_version

def to_graph(self):
g = Graph()
v = URIRef(self.uri)
Expand All @@ -187,25 +206,10 @@ def to_graph(self):
g.add((v, SKOS.prefLabel, Literal(self.title, lang="en")))
g.add((v, SKOS.definition, Literal(self.description, lang="en")))
g.add((v, DCTERMS.created, Literal(self.created, datatype=XSD.date)))
if self.modified is not None:
g.add((v, DCTERMS.modified, Literal(self.created, datatype=XSD.date)))
else:
g.add(
(
v,
DCTERMS.modified,
Literal(
datetime.datetime.now(datetime.timezone.utc).strftime(
"%Y-%m-%d"
),
datatype=XSD.date,
),
)
)
g.add((v, DCTERMS.modified, Literal(self.modified, datatype=XSD.date))),
g.add((v, DCTERMS.creator, ORGANISATIONS[self.creator]))
g.add((v, DCTERMS.publisher, ORGANISATIONS[self.publisher]))
if self.version is not None:
g.add((v, OWL.versionInfo, Literal(self.version)))
g.add((v, OWL.versionInfo, Literal(self.version)))
g.add((v, DCTERMS.provenance, Literal(self.provenance, lang="en")))
if self.custodian is not None:
g.add((v, DCAT.contactPoint, Literal(self.custodian)))
Expand All @@ -224,7 +228,7 @@ def to_excel(self, wb: Workbook):
ws["B3"] = self.title
ws["B4"] = self.description
ws["B5"] = self.created.isoformat()
ws["B6"] = self.modified.isoformat()
ws["B6"] = None if self.modified is None else self.modified.isoformat()
ws["B7"] = self.creator
ws["B8"] = self.publisher
ws["B9"] = self.version
Expand Down
33 changes: 30 additions & 3 deletions src/voc4cat/transform.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import logging
import os
import shutil
from collections import defaultdict
from itertools import count, zip_longest
Expand All @@ -7,8 +8,8 @@

import openpyxl
from openpyxl.styles import Alignment
from rdflib import Graph
from rdflib.namespace import SKOS
from rdflib import Graph, Literal
from rdflib.namespace import DCTERMS, OWL, RDF, SKOS, XSD

from voc4cat import config
from voc4cat.checks import Voc4catError
Expand All @@ -27,7 +28,7 @@
def extract_numeric_id_from_iri(iri):
iri_path = urlsplit(iri).path
reverse_id = []
for char in reversed(iri_path): # pragma: no cover
for char in reversed(iri_path):
if char.isdigit():
reverse_id.append(char)
elif char == "/":
Expand Down Expand Up @@ -63,6 +64,27 @@ def write_split_turtle(vocab_graph: Graph, outdir: Path) -> None:
logger.debug("-> wrote %i %ss-file(s).", len(qresults), skos_class)


def autoversion_cs(graph: Graph) -> Graph:
    """Apply modified date and version from environment variables to a graph.

    Nothing happens unless the graph contains a ``skos:ConceptScheme``.
    For the first concept scheme found:

    - ``VOC4CAT_MODIFIED``: if defined, all ``dcterms:modified`` triples are
      removed and the variable's value is added as ``xsd:date`` literal.
    - ``VOC4CAT_VERSION``: if defined, all ``owl:versionInfo`` triples are
      removed and the variable's value is added as version. The value must
      start with "v"; otherwise the error is logged and ``Voc4catError`` is
      raised (after the old version triples were already removed).

    The graph is modified in place and also returned.
    """
    first_match = next(graph.triples((None, RDF.type, SKOS.ConceptScheme)), None)
    if first_match is None:
        return graph
    scheme = first_match[0]

    modified_env = os.getenv("VOC4CAT_MODIFIED")
    if modified_env is not None:
        graph.remove((None, DCTERMS.modified, None))
        graph.add((scheme, DCTERMS.modified, Literal(modified_env, datatype=XSD.date)))

    version_env = os.getenv("VOC4CAT_VERSION")
    if version_env is not None:
        graph.remove((None, OWL.versionInfo, None))
        if not version_env.startswith("v"):
            msg = 'Invalid environment variable VOC4CAT_VERSION "%s". Version must start with letter "v".'
            logger.error(msg, version_env)
            raise Voc4catError(msg % version_env)
        graph.add((scheme, OWL.versionInfo, Literal(version_env)))

    return graph


def join_split_turtle(vocab_dir: Path) -> Graph:
# Search recursively all turtle files belonging to the concept scheme
turtle_files = vocab_dir.rglob("*.ttl")
Expand All @@ -71,6 +93,11 @@ def join_split_turtle(vocab_dir: Path) -> Graph:
# Load each turtle file into a separate graph and merge it into the concept scheme graph
for file in turtle_files:
graph = Graph().parse(file, format="turtle")
# Set modified date if "requested" via environment variable.
if file.name == "concept_scheme.ttl" or any(
graph.triples((None, RDF.type, SKOS.ConceptScheme))
):
graph = autoversion_cs(graph)
cs_graph += graph
cs_graph.serialize(destination=vocab_dir.with_suffix(".ttl"), format="turtle")
return cs_graph
Expand Down
Binary file modified tests/templ_versions/043_exhaustive_example.xlsx
Binary file not shown.
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ cs: a skos:ConceptScheme ;
dcterms:created "2022-03-07"^^xsd:date ;
dcterms:creator <https://linked.data.gov.au/org/cgi> ;
dcterms:hasPart <http://example.org/example_collection_uri> ;
dcterms:modified "2022-03-07"^^xsd:date ;
dcterms:modified "2022-03-10"^^xsd:date ;
dcterms:provenance "Example Provenance"@en ;
dcterms:publisher <https://linked.data.gov.au/org/cgi> ;
rdfs:seeAlso "1.2.3.4" ;
Expand Down
77 changes: 71 additions & 6 deletions tests/test_models.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
import datetime
import os
from unittest import mock

import pytest
from pydantic.error_wrappers import ValidationError
from rdflib import Graph
Expand Down Expand Up @@ -81,20 +85,81 @@ def test_check_uri_vs_config(datadir, temp_config):
# === From here on: "old" tests from rdflib.vocexcel ===


@mock.patch.dict(os.environ, clear=True)  # required to hide gh-action environment vars
def test_vocabulary_valid():
    """Without CI/VOC4CAT_* variables the source values are kept as given."""
    # The result is bound to ``cs`` so that the parsed fields can be checked;
    # the stale, unassigned ``ConceptScheme(`` opener was dropped.
    cs = ConceptScheme(
        uri="https://linked.data.gov.au/def/borehole-start-point",
        title="Borehole Start Point",
        description="Indicates the nature of the borehole start point location",
        created="2020-04-02",
        modified="2020-04-04",
        creator="GSQ",
        publisher="GSQ",
        version="1.0",
        provenance="Derived from the 2011-09 version of CGI Borehole start point list",
        custodian="Vance Kelly",
        pid="http://pid.geoscience.gov.au/dataset/ga/114541",
    )
    assert cs.modified == datetime.date(2020, 4, 4)
    assert cs.version == "1.0"


@mock.patch.dict(os.environ, {"CI": ""})
def test_vocabulary_valid_in_ci():
    """With CI defined, modified date and version from the source are dropped."""
    fields = {
        "uri": "https://linked.data.gov.au/def/borehole-start-point",
        "title": "Borehole Start Point",
        "description": "Indicates the nature of the borehole start point location",
        "created": "2020-04-02",
        "modified": "2020-04-04",
        "creator": "GSQ",
        "publisher": "GSQ",
        "version": "1.0",
        "provenance": "Derived from the 2011-09 version of CGI Borehole start point list",
        "custodian": "Vance Kelly",
        "pid": "http://pid.geoscience.gov.au/dataset/ga/114541",
    }
    scheme = ConceptScheme(**fields)
    assert scheme.modified is None
    assert scheme.version is None


@mock.patch.dict(os.environ, {"CI": "", "VOC4CAT_VERSION": "v2023-08-15"})
def test_vocabulary_valid_version_via_envvar():
    """VOC4CAT_VERSION overrides the source version; CI still drops the date."""
    # Each keyword is passed exactly once; the stale duplicate ``modified=``
    # and ``version=`` arguments were dropped (a repeated keyword argument
    # is a SyntaxError).
    cs = ConceptScheme(
        uri="https://linked.data.gov.au/def/borehole-start-point",
        title="Borehole Start Point",
        description="Indicates the nature of the borehole start point location",
        created="2020-04-02",
        modified="2020-04-04",
        creator="GSQ",
        publisher="GSQ",
        version="1.0",
        provenance="Derived from the 2011-09 version of CGI Borehole start point list",
        custodian="Vance Kelly",
        pid="http://pid.geoscience.gov.au/dataset/ga/114541",
    )
    assert cs.modified is None
    assert cs.version == "v2023-08-15"


@mock.patch.dict(os.environ, {"CI": "", "VOC4CAT_VERSION": "2023-08-15"})
def test_vocabulary_invalid_version_via_envvar():
    """A VOC4CAT_VERSION value without leading "v" fails model validation."""
    fields = {
        "uri": "https://linked.data.gov.au/def/borehole-start-point",
        "title": "Borehole Start Point",
        "description": "Indicates the nature of the borehole start point location",
        "created": "2020-04-02",
        "modified": "2020-04-04",
        "creator": "GSQ",
        "publisher": "GSQ",
        "version": "1.0",
        "provenance": "Derived from the 2011-09 version of CGI Borehole start point list",
        "custodian": "Vance Kelly",
        "pid": "http://pid.geoscience.gov.au/dataset/ga/114541",
    }
    expected_error = "Invalid environment variable VOC4CAT_VERSION"
    with pytest.raises(ValidationError, match=expected_error):
        ConceptScheme(**fields)


def test_vocabulary_invalid_uri():
Expand All @@ -104,7 +169,7 @@ def test_vocabulary_invalid_uri():
title="Borehole Start Point",
description="Indicates the nature of the borehole start point location",
created="2020-04-02",
modified="02/042020",
modified=None,
creator="GSQ",
publisher="GSQ",
version="",
Expand All @@ -121,7 +186,7 @@ def test_vocabulary_invalid_created_date():
title="Borehole Start Point",
description="Indicates the nature of the borehole start point location",
created="2020-04",
modified="2020-04-02",
modified="2020-04-04",
creator="GSQ",
publisher="GSQ",
version="",
Expand All @@ -138,7 +203,7 @@ def test_vocabulary_invalid_publisher():
title="Borehole Start Point",
description="Indicates the nature of the borehole start point location",
created="2020-04-02",
modified="2020-04-02",
modified="2020-04-04",
creator="GSQ",
publisher="WHO",
version="",
Expand Down
4 changes: 4 additions & 0 deletions tests/test_template043.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
import os
from pathlib import Path
from unittest import mock

import pytest
import voc4cat
Expand Down Expand Up @@ -39,6 +41,7 @@ def test_simple():
) in g, "Provenance for vocab is not correct"


@mock.patch.dict(os.environ, clear=True) # required to hide gh-action environment vars
def test_exhaustive_template_is_isomorphic():
g1 = Graph().parse(
Path(__file__).parent
Expand All @@ -52,6 +55,7 @@ def test_exhaustive_template_is_isomorphic():
assert compare.isomorphic(g1, g2), "Graphs are not Isomorphic"


@mock.patch.dict(os.environ, clear=True) # required to hide gh-action environment vars
def test_rdf_to_excel():
g1 = Graph().parse(
Path(__file__).parent
Expand Down
Loading