👌 Improve performance of needs builders #1054

Merged (1 commit) on Oct 31, 2023
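In short: the needs and needs_id builders only emit JSON from data collected during the read phase and post-processed in finish(), so they can skip Sphinx's write phase (doctree resolution and post-transforms) entirely. The needs builder falls back to the full, slower write phase only when some directive sets the export_id option, and warns about it. The sketch below shows the general shape of such a read-phase-only builder as a plain sphinx.builders.Builder subclass; the class and builder names are illustrative, not sphinx-needs API.

from typing import Iterable, Optional, Sequence, Set

from docutils import nodes
from sphinx.builders import Builder


class ReadPhaseOnlyBuilder(Builder):
    """Illustrative builder that produces all of its output in finish()."""

    name = "read-phase-only"  # hypothetical builder name

    def get_outdated_docs(self) -> Iterable[str]:
        return []  # nothing needs (re)writing

    def write(self, build_docnames: Iterable[str], updated_docnames: Sequence[str], method: str = "update") -> None:
        pass  # skip doctree resolution and post-transforms entirely

    # The remaining abstract methods only matter if write() actually runs.
    def get_target_uri(self, docname: str, typ: Optional[str] = None) -> str:
        return ""

    def prepare_writing(self, docnames: Set[str]) -> None:
        pass

    def write_doc(self, docname: str, doctree: nodes.document) -> None:
        pass

    def finish(self) -> None:
        pass  # emit output here, from data gathered during the read phase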
222 changes: 133 additions & 89 deletions sphinx_needs/builder.py
@@ -1,5 +1,5 @@
import os
- from typing import Iterable, List, Optional, Set
+ from typing import Iterable, List, Optional, Sequence, Set

from docutils import nodes
from sphinx import version_info
@@ -8,28 +8,58 @@

from sphinx_needs.config import NeedsSphinxConfig
from sphinx_needs.data import NeedsInfoType, SphinxNeedsData
from sphinx_needs.directives.need import post_process_needs_data
from sphinx_needs.logging import get_logger
from sphinx_needs.needsfile import NeedsList

- log = get_logger(__name__)
+ LOGGER = get_logger(__name__)


class NeedsBuilder(Builder):
"""Output the needs data as a JSON file,
filtering by the ``needs_builder_filter`` config option if set,
and writing to ``needs.json`` (or the ``needs_file`` config option if set)
in the output folder.

Note this builder normally completely skips the write phase,
where all documents are post-transformed, to improve performance.
It is assumed all need data is already read in the read phase,
and the post-processing of the data is done in the finish phase.

However, if the ``export_id`` option is set for any directives,
the write phase is still run, since this filter data is currently only added there.
A warning is issued in this case.
"""

name = "needs"
format = "needs"
file_suffix = ".txt"
links_suffix = None

- def write_doc(self, docname: str, doctree: nodes.document) -> None:
- pass
+ def get_outdated_docs(self) -> Iterable[str]:
+ return []

def write(
self,
build_docnames: Iterable[str],
updated_docnames: Sequence[str],
method: str = "update",
) -> None:
if not SphinxNeedsData(self.env).has_export_filters:
return
LOGGER.warning(
"At least one use of `export_id` directive option, requires a slower build", type="needs", subtype="build"
)
return super().write(build_docnames, updated_docnames, method)

def finish(self) -> None:
- env = self.env
- data = SphinxNeedsData(env)
+ post_process_needs_data(self.app)
+
+ data = SphinxNeedsData(self.env)
+ needs_config = NeedsSphinxConfig(self.env.config)
filters = data.get_or_create_filters()
- version = getattr(env.config, "version", "unset")
- needs_list = NeedsList(env.config, self.outdir, self.srcdir)
- needs_config = NeedsSphinxConfig(env.config)
+ version = getattr(self.env.config, "version", "unset")
+ needs_list = NeedsList(self.env.config, self.outdir, self.srcdir)

if needs_config.file:
needs_file = needs_config.file
@@ -38,7 +68,7 @@ def finish(self) -> None:
# check if needs.json file exists in conf.py directory
needs_json = os.path.join(self.srcdir, "needs.json")
if os.path.exists(needs_json):
log.info("needs.json found, but will not be used because needs_file not configured.")
LOGGER.info("needs.json found, but will not be used because needs_file not configured.")

# Clean needs_list from already stored needs of the current version.
# This is needed as needs could have been removed from documentation and if this is the case,
@@ -62,25 +92,30 @@
try:
needs_list.write_json()
except Exception as e:
log.error(f"Error during writing json file: {e}")
LOGGER.error(f"Error during writing json file: {e}")
else:
log.info("Needs successfully exported")
LOGGER.info("Needs successfully exported")

- def get_outdated_docs(self) -> Iterable[str]:
- return []
+ def get_target_uri(self, _docname: str, _typ: Optional[str] = None) -> str:
+ # only needed if the write phase is run
+ return ""

def prepare_writing(self, _docnames: Set[str]) -> None:
# only needed if the write phase is run
pass

def write_doc(self, docname: str, doctree: nodes.document) -> None:
# only needed if the write phase is run
pass

def write_doc_serialized(self, _docname: str, _doctree: nodes.document) -> None:
# only needed if the write phase is run
pass

def cleanup(self) -> None:
# only needed if the write phase is run
pass

- def get_target_uri(self, _docname: str, _typ: Optional[str] = None) -> str:
- return ""


def build_needs_json(app: Sphinx, _exception: Exception) -> None:
env = app.env
@@ -101,7 +136,87 @@ def build_needs_json(app: Sphinx, _exception: Exception) -> None:
needs_builder.finish()
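For orientation: build_needs_json is designed to run after another builder has finished, so that a normal HTML build can also emit needs.json. To my understanding sphinx-needs registers it (and the analogous build_needs_id_json and build_needumls_pumls below) on the build-finished event; a rough sketch of that wiring:

from sphinx.application import Sphinx
from sphinx_needs.builder import build_needs_json


def setup(app: Sphinx) -> None:
    # build-finished handlers receive (app, exception), which matches the
    # (app, _exception) signature above; the exact registration may differ.
    app.connect("build-finished", build_needs_json)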


class NeedsIdBuilder(Builder):
"""Output the needs data as multiple JSON files, one per need,
filtering by the ``needs_builder_filter`` config option if set,
and writing to the ``needs_id`` folder (or the ``build_json_per_id_path`` config option if set)
in the output folder.

Note this builder completely skips the write phase,
where all documents are post-transformed, to improve performance.
It is assumed all need data is already read in the read phase,
and the post-processing of the data is done in the finish phase.
"""

name = "needs_id"
format = "needs"
file_suffix = ".txt"
links_suffix = None

def get_outdated_docs(self) -> Iterable[str]:
return []

def write(
self,
build_docnames: Iterable[str],
updated_docnames: Sequence[str],
method: str = "update",
) -> None:
pass

def finish(self) -> None:
post_process_needs_data(self.app)

data = SphinxNeedsData(self.env)
needs = data.get_or_create_needs().values() # We need a list of needs for later filter checks
version = getattr(self.env.config, "version", "unset")
needs_config = NeedsSphinxConfig(self.env.config)
filter_string = needs_config.builder_filter
from sphinx_needs.filter_common import filter_needs

filtered_needs = filter_needs(needs, needs_config, filter_string)
needs_build_json_per_id_path = needs_config.build_json_per_id_path
needs_dir = os.path.join(self.outdir, needs_build_json_per_id_path)
if not os.path.exists(needs_dir):
os.makedirs(needs_dir, exist_ok=True)
for need in filtered_needs:
needs_list = NeedsList(self.env.config, self.outdir, self.srcdir)
needs_list.wipe_version(version)
needs_list.add_need(version, need)
id = need["id"]
try:
file_name = f"{id}.json"
needs_list.write_json(file_name, needs_dir)
except Exception as e:
LOGGER.error(f"Needs-ID Builder {id} error: {e}")
LOGGER.info("Needs_id successfully exported")


def build_needs_id_json(app: Sphinx, _exception: Exception) -> None:
env = app.env

if not NeedsSphinxConfig(env.config).build_json_per_id:
return

# Do not create an additional needs_json for every needs_id, if builder is already "needs_id".
if isinstance(app.builder, NeedsIdBuilder):
return
try:
needs_id_builder = NeedsIdBuilder(app, env)
except TypeError:
needs_id_builder = NeedsIdBuilder(app)
needs_id_builder.set_environment(env)

needs_id_builder.finish()
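With build_json_per_id = True in conf.py, each filtered need is written to its own <ID>.json file under the needs_id folder (or build_json_per_id_path, if set), using the same NeedsList layout as needs.json. A small consumer sketch, assuming that layout; the path and need ID are illustrative:

import json
from pathlib import Path

# e.g. <outdir>/needs_id/REQ_001.json after a build with build_json_per_id = True
need_doc = json.loads(Path("_build/needs_id/REQ_001.json").read_text())
print(list(need_doc["versions"]))  # versions dict, as in needs.json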


class NeedumlsBuilder(Builder):
"""Write generated PlantUML input files to the output dir,
that were generated by need directives,
if they have a ``save`` field set,
denoting the path relative to the output folder.
"""

name = "needumls"

def write_doc(self, docname: str, doctree: nodes.document) -> None:
@@ -120,7 +235,7 @@ def finish(self) -> None:
if not os.path.exists(save_dir):
os.makedirs(save_dir, exist_ok=True)

log.info(f"Storing needuml data to file {save_path}.")
LOGGER.info(f"Storing needuml data to file {save_path}.")
with open(save_path, "w") as f:
f.write(puml_content)

@@ -161,74 +276,3 @@ def build_needumls_pumls(app: Sphinx, _exception: Exception) -> None:
needs_builder.set_environment(env)

needs_builder.finish()


- class NeedsIdBuilder(Builder):
- """Json builder for needs, which creates separate json-files per need"""
-
- name = "needs_id"
- format = "needs"
- file_suffix = ".txt"
- links_suffix = None
-
- def write_doc(self, docname: str, doctree: nodes.document) -> None:
- pass
-
- def finish(self) -> None:
- env = self.env
- data = SphinxNeedsData(env)
- needs = data.get_or_create_needs().values()  # We need a list of needs for later filter checks
- version = getattr(env.config, "version", "unset")
- needs_config = NeedsSphinxConfig(env.config)
- filter_string = needs_config.builder_filter
- from sphinx_needs.filter_common import filter_needs
-
- filtered_needs = filter_needs(needs, needs_config, filter_string)
- needs_build_json_per_id_path = needs_config.build_json_per_id_path
- needs_dir = os.path.join(self.outdir, needs_build_json_per_id_path)
- if not os.path.exists(needs_dir):
- os.makedirs(needs_dir, exist_ok=True)
- for need in filtered_needs:
- needs_list = NeedsList(env.config, self.outdir, self.srcdir)
- needs_list.wipe_version(version)
- needs_list.add_need(version, need)
- id = need["id"]
- try:
- file_name = f"{id}.json"
- needs_list.write_json(file_name, needs_dir)
- except Exception as e:
- log.error(f"Needs-ID Builder {id} error: {e}")
- log.info("Needs_id successfully exported")
-
- def get_outdated_docs(self) -> Iterable[str]:
- return []
-
- def prepare_writing(self, _docnames: Set[str]) -> None:
- pass
-
- def write_doc_serialized(self, _docname: str, _doctree: nodes.document) -> None:
- pass
-
- def cleanup(self) -> None:
- pass
-
- def get_target_uri(self, _docname: str, _typ: Optional[str] = None) -> str:
- return ""
-
-
- def build_needs_id_json(app: Sphinx, _exception: Exception) -> None:
- env = app.env
-
- if not NeedsSphinxConfig(env.config).build_json_per_id:
- return
-
- # Do not create an additional needs_json for every needs_id, if builder is already "needs_id".
- if isinstance(app.builder, NeedsIdBuilder):
- return
- try:
- needs_id_builder = NeedsIdBuilder(app, env)
- except TypeError:
- needs_id_builder = NeedsIdBuilder(app)
- needs_id_builder.set_environment(env)
-
- needs_id_builder.finish()
18 changes: 17 additions & 1 deletion sphinx_needs/data.py
@@ -25,9 +25,10 @@ class NeedsFilterType(TypedDict):
status: list[str]
tags: list[str]
types: list[str]
- export_id: str
result: list[str]
amount: int
+ export_id: str
+ """If set, the filter is exported with this ID in the needs.json file."""


class NeedsBaseDataType(TypedDict):
@@ -271,6 +272,7 @@ class NeedsFilteredBaseType(NeedsBaseDataType):
filter_code: list[str]
filter_func: None | str
export_id: str
"""If set, the filter is exported with this ID in the needs.json file."""


class NeedsFilteredDiagramBaseType(NeedsFilteredBaseType):
@@ -409,6 +411,18 @@ def get_or_create_needs(self) -> dict[str, NeedsInfoType]:
self.env.needs_all_needs = {}
return self.env.needs_all_needs

@property
def has_export_filters(self) -> bool:
"""Whether any filters have export IDs."""
try:
return self.env.needs_filters_export_id
except AttributeError:
return False

@has_export_filters.setter
def has_export_filters(self, value: bool) -> None:
self.env.needs_filters_export_id = value

def get_or_create_filters(self) -> dict[str, NeedsFilterType]:
"""Get all filters, mapped by ID.

@@ -593,6 +607,8 @@ def merge_data(_app: Sphinx, env: BuildEnvironment, _docnames: list[str], other:
needs = SphinxNeedsData(env).get_or_create_needs()
other_needs = SphinxNeedsData(other).get_or_create_needs()
needs.update(other_needs)
if SphinxNeedsData(other).has_export_filters:
SphinxNeedsData(env).has_export_filters = True

def _merge(name: str, is_complex_dict: bool = False) -> None:
# Update global needs dict
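Taken together, builder.py and data.py implement one pattern: set a boolean on the build environment during the read phase, OR-merge it across parallel read workers via env-merge-info, and consult it before paying for the write phase. A minimal self-contained sketch of that pattern; all names are illustrative, not sphinx-needs API:

from typing import Any, Dict, List

from docutils import nodes
from sphinx.application import Sphinx
from sphinx.environment import BuildEnvironment


def on_doctree_read(app: Sphinx, doctree: nodes.document) -> None:
    # read phase: remember that some document requested the slow path
    # (here: any element carrying a hypothetical "export_id" attribute)
    if any(node.get("export_id") for node in doctree.findall(nodes.Element)):
        app.env.my_slow_flag = True  # hypothetical environment attribute


def on_merge_info(app: Sphinx, env: BuildEnvironment, docnames: List[str], other: BuildEnvironment) -> None:
    # parallel reads: each worker gets its own env, so OR-merge the flag back
    if getattr(other, "my_slow_flag", False):
        env.my_slow_flag = True


def setup(app: Sphinx) -> Dict[str, Any]:
    app.connect("doctree-read", on_doctree_read)
    app.connect("env-merge-info", on_merge_info)
    return {"parallel_read_safe": True}

A builder can then check getattr(env, "my_slow_flag", False) and return early from write(), which is exactly what NeedsBuilder.write does above with has_export_filters.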