Source code for pytest_notebook.nb_regression

"""Jupyter Notebook Regression Test Class."""
import copy
import logging
import os
import sys
from typing import Any, List, TextIO, Tuple, Union

import attr
from attr.validators import instance_of
from nbdime.diff_format import DiffEntry
import nbformat
from nbformat import NotebookNode

try:
    # coverage is an optional dependency
    from coverage import Coverage as CoverageType
except ImportError:
    CoverageType = Any

from pytest_notebook.diffing import diff_notebooks, diff_to_string, filter_diff
from pytest_notebook.execution import (
    HELP_COVERAGE,
    HELP_COVERAGE_CONFIG,
    HELP_COVERAGE_SOURCE,
    execute_notebook,
)
from pytest_notebook.notebook import (
    load_notebook_with_config,
    regex_replace_nb,
    validate_regex_replace,
)
from pytest_notebook.post_processors import (
    ENTRY_POINT_NAME,
    list_processor_names,
    load_processor,
)
from pytest_notebook.utils import autodoc

logger = logging.getLogger(__name__)

HELP_EXEC_NOTEBOOK = (
    "Create a new notebook by executing all cells in the original notebook."
)
HELP_EXEC_CWD = (
    "Path to the directory which the notebook will run in "
    "(defaults to directory of notebook)."
)
HELP_EXEC_TIMEOUT = "The maximum time to wait (in seconds) for execution of each cell."
HELP_EXEC_ALLOW_ERRORS = (
    "Do not stop execution after the first unexpected exception "
    "(where cell is not tagged ``raises-exception``)."
)
HELP_DIFF_REPLACE = (
    "A list of regex replacements to apply before diffing, "
    r"e.g. ``[('/cells/*/outputs', '\d{2,4}-\d{1,2}-\d{1,2}', 'DATE-STAMP')]``."
)
HELP_DIFF_IGNORE = (
    "List of diff paths to ignore, e.g. '/cells/1/outputs' or '/cells/\\*/metadata'."
)
HELP_DIFF_USE_COLOR = "Use ANSI color code escapes for text output."
HELP_DIFF_COLOR_WORDS = "Highlight changed words using only colors."
HELP_FORCE_REGEN = (
    "Re-generate notebook files, if no unexpected execution errors, "
    "and an output path has been supplied."
)
HELP_POST_PROCS = (
    "post-processors to apply to the new notebook, "
    f"relating to entry points in the '{ENTRY_POINT_NAME}' group"
)
HELP_COVERAGE_MERGE = "A coverage.Coverage instance, to merge coverage results with."


class NBRegressionError(Exception):
    """Exception to signal a regression test fail."""


@autodoc
@attr.s(frozen=True, slots=True, repr=False)
class NBRegressionResult:
    """A class to store the result of ``NBRegressionFixture.check``."""

    nb_initial: NotebookNode = attr.ib(
        validator=instance_of(NotebookNode), metadata={"help": "Initial notebook."}
    )
    nb_final: NotebookNode = attr.ib(
        validator=instance_of(NotebookNode),
        metadata={"help": "Notebook after execution and post-processing."},
    )
    diff_full: List[DiffEntry] = attr.ib(
        metadata={"help": "Full diff of initial/final notebooks."}
    )
    diff_filtered: List[DiffEntry] = attr.ib(
        metadata={
            "help": (
                "Diff of initial/final notebooks, "
                "filtered according to the parsed configuration."
            )
        }
    )
    diff_string: str = attr.ib(
        validator=instance_of(str),
        metadata={"help": "The formatted string of diff_filtered."},
    )
    process_resources: dict = attr.ib(
        attr.Factory(dict),
        instance_of(dict),
        metadata={"help": "Resources returned from notebook processors."},
    )

    def __repr__(self):
        """Represent the class instance."""
        return (
            f"NBRegressionResult(diff_full_length={len(self.diff_full)},"
            f"diff_filtered_length={len(self.diff_filtered)})"
        )
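

# Usage note (illustrative sketch, not part of the module's API surface):
# one way a returned ``NBRegressionResult`` could be inspected. The ``fixture``
# variable and the notebook file name are hypothetical.
#
#     result = fixture.check("notebook.ipynb", raise_errors=False)
#     result.nb_final          # notebook after execution and post-processing
#     len(result.diff_full)    # number of unfiltered diff entries
#     result.diff_string       # formatted diff of the filtered entries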


@autodoc
@attr.s
class NBRegressionFixture:
    """Class to perform Jupyter Notebook Regression tests."""

    exec_notebook: bool = attr.ib(
        True, instance_of(bool), metadata={"help": HELP_EXEC_NOTEBOOK}
    )
    exec_cwd: Union[str, None] = attr.ib(
        None, instance_of((type(None), str)), metadata={"help": HELP_EXEC_CWD}
    )

    @exec_cwd.validator
    def _validate_exec_cwd(self, attribute, value):
        if value is None:
            return
        if not isinstance(value, str):
            raise TypeError("exec_cwd must be None or a string")
        if not os.path.isdir(value):
            raise OSError(f"exec_cwd='{value}' is not an existing directory")

    exec_allow_errors: bool = attr.ib(
        False, instance_of(bool), metadata={"help": HELP_EXEC_ALLOW_ERRORS}
    )
    exec_timeout: int = attr.ib(
        120, instance_of((int, float)), metadata={"help": HELP_EXEC_TIMEOUT}
    )

    @exec_timeout.validator
    def _validate_exec_timeout(self, attribute, value):
        if not isinstance(value, int):
            raise TypeError("exec_timeout must be an integer")
        if value <= 0:
            raise ValueError("exec_timeout must be larger than 0")

    coverage: bool = attr.ib(False, metadata={"help": HELP_COVERAGE})

    @coverage.validator
    def _validate_coverage(self, attribute, value):
        if not isinstance(value, bool):
            raise TypeError("coverage must be a boolean")
        if value:
            try:
                import coverage  # noqa: F401
            except ImportError:
                raise ImportError("The 'coverage' package must be installed.")

    cov_config: Union[str, None] = attr.ib(
        None, instance_of((type(None), str)), metadata={"help": HELP_COVERAGE_CONFIG}
    )
    cov_source: Union[str, Tuple[str]] = attr.ib(
        None, instance_of((type(None), tuple)), metadata={"help": HELP_COVERAGE_SOURCE}
    )
    cov_merge: Union[CoverageType, None] = attr.ib(
        None, metadata={"help": HELP_COVERAGE_MERGE}, hash=True
    )

    @cov_merge.validator
    def _validate_cov_merge(self, attribute, value):
        if value is None:
            return
        try:
            from coverage import Coverage
        except ImportError:
            raise ImportError("The 'coverage' package must be installed")
        if not isinstance(value, Coverage):
            raise TypeError("cov_merge must be an instance of coverage.Coverage")

    post_processors: tuple = attr.ib(
        ("coalesce_streams",), metadata={"help": HELP_POST_PROCS}
    )

    @post_processors.validator
    def _validate_post_processors(self, attribute, values):
        if not isinstance(values, tuple):
            raise TypeError(f"post_processors must be a tuple: {values}")
        for name in values:
            if name not in list_processor_names():
                raise TypeError(
                    f"name '{name}' not found in entry points: {list_processor_names()}"
                )

    process_resources: dict = attr.ib(
        attr.Factory(dict),
        instance_of(dict),
        metadata={"help": "Resources to pass to processor functions."},
    )
    diff_replace: tuple = attr.ib((), metadata={"help": HELP_DIFF_REPLACE})

    @diff_replace.validator
    def _validate_diff_replace(self, attribute, values):
        if not isinstance(values, tuple):
            raise TypeError(f"diff_replace must be a tuple: {values}")
        for i, args in enumerate(values):
            validate_regex_replace(args, i)

    diff_ignore: tuple = attr.ib(
        # TODO replace this default with a diff_replace one?
        ("/cells/*/outputs/*/traceback",),
        metadata={"help": HELP_DIFF_IGNORE},
    )

    @diff_ignore.validator
    def _validate_diff_ignore(self, attribute, values):
        if not isinstance(values, tuple):
            raise TypeError(f"diff_ignore must be a tuple: {values}")
        for item in values:
            if not isinstance(item, str):
                raise TypeError(f"diff_ignore item '{item}' must be a string")
            if not item.startswith("/"):
                raise ValueError(f"diff_ignore item '{item}' must start with '/'")

    diff_use_color: bool = attr.ib(
        True, instance_of(bool), metadata={"help": HELP_DIFF_USE_COLOR}
    )
    diff_color_words: bool = attr.ib(
        False, instance_of(bool), metadata={"help": HELP_DIFF_COLOR_WORDS}
    )
    force_regen: bool = attr.ib(
        False, instance_of(bool), metadata={"help": HELP_FORCE_REGEN}
    )

    def __setattr__(self, key, value):
        """Add validation when setting attributes."""
        x_attr = getattr(attr.fields(self.__class__), key)
        if x_attr.validator:
            x_attr.validator(self, x_attr, value)
        super().__setattr__(key, value)

    def check(
        self, path: Union[TextIO, str], raise_errors: bool = True
    ) -> NBRegressionResult:
        """Execute the Notebook and compare its initial vs. final contents.

        If ``force_regen`` is True, the new notebook will be written to ``path``.

        If ``raise_errors`` is True:

        :raise nbconvert.preprocessors.CellExecutionError: if error in execution
        :raise NBConfigValidationError: if the notebook metadata is invalid
        :raise NBRegressionError: if diffs present

        :rtype: NBRegressionResult
        """
        __tracebackhide__ = True
        if hasattr(path, "name"):
            abspath = os.path.abspath(path.name)
        else:
            abspath = os.path.abspath(str(path))
        logger.debug(f"Checking file: {abspath}")

        nb_initial, nb_config = load_notebook_with_config(path)

        resources = copy.deepcopy(self.process_resources)
        if not self.exec_cwd:
            self.exec_cwd = os.path.dirname(abspath)
        if self.exec_notebook:
            logger.debug("Executing notebook.")
            exec_results = execute_notebook(
                nb_initial,
                resources=resources,
                cwd=self.exec_cwd,
                timeout=self.exec_timeout,
                allow_errors=self.exec_allow_errors,
                with_coverage=self.coverage,
                cov_config_file=self.cov_config,
                cov_source=self.cov_source,
            )
            exec_error = exec_results.exec_error
            nb_final = exec_results.notebook
            resources = exec_results.resources
        else:
            exec_error = None
            nb_final = nb_initial

        # TODO merge on fail option (using pytest-cov --no-cov-on-fail)
        if self.cov_merge and exec_results.has_coverage:
            logger.info("Merging coverage.")
            self.cov_merge.get_data().update(
                exec_results.coverage_data(debug=self.cov_merge._debug),
                aliases=_get_coverage_aliases(self.cov_merge),
            )
            # we also take this opportunity to remove ''
            # from the unmatched source packages, which is caused by using `--cov=`
            self.cov_merge._inorout.source_pkgs_unmatched = [
                p for p in self.cov_merge._inorout.source_pkgs_unmatched if p
            ]

        for proc_name in self.post_processors:
            logger.debug(f"Applying post processor: {proc_name}")
            post_proc = load_processor(proc_name)
            nb_final, resources = post_proc(nb_final, resources)

        regex_replace = list(self.diff_replace) + list(nb_config.diff_replace)

        if regex_replace:
            logger.debug(f"Applying replacements: {regex_replace}")
            nb_initial_replace = regex_replace_nb(nb_initial, regex_replace)
            nb_final_replace = regex_replace_nb(nb_final, regex_replace)
        else:
            nb_initial_replace = nb_initial
            nb_final_replace = nb_final

        full_diff = diff_notebooks(nb_initial_replace, nb_final_replace)

        diff_ignore = copy.deepcopy(nb_config.diff_ignore)
        diff_ignore.update(self.diff_ignore)
        logger.debug(f"filtering diff by ignoring: {diff_ignore}")
        filtered_diff = filter_diff(full_diff, diff_ignore)

        diff_string = diff_to_string(
            nb_initial_replace,
            filtered_diff,
            use_color=self.diff_use_color,
            color_words=self.diff_color_words,
        )
        # TODO optionally write diff to file

        regen_exc = None
        if filtered_diff and self.force_regen and not exec_error:
            if hasattr(path, "close") and hasattr(path, "name"):
                path.close()
                with open(path.name, "w") as handle:
                    nbformat.write(nb_final, handle)
            else:
                nbformat.write(nb_final, str(path))
            regen_exc = NBRegressionError(
                f"Files differ and --nb-force-regen set, "
                f"regenerating file at:\n- {abspath}"
            )

        if not raise_errors:
            pass
        elif exec_error:
            print("Diff up to exception:\n" + diff_string, file=sys.stderr)
            raise exec_error
        elif regen_exc:
            print("Diff before regeneration:\n" + diff_string, file=sys.stderr)
            raise regen_exc
        elif filtered_diff:
            raise NBRegressionError(diff_string)

        return NBRegressionResult(
            nb_initial, nb_final, full_diff, filtered_diff, diff_string, resources
        )
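

# Usage note (illustrative sketch only): one way this fixture could be driven
# directly from a test function. The test name, notebook file name, and ignore
# path below are hypothetical examples, not values defined by this module.
#
#     def test_notebook_regression():
#         fixture = NBRegressionFixture(
#             exec_timeout=50,
#             diff_ignore=("/metadata/language_info/version",),
#         )
#         fixture.check("test_nb.ipynb")  # raises NBRegressionError if diffs remain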


def _get_coverage_aliases(cov):
    """Retrieve path aliases from coverage.Coverage object."""
    from coverage.files import PathAliases

    aliases = None
    if cov.config.paths:
        aliases = PathAliases()
        for paths in cov.config.paths.values():
            result = paths[0]
            for pattern in paths[1:]:
                aliases.add(pattern, result)

    return aliases