Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add --remove-all-notebook-metadata flag #163

Merged
merged 1 commit into from
Jun 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 13 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -69,8 +69,10 @@ The check can be run with the following flags:
- To ignore cell outputs use `--preserve-cell-outputs` or the short form `-o`.
- To ignore cell execution counts use `--preserve-execution-counts` or the short
form `-c`.
- To ignore notebook metadata (such as language version) use
- To ignore the language version in the notebook metadata use
`--preserve-notebook-metadata` or the short form `-n`.
- To check that the notebook does not contain any notebook metadata use
`--remove-all-notebook-metadata` or the short form `-M`.

For example, to check if a notebook is clean whilst ignoring notebook metadata:

Expand Down Expand Up @@ -126,6 +128,8 @@ The cleaning can be run with the following flags:
short form `-c`.
- To preserve notebook metadata (such as language version) use
`--preserve-notebook-metadata` or the short form `-n`.
- To remove all notebook metadata use `--remove-all-notebook-metadata` or the
short form `-M`.

For example, to clean a notebook whilst preserving notebook metadata:

Expand Down Expand Up @@ -186,12 +190,19 @@ To preserve cell execution counts, use:
nb-clean add-filter --preserve-execution-counts
```

To preserve notebook metadata, such as language version, use:
To preserve notebook `language_info.version` metadata, use:

```bash
nb-clean add-filter --preserve-notebook-metadata
```

By default, `nb-clean` does not remove all notebook metadata. To remove all
notebook metadata, use:

```bash
nb-clean add-filter --remove-all-notebook-metadata
```

`nb-clean` will configure a filter in the Git repository in which it is run, and
won't mutate your global or system Git configuration. To remove the filter, run:

Expand Down
32 changes: 31 additions & 1 deletion src/nb_clean/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ def git_attributes_path() -> pathlib.Path:
def add_git_filter(
*,
remove_empty_cells: bool = False,
remove_all_notebook_metadata: bool = False,
preserve_cell_metadata: Collection[str] | None = None,
preserve_cell_outputs: bool = False,
preserve_execution_counts: bool = False,
Expand All @@ -93,6 +94,8 @@ def add_git_filter(
----------
remove_empty_cells : bool, default False
If True, remove empty cells.
remove_all_notebook_metadata : bool, default False
If True, remove all notebook metadata.
preserve_cell_metadata : list of str or None, default None
If None, clean all cell metadata.
If [], preserve all cell metadata.
Expand All @@ -106,6 +109,10 @@ def add_git_filter(
If True, preserve notebook metadata such as language version.

"""
if preserve_notebook_metadata and remove_all_notebook_metadata:
msg = "`preserve_notebook_metadata` and `remove_all_notebook_metadata` cannot both be `True`"
raise ValueError(msg)

command = ["nb-clean", "clean"]

if remove_empty_cells:
Expand All @@ -128,6 +135,9 @@ def add_git_filter(
if preserve_notebook_metadata:
command.append("--preserve-notebook-metadata")

if remove_all_notebook_metadata:
command.append("--remove-all-notebook-metadata")

git("config", "filter.nb-clean.clean", " ".join(command))

attributes_path = git_attributes_path()
Expand Down Expand Up @@ -159,6 +169,7 @@ def check_notebook(
notebook: nbformat.NotebookNode,
*,
remove_empty_cells: bool = False,
remove_all_notebook_metadata: bool = False,
preserve_cell_metadata: Collection[str] | None = None,
preserve_cell_outputs: bool = False,
preserve_execution_counts: bool = False,
Expand All @@ -173,6 +184,8 @@ def check_notebook(
The notebook.
remove_empty_cells : bool, default False
If True, also check for the presence of empty cells.
remove_all_notebook_metadata : bool, default False
If True, also check for the presence of any notebook metadata.
preserve_cell_metadata : list of str or None, default None
If None, check for all cell metadata.
If [], don't check for any cell metadata.
Expand All @@ -193,6 +206,10 @@ def check_notebook(
True if the notebook is clean, False otherwise.

"""
if preserve_notebook_metadata and remove_all_notebook_metadata:
msg = "`preserve_notebook_metadata` and `remove_all_notebook_metadata` cannot both be `True`"
raise ValueError(msg)

is_clean = True

for index, cell in enumerate(notebook.cells):
Expand Down Expand Up @@ -227,6 +244,10 @@ def check_notebook(
print(f"{prefix}: outputs")
is_clean = False

if remove_all_notebook_metadata and notebook.metadata:
print(f"{filename}: metadata")
is_clean = False

if not preserve_notebook_metadata:
with contextlib.suppress(KeyError):
_ = notebook["metadata"]["language_info"]["version"]
Expand All @@ -240,6 +261,7 @@ def clean_notebook(
notebook: nbformat.NotebookNode,
*,
remove_empty_cells: bool = False,
remove_all_notebook_metadata: bool = False,
preserve_cell_metadata: Collection[str] | None = None,
preserve_cell_outputs: bool = False,
preserve_execution_counts: bool = False,
Expand All @@ -253,6 +275,8 @@ def clean_notebook(
The notebook.
remove_empty_cells : bool, default False
If True, remove empty cells.
remove_all_notebook_metadata : bool, default False
If True, remove all notebook metadata.
preserve_cell_metadata : list of str or None, default None
If None, clean all cell metadata.
If [], preserve all cell metadata.
Expand All @@ -271,6 +295,10 @@ def clean_notebook(
The cleaned notebook.

"""
if preserve_notebook_metadata and remove_all_notebook_metadata:
msg = "`preserve_notebook_metadata` and `remove_all_notebook_metadata` cannot both be `True`"
raise ValueError(msg)

if remove_empty_cells:
notebook.cells = [cell for cell in notebook.cells if cell["source"]]

Expand All @@ -294,7 +322,9 @@ def clean_notebook(
else:
cell["outputs"] = []

if not preserve_notebook_metadata:
if remove_all_notebook_metadata:
notebook.metadata = {}
elif not preserve_notebook_metadata:
with contextlib.suppress(KeyError):
del notebook["metadata"]["language_info"]["version"]

Expand Down
21 changes: 21 additions & 0 deletions src/nb_clean/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ def add_filter(args: argparse.Namespace) -> None:
try:
nb_clean.add_git_filter(
remove_empty_cells=args.remove_empty_cells,
remove_all_notebook_metadata=args.remove_all_notebook_metadata,
preserve_cell_metadata=args.preserve_cell_metadata,
preserve_cell_outputs=args.preserve_cell_outputs,
preserve_execution_counts=args.preserve_execution_counts,
Expand Down Expand Up @@ -114,6 +115,7 @@ def check(args: argparse.Namespace) -> None:
is_clean = nb_clean.check_notebook(
notebook,
remove_empty_cells=args.remove_empty_cells,
remove_all_notebook_metadata=args.remove_all_notebook_metadata,
preserve_cell_metadata=args.preserve_cell_metadata,
preserve_cell_outputs=args.preserve_cell_outputs,
preserve_execution_counts=args.preserve_execution_counts,
Expand Down Expand Up @@ -152,6 +154,7 @@ def clean(args: argparse.Namespace) -> None:
notebook = nb_clean.clean_notebook(
notebook,
remove_empty_cells=args.remove_empty_cells,
remove_all_notebook_metadata=args.remove_all_notebook_metadata,
preserve_cell_metadata=args.preserve_cell_metadata,
preserve_cell_outputs=args.preserve_cell_outputs,
preserve_execution_counts=args.preserve_execution_counts,
Expand Down Expand Up @@ -181,6 +184,12 @@ def parse_args(args: list[str]) -> argparse.Namespace:
add_filter_parser.add_argument(
"-e", "--remove-empty-cells", action="store_true", help="remove empty cells"
)
add_filter_parser.add_argument(
"-M",
"--remove-all-notebook-metadata",
action="store_true",
help="remove all notebook metadata",
)
add_filter_parser.add_argument(
"-m",
"--preserve-cell-metadata",
Expand Down Expand Up @@ -226,6 +235,12 @@ def parse_args(args: list[str]) -> argparse.Namespace:
check_parser.add_argument(
"-e", "--remove-empty-cells", action="store_true", help="check for empty cells"
)
check_parser.add_argument(
"-M",
"--remove-all-notebook-metadata",
action="store_true",
help="check for any notebook metadata",
)
check_parser.add_argument(
"-m",
"--preserve-cell-metadata",
Expand Down Expand Up @@ -262,6 +277,12 @@ def parse_args(args: list[str]) -> argparse.Namespace:
clean_parser.add_argument(
"-e", "--remove-empty-cells", action="store_true", help="remove empty cells"
)
clean_parser.add_argument(
"-M",
"--remove-all-notebook-metadata",
action="store_true",
help="remove all notebook metadata",
)
clean_parser.add_argument(
"-m",
"--preserve-cell-metadata",
Expand Down
6 changes: 6 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,3 +99,9 @@ def clean_notebook_with_outputs() -> nbformat.NotebookNode:
def clean_notebook_with_outputs_with_counts() -> nbformat.NotebookNode:
"""Return a notebook with cell outputs and output execution counts."""
return read_notebook("clean_with_outputs_with_counts.ipynb")


@pytest.fixture()
def clean_notebook_without_notebook_metadata() -> nbformat.NotebookNode:
    """Load the clean fixture notebook whose notebook-level metadata is empty."""
    fixture_filename = "clean_without_notebook_metadata.ipynb"
    return read_notebook(fixture_filename)
33 changes: 33 additions & 0 deletions tests/notebooks/clean_without_notebook_metadata.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"text = \"Hello, world\"\n",
"text"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print(text)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {},
"nbformat": 4,
"nbformat_minor": 2
}
21 changes: 21 additions & 0 deletions tests/notebooks/dirty_empty_octave.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "10cfba24-bab5-47a0-9ab8-5d1fc01f1f58",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Octave",
"language": "octave",
"name": "octave"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
48 changes: 48 additions & 0 deletions tests/test_check_notebook.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,3 +175,51 @@ def test_check_notebook_preserve_execution_counts(
notebook, preserve_execution_counts=preserve_execution_counts
)
assert output is is_clean


@pytest.mark.parametrize(
    ("notebook_name", "remove_all_notebook_metadata", "is_clean"),
    [
        ("clean_notebook_with_notebook_metadata", True, False),
        ("clean_notebook_with_notebook_metadata", False, False),
        ("clean_notebook_without_notebook_metadata", True, True),
        ("clean_notebook_without_notebook_metadata", False, True),
        ("clean_notebook", True, False),
        ("clean_notebook", False, True),
    ],
)
def test_check_notebook_remove_all_notebook_metadata(
    notebook_name: str,
    *,
    remove_all_notebook_metadata: bool,
    is_clean: bool,
    request: pytest.FixtureRequest,
) -> None:
    """Test nb_clean.check_notebook when checking for any notebook metadata.

    Each case names a notebook fixture, the value passed as
    `remove_all_notebook_metadata`, and the expected result of
    `nb_clean.check_notebook`.

    The `("clean_notebook_with_notebook_metadata", False, False)` case expects
    a not-clean result even though `remove_all_notebook_metadata` is False:
    that notebook contains `language_info.version`, which is detected when
    `preserve_notebook_metadata=False`.
    """
    # The fixture is resolved by name so that it can be parametrized.
    notebook = request.getfixturevalue(notebook_name)
    output = nb_clean.check_notebook(
        notebook, remove_all_notebook_metadata=remove_all_notebook_metadata
    )
    assert output is is_clean


def test_check_notebook_exclusive_arguments(
    dirty_notebook: nbformat.NotebookNode,
) -> None:
    """Test nb_clean.check_notebook rejects mutually exclusive metadata flags."""
    expected_message = (
        "`preserve_notebook_metadata` and `remove_all_notebook_metadata` "
        "cannot both be `True`"
    )
    conflicting_flags = {
        "remove_all_notebook_metadata": True,
        "preserve_notebook_metadata": True,
    }
    with pytest.raises(ValueError, match=expected_message):
        nb_clean.check_notebook(dirty_notebook, **conflicting_flags)
26 changes: 26 additions & 0 deletions tests/test_clean_notebook.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,3 +119,29 @@ def test_clean_notebook_preserve_execution_counts(
nb_clean.clean_notebook(dirty_notebook, preserve_execution_counts=True)
== clean_notebook_with_counts
)


def test_clean_notebook_remove_all_notebook_metadata(
    dirty_notebook: nbformat.NotebookNode,
    clean_notebook_without_notebook_metadata: nbformat.NotebookNode,
) -> None:
    """Test nb_clean.clean_notebook strips every notebook metadata entry."""
    cleaned = nb_clean.clean_notebook(
        dirty_notebook, remove_all_notebook_metadata=True
    )
    assert cleaned == clean_notebook_without_notebook_metadata


def test_clean_notebook_exclusive_arguments(
    dirty_notebook: nbformat.NotebookNode,
) -> None:
    """Test nb_clean.clean_notebook rejects mutually exclusive metadata flags."""
    expected_message = (
        "`preserve_notebook_metadata` and `remove_all_notebook_metadata` "
        "cannot both be `True`"
    )
    conflicting_flags = {
        "remove_all_notebook_metadata": True,
        "preserve_notebook_metadata": True,
    }
    with pytest.raises(ValueError, match=expected_message):
        nb_clean.clean_notebook(dirty_notebook, **conflicting_flags)
Loading