Skip to content

Commit 5a4edde

Browse files
Deduplicate module file paths to prevent redundant scans. (pylint-dev#7747)
* Use dict for 'expand_modules' result rather than list. With 'path' as the key, we get deduplication for free and do not need to reprocess the list for deduplication later. * Fix deduplication to account for CLI args marker. * Fix corner case with CLI arg flag handling during deduplication. * Add 'deduplication' to custom Pyenchant dict. Closes pylint-dev#6242 Closes pylint-dev#4053 Co-authored-by: Eric McDonald <[email protected]> Co-authored-by: Pierre Sassoulas <[email protected]>
1 parent dff0adb commit 5a4edde

File tree

6 files changed

+107
-32
lines changed

6 files changed

+107
-32
lines changed

.pyenchant_pylint_custom_dict.txt

+1
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,7 @@ cyclomatic
7373
dataclass
7474
datetime
7575
debian
76+
deduplication
7677
deepcopy
7778
defaultdicts
7879
defframe

doc/whatsnew/fragments/6242.bugfix

+4
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
Pylint will now filter duplicates given to it before linting. The output should
2+
be the same whether a file is given/discovered multiple times or not.
3+
4+
Closes #6242, #4053

pylint/lint/expand_modules.py

+16-14
Original file line numberDiff line numberDiff line change
@@ -66,11 +66,11 @@ def expand_modules(
6666
ignore_list: list[str],
6767
ignore_list_re: list[Pattern[str]],
6868
ignore_list_paths_re: list[Pattern[str]],
69-
) -> tuple[list[ModuleDescriptionDict], list[ErrorDescriptionDict]]:
69+
) -> tuple[dict[str, ModuleDescriptionDict], list[ErrorDescriptionDict]]:
7070
"""Take a list of files/modules/packages and return the list of tuple
7171
(file, module name) which have to be actually checked.
7272
"""
73-
result: list[ModuleDescriptionDict] = []
73+
result: dict[str, ModuleDescriptionDict] = {}
7474
errors: list[ErrorDescriptionDict] = []
7575
path = sys.path.copy()
7676

@@ -120,15 +120,17 @@ def expand_modules(
120120
is_namespace = modutils.is_namespace(spec)
121121
is_directory = modutils.is_directory(spec)
122122
if not is_namespace:
123-
result.append(
124-
{
123+
if filepath in result:
124+
# Always set arg flag if module explicitly given.
125+
result[filepath]["isarg"] = True
126+
else:
127+
result[filepath] = {
125128
"path": filepath,
126129
"name": modname,
127130
"isarg": True,
128131
"basepath": filepath,
129132
"basename": modname,
130133
}
131-
)
132134
has_init = (
133135
not (modname.endswith(".__init__") or modname == "__init__")
134136
and os.path.basename(filepath) == "__init__.py"
@@ -148,13 +150,13 @@ def expand_modules(
148150
subfilepath, is_namespace, path=additional_search_path
149151
)
150152
submodname = ".".join(modpath)
151-
result.append(
152-
{
153-
"path": subfilepath,
154-
"name": submodname,
155-
"isarg": False,
156-
"basepath": filepath,
157-
"basename": modname,
158-
}
159-
)
153+
# Preserve arg flag if module is also explicitly given.
154+
isarg = subfilepath in result and result[subfilepath]["isarg"]
155+
result[subfilepath] = {
156+
"path": subfilepath,
157+
"name": submodname,
158+
"isarg": isarg,
159+
"basepath": filepath,
160+
"basename": modname,
161+
}
160162
return result, errors

pylint/lint/pylinter.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -871,12 +871,12 @@ def _iterate_file_descrs(
871871
872872
The returned generator yield one item for each Python module that should be linted.
873873
"""
874-
for descr in self._expand_files(files_or_modules):
874+
for descr in self._expand_files(files_or_modules).values():
875875
name, filepath, is_arg = descr["name"], descr["path"], descr["isarg"]
876876
if self.should_analyze_file(name, filepath, is_argument=is_arg):
877877
yield FileItem(name, filepath, descr["basename"])
878878

879-
def _expand_files(self, modules: Sequence[str]) -> list[ModuleDescriptionDict]:
879+
def _expand_files(self, modules: Sequence[str]) -> dict[str, ModuleDescriptionDict]:
880880
"""Get modules and errors from a list of modules and handle errors."""
881881
result, errors = expand_modules(
882882
modules,

tests/lint/unittest_expand_modules.py

+61-16
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,14 @@ def test__is_in_ignore_list_re_match() -> None:
4545
"path": EXPAND_MODULES,
4646
}
4747

48+
this_file_from_init_deduplicated = {
49+
"basename": "lint",
50+
"basepath": INIT_PATH,
51+
"isarg": True,
52+
"name": "lint.unittest_expand_modules",
53+
"path": EXPAND_MODULES,
54+
}
55+
4856
unittest_lint = {
4957
"basename": "lint",
5058
"basepath": INIT_PATH,
@@ -77,7 +85,6 @@ def test__is_in_ignore_list_re_match() -> None:
7785
"path": str(TEST_DIRECTORY / "lint/test_caching.py"),
7886
}
7987

80-
8188
init_of_package = {
8289
"basename": "lint",
8390
"basepath": INIT_PATH,
@@ -87,6 +94,20 @@ def test__is_in_ignore_list_re_match() -> None:
8794
}
8895

8996

97+
def _list_expected_package_modules(
98+
deduplicating: bool = False,
99+
) -> tuple[dict[str, object], ...]:
100+
"""Generates reusable list of modules for our package."""
101+
return (
102+
init_of_package,
103+
test_caching,
104+
test_pylinter,
105+
test_utils,
106+
this_file_from_init_deduplicated if deduplicating else this_file_from_init,
107+
unittest_lint,
108+
)
109+
110+
90111
class TestExpandModules(CheckerTestCase):
91112
"""Test the expand_modules function while allowing options to be set."""
92113

@@ -102,17 +123,13 @@ class Checker(BaseChecker):
102123
@pytest.mark.parametrize(
103124
"files_or_modules,expected",
104125
[
105-
([__file__], [this_file]),
126+
([__file__], {this_file["path"]: this_file}),
106127
(
107128
[str(Path(__file__).parent)],
108-
[
109-
init_of_package,
110-
test_caching,
111-
test_pylinter,
112-
test_utils,
113-
this_file_from_init,
114-
unittest_lint,
115-
],
129+
{
130+
module["path"]: module # pylint: disable=unsubscriptable-object
131+
for module in _list_expected_package_modules()
132+
},
116133
),
117134
],
118135
)
@@ -126,19 +143,48 @@ def test_expand_modules(self, files_or_modules, expected):
126143
ignore_list_re,
127144
self.linter.config.ignore_paths,
128145
)
129-
modules.sort(key=lambda d: d["name"])
130146
assert modules == expected
131147
assert not errors
132148

133149
@pytest.mark.parametrize(
134150
"files_or_modules,expected",
135151
[
136-
([__file__], []),
152+
([__file__, __file__], {this_file["path"]: this_file}),
153+
(
154+
[EXPAND_MODULES, str(Path(__file__).parent), EXPAND_MODULES],
155+
{
156+
module["path"]: module # pylint: disable=unsubscriptable-object
157+
for module in _list_expected_package_modules(deduplicating=True)
158+
},
159+
),
160+
],
161+
)
162+
@set_config(ignore_paths="")
163+
def test_expand_modules_deduplication(
164+
self, files_or_modules: list[str], expected
165+
) -> None:
166+
"""Test expand_modules deduplication."""
167+
ignore_list: list[str] = []
168+
ignore_list_re: list[re.Pattern[str]] = []
169+
modules, errors = expand_modules(
170+
files_or_modules,
171+
ignore_list,
172+
ignore_list_re,
173+
self.linter.config.ignore_paths,
174+
)
175+
assert modules == expected
176+
assert not errors
177+
178+
@pytest.mark.parametrize(
179+
"files_or_modules,expected",
180+
[
181+
([__file__], {}),
137182
(
138183
[str(Path(__file__).parent)],
139-
[
140-
init_of_package,
141-
],
184+
{
185+
module["path"]: module # pylint: disable=unsubscriptable-object
186+
for module in (init_of_package,)
187+
},
142188
),
143189
],
144190
)
@@ -152,6 +198,5 @@ def test_expand_modules_with_ignore(self, files_or_modules, expected):
152198
ignore_list_re,
153199
self.linter.config.ignore_paths,
154200
)
155-
modules.sort(key=lambda d: d["name"])
156201
assert modules == expected
157202
assert not errors

tests/test_pylint_runners.py

+23
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,10 @@
55

66
from __future__ import annotations
77

8+
import contextlib
89
import os
910
import pathlib
11+
import shlex
1012
import sys
1113
from collections.abc import Callable
1214
from io import BufferedReader
@@ -47,6 +49,27 @@ def test_runner_with_arguments(runner: Callable, tmpdir: LocalPath) -> None:
4749
assert err.value.code == 0
4850

4951

52+
def test_pylint_argument_deduplication(
53+
tmpdir: LocalPath, tests_directory: pathlib.Path
54+
) -> None:
55+
"""Check that the Pylint runner does not over-report on duplicate
56+
arguments.
57+
58+
See https://github.com/PyCQA/pylint/issues/6242 and
59+
https://github.com/PyCQA/pylint/issues/4053
60+
"""
61+
filepath = str(tests_directory / "functional/t/too/too_many_branches.py")
62+
testargs = shlex.split("--report n --score n --max-branches 13")
63+
testargs.extend([filepath] * 4)
64+
exit_stack = contextlib.ExitStack()
65+
exit_stack.enter_context(tmpdir.as_cwd())
66+
exit_stack.enter_context(patch.object(sys, "argv", testargs))
67+
err = exit_stack.enter_context(pytest.raises(SystemExit))
68+
with exit_stack:
69+
run_pylint(testargs)
70+
assert err.value.code == 0
71+
72+
5073
def test_pylint_run_jobs_equal_zero_dont_crash_with_cpu_fraction(
5174
tmpdir: LocalPath,
5275
) -> None:

0 commit comments

Comments
 (0)