Skip to content

Commit 7b82cc8

Browse files
Deduplicate module file paths to prevent redundant scans. (#7747)
* Use dict for 'expand_modules' result rather than list. With 'path' as the key, we get deduplication for free and do not need to reprocess the list for deduplication later. * Fix deduplication to account for CLI args marker. * Fix corner case with CLI arg flag handling during deduplication. * Add 'deduplication' to custom Pyenchant dict. Closes #6242 Closes #4053 Co-authored-by: Eric McDonald <[email protected]> Co-authored-by: Pierre Sassoulas <[email protected]>
1 parent 83d9fdc commit 7b82cc8

File tree

6 files changed

+109
-34
lines changed

6 files changed

+109
-34
lines changed

.pyenchant_pylint_custom_dict.txt

+1
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,7 @@ cyclomatic
7474
dataclass
7575
datetime
7676
debian
77+
deduplication
7778
deepcopy
7879
defaultdicts
7980
defframe

doc/whatsnew/fragments/6242.bugfix

+4
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
Pylint will now filter duplicates given to it before linting. The output should
2+
be the same whether a file is given/discovered multiple times or not.
3+
4+
Closes #6242, #4053

pylint/lint/expand_modules.py

+16-14
Original file line numberDiff line numberDiff line change
@@ -66,11 +66,11 @@ def expand_modules(
6666
ignore_list: list[str],
6767
ignore_list_re: list[Pattern[str]],
6868
ignore_list_paths_re: list[Pattern[str]],
69-
) -> tuple[list[ModuleDescriptionDict], list[ErrorDescriptionDict]]:
69+
) -> tuple[dict[str, ModuleDescriptionDict], list[ErrorDescriptionDict]]:
7070
"""Take a list of files/modules/packages and return the list of tuple
7171
(file, module name) which have to be actually checked.
7272
"""
73-
result: list[ModuleDescriptionDict] = []
73+
result: dict[str, ModuleDescriptionDict] = {}
7474
errors: list[ErrorDescriptionDict] = []
7575
path = sys.path.copy()
7676

@@ -120,15 +120,17 @@ def expand_modules(
120120
is_namespace = modutils.is_namespace(spec)
121121
is_directory = modutils.is_directory(spec)
122122
if not is_namespace:
123-
result.append(
124-
{
123+
if filepath in result:
124+
# Always set arg flag if module explicitly given.
125+
result[filepath]["isarg"] = True
126+
else:
127+
result[filepath] = {
125128
"path": filepath,
126129
"name": modname,
127130
"isarg": True,
128131
"basepath": filepath,
129132
"basename": modname,
130133
}
131-
)
132134
has_init = (
133135
not (modname.endswith(".__init__") or modname == "__init__")
134136
and os.path.basename(filepath) == "__init__.py"
@@ -148,13 +150,13 @@ def expand_modules(
148150
subfilepath, is_namespace, path=additional_search_path
149151
)
150152
submodname = ".".join(modpath)
151-
result.append(
152-
{
153-
"path": subfilepath,
154-
"name": submodname,
155-
"isarg": False,
156-
"basepath": filepath,
157-
"basename": modname,
158-
}
159-
)
153+
# Preserve arg flag if module is also explicitly given.
154+
isarg = subfilepath in result and result[subfilepath]["isarg"]
155+
result[subfilepath] = {
156+
"path": subfilepath,
157+
"name": submodname,
158+
"isarg": isarg,
159+
"basepath": filepath,
160+
"basename": modname,
161+
}
160162
return result, errors

pylint/lint/pylinter.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -879,12 +879,12 @@ def _iterate_file_descrs(
879879
880880
The returned generator yield one item for each Python module that should be linted.
881881
"""
882-
for descr in self._expand_files(files_or_modules):
882+
for descr in self._expand_files(files_or_modules).values():
883883
name, filepath, is_arg = descr["name"], descr["path"], descr["isarg"]
884884
if self.should_analyze_file(name, filepath, is_argument=is_arg):
885885
yield FileItem(name, filepath, descr["basename"])
886886

887-
def _expand_files(self, modules: Sequence[str]) -> list[ModuleDescriptionDict]:
887+
def _expand_files(self, modules: Sequence[str]) -> dict[str, ModuleDescriptionDict]:
888888
"""Get modules and errors from a list of modules and handle errors."""
889889
result, errors = expand_modules(
890890
modules,

tests/lint/unittest_expand_modules.py

+63-18
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,14 @@ def test__is_in_ignore_list_re_match() -> None:
4545
"path": EXPAND_MODULES,
4646
}
4747

48+
this_file_from_init_deduplicated = {
49+
"basename": "lint",
50+
"basepath": INIT_PATH,
51+
"isarg": True,
52+
"name": "lint.unittest_expand_modules",
53+
"path": EXPAND_MODULES,
54+
}
55+
4856
unittest_lint = {
4957
"basename": "lint",
5058
"basepath": INIT_PATH,
@@ -77,7 +85,6 @@ def test__is_in_ignore_list_re_match() -> None:
7785
"path": str(TEST_DIRECTORY / "lint/test_caching.py"),
7886
}
7987

80-
8188
init_of_package = {
8289
"basename": "lint",
8390
"basepath": INIT_PATH,
@@ -87,6 +94,20 @@ def test__is_in_ignore_list_re_match() -> None:
8794
}
8895

8996

97+
def _list_expected_package_modules(
98+
deduplicating: bool = False,
99+
) -> tuple[dict[str, object], ...]:
100+
"""Generates reusable list of modules for our package."""
101+
return (
102+
init_of_package,
103+
test_caching,
104+
test_pylinter,
105+
test_utils,
106+
this_file_from_init_deduplicated if deduplicating else this_file_from_init,
107+
unittest_lint,
108+
)
109+
110+
90111
class TestExpandModules(CheckerTestCase):
91112
"""Test the expand_modules function while allowing options to be set."""
92113

@@ -102,23 +123,19 @@ class Checker(BaseChecker):
102123
@pytest.mark.parametrize(
103124
"files_or_modules,expected",
104125
[
105-
([__file__], [this_file]),
126+
([__file__], {this_file["path"]: this_file}),
106127
(
107128
[str(Path(__file__).parent)],
108-
[
109-
init_of_package,
110-
test_caching,
111-
test_pylinter,
112-
test_utils,
113-
this_file_from_init,
114-
unittest_lint,
115-
],
129+
{
130+
module["path"]: module # pylint: disable=unsubscriptable-object
131+
for module in _list_expected_package_modules()
132+
},
116133
),
117134
],
118135
)
119136
@set_config(ignore_paths="")
120137
def test_expand_modules(
121-
self, files_or_modules: list[str], expected: list[ModuleDescriptionDict]
138+
self, files_or_modules: list[str], expected: dict[str, ModuleDescriptionDict]
122139
) -> None:
123140
"""Test expand_modules with the default value of ignore-paths."""
124141
ignore_list: list[str] = []
@@ -129,25 +146,54 @@ def test_expand_modules(
129146
ignore_list_re,
130147
self.linter.config.ignore_paths,
131148
)
132-
modules.sort(key=lambda d: d["name"])
133149
assert modules == expected
134150
assert not errors
135151

136152
@pytest.mark.parametrize(
137153
"files_or_modules,expected",
138154
[
139-
([__file__], []),
155+
([__file__, __file__], {this_file["path"]: this_file}),
156+
(
157+
[EXPAND_MODULES, str(Path(__file__).parent), EXPAND_MODULES],
158+
{
159+
module["path"]: module # pylint: disable=unsubscriptable-object
160+
for module in _list_expected_package_modules(deduplicating=True)
161+
},
162+
),
163+
],
164+
)
165+
@set_config(ignore_paths="")
166+
def test_expand_modules_deduplication(
167+
self, files_or_modules: list[str], expected: dict[str, ModuleDescriptionDict]
168+
) -> None:
169+
"""Test expand_modules deduplication."""
170+
ignore_list: list[str] = []
171+
ignore_list_re: list[re.Pattern[str]] = []
172+
modules, errors = expand_modules(
173+
files_or_modules,
174+
ignore_list,
175+
ignore_list_re,
176+
self.linter.config.ignore_paths,
177+
)
178+
assert modules == expected
179+
assert not errors
180+
181+
@pytest.mark.parametrize(
182+
"files_or_modules,expected",
183+
[
184+
([__file__], {}),
140185
(
141186
[str(Path(__file__).parent)],
142-
[
143-
init_of_package,
144-
],
187+
{
188+
module["path"]: module # pylint: disable=unsubscriptable-object
189+
for module in (init_of_package,)
190+
},
145191
),
146192
],
147193
)
148194
@set_config(ignore_paths=".*/lint/.*")
149195
def test_expand_modules_with_ignore(
150-
self, files_or_modules: list[str], expected: list[ModuleDescriptionDict]
196+
self, files_or_modules: list[str], expected: dict[str, ModuleDescriptionDict]
151197
) -> None:
152198
"""Test expand_modules with a non-default value of ignore-paths."""
153199
ignore_list: list[str] = []
@@ -158,6 +204,5 @@ def test_expand_modules_with_ignore(
158204
ignore_list_re,
159205
self.linter.config.ignore_paths,
160206
)
161-
modules.sort(key=lambda d: d["name"])
162207
assert modules == expected
163208
assert not errors

tests/test_pylint_runners.py

+23
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,10 @@
55

66
from __future__ import annotations
77

8+
import contextlib
89
import os
910
import pathlib
11+
import shlex
1012
import sys
1113
from collections.abc import Sequence
1214
from io import BufferedReader
@@ -57,6 +59,27 @@ def test_runner_with_arguments(runner: _RunCallable, tmpdir: LocalPath) -> None:
5759
assert err.value.code == 0
5860

5961

62+
def test_pylint_argument_deduplication(
63+
tmpdir: LocalPath, tests_directory: pathlib.Path
64+
) -> None:
65+
"""Check that the Pylint runner does not over-report on duplicate
66+
arguments.
67+
68+
See https://github.com/PyCQA/pylint/issues/6242 and
69+
https://github.com/PyCQA/pylint/issues/4053
70+
"""
71+
filepath = str(tests_directory / "functional/t/too/too_many_branches.py")
72+
testargs = shlex.split("--report n --score n --max-branches 13")
73+
testargs.extend([filepath] * 4)
74+
exit_stack = contextlib.ExitStack()
75+
exit_stack.enter_context(tmpdir.as_cwd())
76+
exit_stack.enter_context(patch.object(sys, "argv", testargs))
77+
err = exit_stack.enter_context(pytest.raises(SystemExit))
78+
with exit_stack:
79+
run_pylint(testargs)
80+
assert err.value.code == 0
81+
82+
6083
def test_pylint_run_jobs_equal_zero_dont_crash_with_cpu_fraction(
6184
tmpdir: LocalPath,
6285
) -> None:

0 commit comments

Comments
 (0)