diff options
-rw-r--r-- | CONTRIBUTORS.txt | 2 | ||||
-rw-r--r-- | ChangeLog | 2 | ||||
-rw-r--r-- | pylint/checkers/similar.py | 16 | ||||
-rw-r--r-- | tests/checkers/unittest_similar.py | 36 | ||||
-rw-r--r-- | tests/input/similar3 | 25 | ||||
-rw-r--r-- | tests/input/similar4 | 25 |
6 files changed, 100 insertions, 6 deletions
diff --git a/CONTRIBUTORS.txt b/CONTRIBUTORS.txt index 66369aa6e..64c328f5c 100644 --- a/CONTRIBUTORS.txt +++ b/CONTRIBUTORS.txt @@ -403,3 +403,5 @@ contributors: * Yeting Li (yetingli): contributor * Frost Ming (frostming): contributor + +* Eli Fine (eli88fine): Fixed false positive duplicate code warning for lines with symbols only @@ -23,6 +23,8 @@ Release date: TBA * Fix a crash when a specified config file does not exist +* Fix ``duplicate-code`` false positive when lines only contain whitespace and non-alphanumeric characters (e.g. parentheses, brackets, commas, etc.) + What's New in Pylint 2.6.0? =========================== diff --git a/pylint/checkers/similar.py b/pylint/checkers/similar.py index 58dc0f807..82f79e8cc 100644 --- a/pylint/checkers/similar.py +++ b/pylint/checkers/similar.py @@ -14,6 +14,7 @@ # Copyright (c) 2019 Taewon D. Kim <kimt33@mcmaster.ca> # Copyright (c) 2019 Pierre Sassoulas <pierre.sassoulas@gmail.com> # Copyright (c) 2020 Shiv Venkatasubrahmanyam <shvenkat@users.noreply.github.com> +# Copyright (c) 2020 Eli Fine <ejfine@gmail.com> # Licensed under the GPL: https://www.gnu.org/licenses/old-licenses/gpl-2.0.html # For details: https://github.com/PyCQA/pylint/blob/master/COPYING @@ -22,6 +23,7 @@ """a similarities / code duplication command line tool and pylint checker """ +import re import sys from collections import defaultdict from getopt import getopt @@ -34,6 +36,8 @@ from pylint.interfaces import IRawChecker from pylint.reporters.ureports.nodes import Table from pylint.utils import decoding_stream +REGEX_FOR_LINES_WITH_CONTENT = re.compile(r".*\w+") + class Similar: """finds copy-pasted lines of code in a project""" @@ -129,21 +133,21 @@ class Similar: skip = 1 num = 0 for index2 in find(lineset1[index1]): - non_blank = 0 + num_lines_with_content = 0 for num, ((_, line1), (_, line2)) in enumerate( zip(lines1(index1), lines2(index2)) ): if line1 != line2: - if non_blank > min_lines: + if num_lines_with_content > min_lines: yield 
num, lineset1, index1, lineset2, index2 skip = max(skip, num) break - if line1: - non_blank += 1 + if re.match(REGEX_FOR_LINES_WITH_CONTENT, line1): + num_lines_with_content += 1 else: - # we may have reach the end + # we may have reached the end num += 1 - if non_blank > min_lines: + if num_lines_with_content > min_lines: yield num, lineset1, index1, lineset2, index2 skip = max(skip, num) index1 += skip diff --git a/tests/checkers/unittest_similar.py b/tests/checkers/unittest_similar.py index 81e752780..ed4af2f5c 100644 --- a/tests/checkers/unittest_similar.py +++ b/tests/checkers/unittest_similar.py @@ -9,6 +9,7 @@ # Copyright (c) 2019-2020 Pierre Sassoulas <pierre.sassoulas@gmail.com> # Copyright (c) 2019 Ashley Whetter <ashley@awhetter.co.uk> # Copyright (c) 2019 Taewon D. Kim <kimt33@mcmaster.ca> +# Copyright (c) 2020 Eli Fine <ejfine@gmail.com> # Licensed under the GPL: https://www.gnu.org/licenses/old-licenses/gpl-2.0.html # For details: https://github.com/PyCQA/pylint/blob/master/COPYING @@ -24,6 +25,8 @@ from pylint.checkers import similar INPUT = Path(__file__).parent / ".." / "input" SIMILAR1 = str(INPUT / "similar1") SIMILAR2 = str(INPUT / "similar2") +SIMILAR3 = str(INPUT / "similar3") +SIMILAR4 = str(INPUT / "similar4") MULTILINE = str(INPUT / "multiline-import") HIDE_CODE_WITH_IMPORTS = str(INPUT / "hide_code_with_imports.py") @@ -178,6 +181,39 @@ TOTAL lines=60 duplicates=5 percent=8.33 ) +def test_lines_without_meaningful_content_do_not_trigger_similarity(): + output = StringIO() + with redirect_stdout(output), pytest.raises(SystemExit) as ex: + similar.Run([SIMILAR3, SIMILAR4]) + assert ex.value.code == 0 + assert ( + output.getvalue().strip() + == ( + """ +14 similar lines in 2 files +==%s:11 +==%s:11 + b = ( + ( + [ + "Lines 12-25 still trigger a similarity...", + "...warning, because..." + ], + [ + "...even after ignoring lines with only symbols..." 
+ ], + ), + ( + "...there are still 5 similar lines in this code block.", + ) + ) +TOTAL lines=50 duplicates=14 percent=28.00 +""" + % (SIMILAR3, SIMILAR4) + ).strip() + ) + + def test_help(): output = StringIO() with redirect_stdout(output): diff --git a/tests/input/similar3 b/tests/input/similar3 new file mode 100644 index 000000000..337431fe5 --- /dev/null +++ b/tests/input/similar3 @@ -0,0 +1,25 @@ +a = ( + ( + [ + "Lines 1-8 used to trigger a similarity warning, but now they don't because...", + "...lines with only symbols are ignored and don't count in the line count limit." + ] + ), +) + +"This line in similar3 breaks up the sections of code by being different than similar4" + +b = ( + ( + [ + "Lines 12-25 still trigger a similarity...", + "...warning, because..." + ], + [ + "...even after ignoring lines with only symbols..." + ], + ), + ( + "...there are still 5 similar lines in this code block.", + ) +) diff --git a/tests/input/similar4 b/tests/input/similar4 new file mode 100644 index 000000000..abdad26db --- /dev/null +++ b/tests/input/similar4 @@ -0,0 +1,25 @@ +a = ( + ( + [ + "Lines 1-8 used to trigger a similarity warning, but now they don't because...", + "...lines with only symbols are ignored and don't count in the line count limit." + ] + ), +) + +"This line in similar4 breaks up the sections of code by being different than similar3" + +b = ( + ( + [ + "Lines 12-25 still trigger a similarity...", + "...warning, because..." + ], + [ + "...even after ignoring lines with only symbols..." + ], + ), + ( + "...there are still 5 similar lines in this code block.", + ) +) |