summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- CONTRIBUTORS.txt 2
-rw-r--r-- ChangeLog 2
-rw-r--r-- pylint/checkers/similar.py 16
-rw-r--r-- tests/checkers/unittest_similar.py 36
-rw-r--r-- tests/input/similar3 25
-rw-r--r-- tests/input/similar4 25
6 files changed, 100 insertions, 6 deletions
diff --git a/CONTRIBUTORS.txt b/CONTRIBUTORS.txt
index 66369aa6e..64c328f5c 100644
--- a/CONTRIBUTORS.txt
+++ b/CONTRIBUTORS.txt
@@ -403,3 +403,5 @@ contributors:
* Yeting Li (yetingli): contributor
* Frost Ming (frostming): contributor
+
+* Eli Fine (eli88fine): Fixed false positive duplicate code warning for lines with symbols only
diff --git a/ChangeLog b/ChangeLog
index a5d9d531a..0edde4917 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -23,6 +23,8 @@ Release date: TBA
* Fix a crash when a specified config file does not exist
+* Fix ``duplicate-code`` false positive when lines only contain whitespace and non-alphanumeric characters (e.g. parentheses, brackets, commas, etc.)
+
What's New in Pylint 2.6.0?
===========================
diff --git a/pylint/checkers/similar.py b/pylint/checkers/similar.py
index 58dc0f807..82f79e8cc 100644
--- a/pylint/checkers/similar.py
+++ b/pylint/checkers/similar.py
@@ -14,6 +14,7 @@
# Copyright (c) 2019 Taewon D. Kim <kimt33@mcmaster.ca>
# Copyright (c) 2019 Pierre Sassoulas <pierre.sassoulas@gmail.com>
# Copyright (c) 2020 Shiv Venkatasubrahmanyam <shvenkat@users.noreply.github.com>
+# Copyright (c) 2020 Eli Fine <ejfine@gmail.com>
# Licensed under the GPL: https://www.gnu.org/licenses/old-licenses/gpl-2.0.html
# For details: https://github.com/PyCQA/pylint/blob/master/COPYING
@@ -22,6 +23,7 @@
"""a similarities / code duplication command line tool and pylint checker
"""
+import re
import sys
from collections import defaultdict
from getopt import getopt
@@ -34,6 +36,8 @@ from pylint.interfaces import IRawChecker
from pylint.reporters.ureports.nodes import Table
from pylint.utils import decoding_stream
+REGEX_FOR_LINES_WITH_CONTENT = re.compile(r".*\w+")
+
class Similar:
"""finds copy-pasted lines of code in a project"""
@@ -129,21 +133,21 @@ class Similar:
skip = 1
num = 0
for index2 in find(lineset1[index1]):
- non_blank = 0
+ num_lines_with_content = 0
for num, ((_, line1), (_, line2)) in enumerate(
zip(lines1(index1), lines2(index2))
):
if line1 != line2:
- if non_blank > min_lines:
+ if num_lines_with_content > min_lines:
yield num, lineset1, index1, lineset2, index2
skip = max(skip, num)
break
- if line1:
- non_blank += 1
+ if re.match(REGEX_FOR_LINES_WITH_CONTENT, line1):
+ num_lines_with_content += 1
else:
- # we may have reach the end
+ # we may have reached the end
num += 1
- if non_blank > min_lines:
+ if num_lines_with_content > min_lines:
yield num, lineset1, index1, lineset2, index2
skip = max(skip, num)
index1 += skip
diff --git a/tests/checkers/unittest_similar.py b/tests/checkers/unittest_similar.py
index 81e752780..ed4af2f5c 100644
--- a/tests/checkers/unittest_similar.py
+++ b/tests/checkers/unittest_similar.py
@@ -9,6 +9,7 @@
# Copyright (c) 2019-2020 Pierre Sassoulas <pierre.sassoulas@gmail.com>
# Copyright (c) 2019 Ashley Whetter <ashley@awhetter.co.uk>
# Copyright (c) 2019 Taewon D. Kim <kimt33@mcmaster.ca>
+# Copyright (c) 2020 Eli Fine <ejfine@gmail.com>
# Licensed under the GPL: https://www.gnu.org/licenses/old-licenses/gpl-2.0.html
# For details: https://github.com/PyCQA/pylint/blob/master/COPYING
@@ -24,6 +25,8 @@ from pylint.checkers import similar
INPUT = Path(__file__).parent / ".." / "input"
SIMILAR1 = str(INPUT / "similar1")
SIMILAR2 = str(INPUT / "similar2")
+SIMILAR3 = str(INPUT / "similar3")
+SIMILAR4 = str(INPUT / "similar4")
MULTILINE = str(INPUT / "multiline-import")
HIDE_CODE_WITH_IMPORTS = str(INPUT / "hide_code_with_imports.py")
@@ -178,6 +181,39 @@ TOTAL lines=60 duplicates=5 percent=8.33
)
+def test_lines_without_meaningful_content_do_not_trigger_similarity():
+ output = StringIO()
+ with redirect_stdout(output), pytest.raises(SystemExit) as ex:
+ similar.Run([SIMILAR3, SIMILAR4])
+ assert ex.value.code == 0
+ assert (
+ output.getvalue().strip()
+ == (
+ """
+14 similar lines in 2 files
+==%s:11
+==%s:11
+ b = (
+ (
+ [
+ "Lines 12-25 still trigger a similarity...",
+ "...warning, because..."
+ ],
+ [
+ "...even after ignoring lines with only symbols..."
+ ],
+ ),
+ (
+ "...there are still 5 similar lines in this code block.",
+ )
+ )
+TOTAL lines=50 duplicates=14 percent=28.00
+"""
+ % (SIMILAR3, SIMILAR4)
+ ).strip()
+ )
+
+
def test_help():
output = StringIO()
with redirect_stdout(output):
diff --git a/tests/input/similar3 b/tests/input/similar3
new file mode 100644
index 000000000..337431fe5
--- /dev/null
+++ b/tests/input/similar3
@@ -0,0 +1,25 @@
+a = (
+ (
+ [
+ "Lines 1-8 used to trigger a similarity warning, but now they don't because...",
+ "...lines with only symbols are ignored and don't count in the line count limit."
+ ]
+ ),
+)
+
+"This line in similar3 breaks up the sections of code by being different than similar4"
+
+b = (
+ (
+ [
+ "Lines 12-25 still trigger a similarity...",
+ "...warning, because..."
+ ],
+ [
+ "...even after ignoring lines with only symbols..."
+ ],
+ ),
+ (
+ "...there are still 5 similar lines in this code block.",
+ )
+)
diff --git a/tests/input/similar4 b/tests/input/similar4
new file mode 100644
index 000000000..abdad26db
--- /dev/null
+++ b/tests/input/similar4
@@ -0,0 +1,25 @@
+a = (
+ (
+ [
+ "Lines 1-8 used to trigger a similarity warning, but now they don't because...",
+ "...lines with only symbols are ignored and don't count in the line count limit."
+ ]
+ ),
+)
+
+"This line in similar4 breaks up the sections of code by being different than similar3"
+
+b = (
+ (
+ [
+ "Lines 12-25 still trigger a similarity...",
+ "...warning, because..."
+ ],
+ [
+ "...even after ignoring lines with only symbols..."
+ ],
+ ),
+ (
+ "...there are still 5 similar lines in this code block.",
+ )
+)