summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorIan Cordasco <graffatcolmingov@gmail.com>2016-07-13 01:07:58 +0000
committerIan Cordasco <graffatcolmingov@gmail.com>2016-07-13 01:07:58 +0000
commit4d6929c8ab45ca8116e36ad63d3fb6a55b1177b5 (patch)
tree3054210bf1b865666c0068e024e2e42dc1e4d4de
parent58e67634cd24122e1961ac2e223d38465e2ab4c8 (diff)
parent2ffcf96b4b7a1fbc761796df1336a94408a79ed0 (diff)
downloadflake8-4d6929c8ab45ca8116e36ad63d3fb6a55b1177b5.tar.gz
Merge branch 'add-statistics' into 'master'
Add the statistics module *Description of changes* Start adding support for `--statistics` and legacy `get_statistics` API. *Related to:* (Add bug number here) See merge request !73
-rw-r--r--src/flake8/api/legacy.py7
-rw-r--r--src/flake8/statistics.py118
-rw-r--r--src/flake8/style_guide.py3
-rw-r--r--tests/unit/test_statistics.py121
4 files changed, 248 insertions, 1 deletions
diff --git a/src/flake8/api/legacy.py b/src/flake8/api/legacy.py
index 35c7101..9fc05bb 100644
--- a/src/flake8/api/legacy.py
+++ b/src/flake8/api/legacy.py
@@ -141,6 +141,8 @@ class Report(object):
.. warning:: This should not be instantiated by users.
"""
self._application = application
+ self._style_guide = application.guide
+ self._stats = self._style_guide.stats
@property
def total_errors(self):
@@ -149,4 +151,7 @@ class Report(object):
def get_statistics(self, violation):
"""Get the number of occurrences of a violation."""
- raise NotImplementedError('Statistics capturing needs to happen first')
+ return [
+ '{} {} {}'.format(s.count, s.error_code, s.message)
+ for s in self._stats.statistics_for(violation)
+ ]
diff --git a/src/flake8/statistics.py b/src/flake8/statistics.py
new file mode 100644
index 0000000..2512089
--- /dev/null
+++ b/src/flake8/statistics.py
@@ -0,0 +1,118 @@
+"""Statistic collection logic for Flake8."""
+import collections
+
+
+class Statistics(object):
+ """Manager of aggregated statistics for a run of Flake8."""
+
+ def __init__(self):
+ """Initialize the underlying dictionary for our statistics."""
+ self._store = {}
+
+ def record(self, error):
+ """Add the fact that the error was seen in the file.
+
+ :param error:
+ The Error instance containing the information about the violation.
+ :type error:
+ flake8.style_guide.Error
+ """
+ key = Key.create_from(error)
+ if key not in self._store:
+ self._store[key] = Statistic.create_from(error)
+ self._store[key].increment()
+
+ def statistics_for(self, prefix, filename=None):
+ """Generate statistics for the prefix and filename.
+
+ If you have a :class:`Statistics` object that has recorded errors,
+ you can generate the statistics for a prefix (e.g., ``E``, ``E1``,
+ ``W50``, ``W503``) with the optional filter of a filename as well.
+
+ .. code-block:: python
+
+ >>> stats = Statistics()
+ >>> stats.statistics_for('E12',
+ filename='src/flake8/statistics.py')
+ <generator ...>
+ >>> stats.statistics_for('W')
+ <generator ...>
+
+ :param str prefix:
+ The error class or specific error code to find statistics for.
+ :param str filename:
+ (Optional) The filename to further filter results by.
+ :returns:
+ Generator of instances of :class:`Statistic`
+ """
+ matching_errors = sorted(key for key in self._store.keys()
+ if key.matches(prefix, filename))
+ for error_code in matching_errors:
+ yield self._store[error_code]
+
+
+class Key(collections.namedtuple('Key', ['filename', 'code'])):
+ """Simple key structure for the Statistics dictionary.
+
+ To make things clearer, easier to read, and more understandable, we use a
+ namedtuple here for all Keys in the underlying dictionary for the
+ Statistics object.
+ """
+
+ __slots__ = ()
+
+ @classmethod
+ def create_from(cls, error):
+ """Create a Key from :class:`flake8.style_guide.Error`."""
+ return cls(
+ filename=error.filename,
+ code=error.code,
+ )
+
+ def matches(self, prefix, filename):
+ """Determine if this key matches some constraints.
+
+ :param str prefix:
+ The error code prefix that this key's error code should start with.
+ :param str filename:
+ The filename that we potentially want to match on. This can be
+ None to only match on error prefix.
+ :returns:
+ True if the Key's code starts with the prefix and either filename
+ is None, or the Key's filename matches the value passed in.
+ :rtype:
+ bool
+ """
+ return (self.code.startswith(prefix) and
+ (filename is None or
+ self.filename == filename))
+
+
+class Statistic(object):
+ """Simple wrapper around the logic of each statistic.
+
+ Instead of maintaining a simple but potentially hard to reason about
+ tuple, we create a small mutable class which has named attributes and
+ a couple of convenience methods on it.
+ """
+
+ def __init__(self, error_code, filename, message, count):
+ """Initialize our Statistic."""
+ self.error_code = error_code
+ self.filename = filename
+ self.message = message
+ self.count = count
+
+ @classmethod
+ def create_from(cls, error):
+ """Create a Statistic from a :class:`flake8.style_guide.Error`."""
+ return cls(
+ error_code=error.code,
+ filename=error.filename,
+ message=error.text,
+ count=0,
+ )
+
+ def increment(self):
+ """Increment the number of times we've seen this error in this file."""
+ self.count += 1
diff --git a/src/flake8/style_guide.py b/src/flake8/style_guide.py
index 89890ba..ed1b844 100644
--- a/src/flake8/style_guide.py
+++ b/src/flake8/style_guide.py
@@ -5,6 +5,7 @@ import linecache
import logging
import re
+from flake8 import statistics
from flake8 import utils
__all__ = (
@@ -74,6 +75,7 @@ class StyleGuide(object):
self.options = options
self.listener = listener_trie
self.formatter = formatter
+ self.stats = statistics.Statistics()
self._selected = tuple(options.select)
self._ignored = tuple(options.ignore)
self._decision_cache = {}
@@ -267,6 +269,7 @@ class StyleGuide(object):
if (error_is_selected and is_not_inline_ignored and
is_included_in_diff):
self.formatter.handle(error)
+ self.stats.record(error)
self.listener.notify(error.code, error)
return 1
return 0
diff --git a/tests/unit/test_statistics.py b/tests/unit/test_statistics.py
new file mode 100644
index 0000000..f95c638
--- /dev/null
+++ b/tests/unit/test_statistics.py
@@ -0,0 +1,121 @@
+"""Tests for the statistics module in Flake8."""
+import pytest
+
+from flake8 import statistics as stats
+from flake8 import style_guide
+
+DEFAULT_ERROR_CODE = 'E100'
+DEFAULT_FILENAME = 'file.py'
+DEFAULT_TEXT = 'Default text'
+
+
+def make_error(**kwargs):
+ """Create errors with a bunch of default values."""
+ return style_guide.Error(
+ code=kwargs.pop('code', DEFAULT_ERROR_CODE),
+ filename=kwargs.pop('filename', DEFAULT_FILENAME),
+ line_number=kwargs.pop('line_number', 1),
+ column_number=kwargs.pop('column_number', 1),
+ text=kwargs.pop('text', DEFAULT_TEXT),
+ physical_line=None,
+ )
+
+
+def test_key_creation():
+ """Verify how we create Keys from Errors."""
+ key = stats.Key.create_from(make_error())
+ assert key == (DEFAULT_FILENAME, DEFAULT_ERROR_CODE)
+ assert key.filename == DEFAULT_FILENAME
+ assert key.code == DEFAULT_ERROR_CODE
+
+
+@pytest.mark.parametrize('code, filename, args, expected_result', [
+ # Error prefix matches
+ ('E123', 'file000.py', ('E', None), True),
+ ('E123', 'file000.py', ('E1', None), True),
+ ('E123', 'file000.py', ('E12', None), True),
+ ('E123', 'file000.py', ('E123', None), True),
+ # Error prefix and filename match
+ ('E123', 'file000.py', ('E', 'file000.py'), True),
+ ('E123', 'file000.py', ('E1', 'file000.py'), True),
+ ('E123', 'file000.py', ('E12', 'file000.py'), True),
+ ('E123', 'file000.py', ('E123', 'file000.py'), True),
+ # Error prefix does not match
+ ('E123', 'file000.py', ('W', None), False),
+ # Error prefix matches but filename does not
+ ('E123', 'file000.py', ('E', 'file001.py'), False),
+ # Error prefix does not match but filename does
+ ('E123', 'file000.py', ('W', 'file000.py'), False),
+ # Neither error prefix match nor filename
+ ('E123', 'file000.py', ('W', 'file001.py'), False),
+])
+ def test_key_matching(code, filename, args, expected_result):
+ """Verify Key#matches behaves as we expect with the above input."""
+ key = stats.Key.create_from(make_error(code=code, filename=filename))
+ assert key.matches(*args) is expected_result
+
+
+def test_statistic_creation():
+ """Verify how we create Statistic objects from Errors."""
+ stat = stats.Statistic.create_from(make_error())
+ assert stat.error_code == DEFAULT_ERROR_CODE
+ assert stat.message == DEFAULT_TEXT
+ assert stat.filename == DEFAULT_FILENAME
+ assert stat.count == 0
+
+
+def test_statistic_increment():
+ """Verify we update the count."""
+ stat = stats.Statistic.create_from(make_error())
+ assert stat.count == 0
+ stat.increment()
+ assert stat.count == 1
+
+
+def test_recording_statistics():
+ """Verify that we appropriately create a new Statistic and store it."""
+ aggregator = stats.Statistics()
+ assert list(aggregator.statistics_for('E')) == []
+ aggregator.record(make_error())
+ storage = aggregator._store
+ for key, value in storage.items():
+ assert isinstance(key, stats.Key)
+ assert isinstance(value, stats.Statistic)
+
+ assert storage[(DEFAULT_FILENAME, DEFAULT_ERROR_CODE)].count == 1
+
+
+def test_statistics_for_single_record():
+ """Show we can retrieve the only statistic recorded."""
+ aggregator = stats.Statistics()
+ assert list(aggregator.statistics_for('E')) == []
+ aggregator.record(make_error())
+ statistics = list(aggregator.statistics_for('E'))
+ assert len(statistics) == 1
+ assert isinstance(statistics[0], stats.Statistic)
+
+
+ def test_statistics_for_filters_by_filename():
+ """Show we can filter the recorded statistics by filename."""
+ aggregator = stats.Statistics()
+ assert list(aggregator.statistics_for('E')) == []
+ aggregator.record(make_error())
+ aggregator.record(make_error(filename='example.py'))
+
+ statistics = list(aggregator.statistics_for('E', DEFAULT_FILENAME))
+ assert len(statistics) == 1
+ assert isinstance(statistics[0], stats.Statistic)
+
+
+def test_statistic_for_retrieves_more_than_one_value():
+ """Show this works for more than a couple statistic values."""
+ aggregator = stats.Statistics()
+ for i in range(50):
+ aggregator.record(make_error(code='E1{:02d}'.format(i)))
+ aggregator.record(make_error(code='W2{:02d}'.format(i)))
+
+ statistics = list(aggregator.statistics_for('E'))
+ assert len(statistics) == 50
+
+ statistics = list(aggregator.statistics_for('W22'))
+ assert len(statistics) == 10