diff options
author | Takeshi KOMIYA <i.tkomiya@gmail.com> | 2021-02-06 01:33:52 +0900 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-02-06 01:33:52 +0900 |
commit | 163c7bbdc12411818de1ce0204ff29cd911bcaf2 (patch) | |
tree | f0f8c7c26b5f93798bd706d76bd258ae33e3c1bf | |
parent | a39b5f08e9ba853135e7833668406ce845491d71 (diff) | |
parent | 84130fff40102c29e4969444ae7b536c6ce4d7a3 (diff) | |
download | sphinx-git-163c7bbdc12411818de1ce0204ff29cd911bcaf2.tar.gz |
Merge pull request #8806 from tk0miya/refactor_linkcheck5
refactor: linkcheck: Separate worker feature from builder class
-rw-r--r-- | sphinx/builders/linkcheck.py | 225 | ||||
-rw-r--r-- | tests/test_build_linkcheck.py | 23 |
2 files changed, 148 insertions, 100 deletions
diff --git a/sphinx/builders/linkcheck.py b/sphinx/builders/linkcheck.py index 8877e2ed5..590eec201 100644 --- a/sphinx/builders/linkcheck.py +++ b/sphinx/builders/linkcheck.py @@ -12,13 +12,13 @@ import json import queue import re import socket -import threading import time import warnings from datetime import datetime, timezone from email.utils import parsedate_to_datetime from html.parser import HTMLParser from os import path +from threading import Thread from typing import Any, Dict, List, NamedTuple, Optional, Set, Tuple, cast from urllib.parse import unquote, urlparse @@ -129,9 +129,9 @@ class CheckExternalLinksBuilder(DummyBuilder): self.rate_limits = {} # type: Dict[str, RateLimit] self.wqueue = queue.PriorityQueue() # type: queue.PriorityQueue self.rqueue = queue.Queue() # type: queue.Queue - self.workers = [] # type: List[threading.Thread] + self.workers = [] # type: List[Thread] for i in range(self.config.linkcheck_workers): - thread = threading.Thread(target=self.check_thread, daemon=True) + thread = HyperlinkAvailabilityCheckWorker(self) thread.start() self.workers.append(thread) @@ -166,6 +166,134 @@ class CheckExternalLinksBuilder(DummyBuilder): return self._redirected def check_thread(self) -> None: + warnings.warn( + "%s.%s is deprecated." % (self.__class__.__name__, "check_thread"), + RemovedInSphinx50Warning, + stacklevel=2, + ) + # do nothing. + + def limit_rate(self, response: Response) -> Optional[float]: + warnings.warn( + "%s.%s is deprecated." % (self.__class__.__name__, "limit_rate"), + RemovedInSphinx50Warning, + stacklevel=2, + ) + return HyperlinkAvailabilityCheckWorker(self).limit_rate(response) + + def process_result(self, result: Tuple[str, str, int, str, str, int]) -> None: + uri, docname, lineno, status, info, code = result + + filename = self.env.doc2path(docname, None) + linkstat = dict(filename=filename, lineno=lineno, + status=status, code=code, uri=uri, + info=info) + if status == 'unchecked': + self.write_linkstat(linkstat) + return + if status == 'working' and info == 'old': + self.write_linkstat(linkstat) + return + if lineno: + logger.info('(%16s: line %4d) ', docname, lineno, nonl=True) + if status == 'ignored': + if info: + logger.info(darkgray('-ignored- ') + uri + ': ' + info) + else: + logger.info(darkgray('-ignored- ') + uri) + self.write_linkstat(linkstat) + elif status == 'local': + logger.info(darkgray('-local- ') + uri) + self.write_entry('local', docname, filename, lineno, uri) + self.write_linkstat(linkstat) + elif status == 'working': + logger.info(darkgreen('ok ') + uri + info) + self.write_linkstat(linkstat) + elif status == 'broken': + if self.app.quiet or self.app.warningiserror: + logger.warning(__('broken link: %s (%s)'), uri, info, + location=(filename, lineno)) + else: + logger.info(red('broken ') + uri + red(' - ' + info)) + self.write_entry('broken', docname, filename, lineno, uri + ': ' + info) + self.write_linkstat(linkstat) + elif status == 'redirected': + try: + text, color = { + 301: ('permanently', purple), + 302: ('with Found', purple), + 303: ('with See Other', purple), + 307: ('temporarily', turquoise), + 308: ('permanently', purple), + }[code] + except KeyError: + text, color = ('with unknown code', purple) + linkstat['text'] = text + logger.info(color('redirect ') + uri + color(' - ' + text + ' to ' + info)) + self.write_entry('redirected ' + text, docname, filename, + lineno, uri + ' to ' + info) + self.write_linkstat(linkstat) + else: + raise ValueError("Unknown status %s." % status) + + def write_entry(self, what: str, docname: str, filename: str, line: int, + uri: str) -> None: + self.txt_outfile.write("%s:%s: [%s] %s\n" % (filename, line, what, uri)) + + def write_linkstat(self, data: dict) -> None: + self.json_outfile.write(json.dumps(data)) + self.json_outfile.write('\n') + + def finish(self) -> None: + logger.info('') + + with open(path.join(self.outdir, 'output.txt'), 'w') as self.txt_outfile,\ + open(path.join(self.outdir, 'output.json'), 'w') as self.json_outfile: + total_links = 0 + for hyperlink in self.hyperlinks.values(): + if self.is_ignored_uri(hyperlink.uri): + self.process_result( + CheckResult(hyperlink.uri, hyperlink.docname, hyperlink.lineno, + 'ignored', '', 0)) + else: + self.wqueue.put(hyperlink, False) + total_links += 1 + + done = 0 + while done < total_links: + self.process_result(self.rqueue.get()) + done += 1 + + if self._broken: + self.app.statuscode = 1 + + self.wqueue.join() + # Shutdown threads. + for worker in self.workers: + self.wqueue.put((CHECK_IMMEDIATELY, None, None, None), False) + + +class HyperlinkAvailabilityCheckWorker(Thread): + """A worker class for checking the availability of hyperlinks.""" + + def __init__(self, builder: CheckExternalLinksBuilder) -> None: + self.app = builder.app + self.anchors_ignore = builder.anchors_ignore + self.auth = builder.auth + self.config = builder.config + self.env = builder.env + self.rate_limits = builder.rate_limits + self.rqueue = builder.rqueue + self.to_ignore = builder.to_ignore + self.wqueue = builder.wqueue + + self._good = builder._good + self._broken = builder._broken + self._redirected = builder._redirected + + super().__init__(daemon=True) + + def run(self) -> None: kwargs = {} if self.config.linkcheck_timeout: kwargs['timeout'] = self.config.linkcheck_timeout @@ -378,97 +506,6 @@ class CheckExternalLinksBuilder(DummyBuilder): self.rate_limits[netloc] = RateLimit(delay, next_check) return next_check - def process_result(self, result: CheckResult) -> None: - uri, docname, lineno, status, info, code = result - - filename = self.env.doc2path(docname, None) - linkstat = dict(filename=filename, lineno=lineno, - status=status, code=code, uri=uri, - info=info) - if status == 'unchecked': - self.write_linkstat(linkstat) - return - if status == 'working' and info == 'old': - self.write_linkstat(linkstat) - return - if lineno: - logger.info('(%16s: line %4d) ', docname, lineno, nonl=True) - if status == 'ignored': - if info: - logger.info(darkgray('-ignored- ') + uri + ': ' + info) - else: - logger.info(darkgray('-ignored- ') + uri) - self.write_linkstat(linkstat) - elif status == 'local': - logger.info(darkgray('-local- ') + uri) - self.write_entry('local', docname, filename, lineno, uri) - self.write_linkstat(linkstat) - elif status == 'working': - logger.info(darkgreen('ok ') + uri + info) - self.write_linkstat(linkstat) - elif status == 'broken': - if self.app.quiet or self.app.warningiserror: - logger.warning(__('broken link: %s (%s)'), uri, info, - location=(filename, lineno)) - else: - logger.info(red('broken ') + uri + red(' - ' + info)) - self.write_entry('broken', docname, filename, lineno, uri + ': ' + info) - self.write_linkstat(linkstat) - elif status == 'redirected': - try: - text, color = { - 301: ('permanently', purple), - 302: ('with Found', purple), - 303: ('with See Other', purple), - 307: ('temporarily', turquoise), - 308: ('permanently', purple), - }[code] - except KeyError: - text, color = ('with unknown code', purple) - linkstat['text'] = text - logger.info(color('redirect ') + uri + color(' - ' + text + ' to ' + info)) - self.write_entry('redirected ' + text, docname, filename, - lineno, uri + ' to ' + info) - self.write_linkstat(linkstat) - else: - raise ValueError("Unknown status %s." % status) - - def write_entry(self, what: str, docname: str, filename: str, line: int, - uri: str) -> None: - self.txt_outfile.write("%s:%s: [%s] %s\n" % (filename, line, what, uri)) - - def write_linkstat(self, data: dict) -> None: - self.json_outfile.write(json.dumps(data)) - self.json_outfile.write('\n') - - def finish(self) -> None: - logger.info('') - - with open(path.join(self.outdir, 'output.txt'), 'w') as self.txt_outfile,\ - open(path.join(self.outdir, 'output.json'), 'w') as self.json_outfile: - total_links = 0 - for hyperlink in self.hyperlinks.values(): - if self.is_ignored_uri(hyperlink.uri): - self.process_result( - CheckResult(hyperlink.uri, hyperlink.docname, hyperlink.lineno, - 'ignored', '', 0)) - else: - self.wqueue.put(hyperlink, False) - total_links += 1 - - done = 0 - while done < total_links: - self.process_result(self.rqueue.get()) - done += 1 - - if self._broken: - self.app.statuscode = 1 - - self.wqueue.join() - # Shutdown threads. - for worker in self.workers: - self.wqueue.put((CHECK_IMMEDIATELY, None, None, None), False) - class HyperlinkCollector(SphinxPostTransform): builders = ('linkcheck',) diff --git a/tests/test_build_linkcheck.py b/tests/test_build_linkcheck.py index 60b62435c..e297d42c4 100644 --- a/tests/test_build_linkcheck.py +++ b/tests/test_build_linkcheck.py @@ -21,7 +21,8 @@ from unittest import mock import pytest import requests -from sphinx.builders.linkcheck import CheckExternalLinksBuilder, RateLimit +from sphinx.builders.linkcheck import (CheckExternalLinksBuilder, + HyperlinkAvailabilityCheckWorker, RateLimit) from sphinx.util.console import strip_colors from .utils import CERT_FILE, http_server, https_server @@ -536,40 +537,50 @@ class FakeResponse: def test_limit_rate_default_sleep(app): checker = CheckExternalLinksBuilder(app) + checker.init() checker.rate_limits = {} + worker = HyperlinkAvailabilityCheckWorker(checker) with mock.patch('time.time', return_value=0.0): - next_check = checker.limit_rate(FakeResponse()) + next_check = worker.limit_rate(FakeResponse()) assert next_check == 60.0 def test_limit_rate_user_max_delay(app): app.config.linkcheck_rate_limit_timeout = 0.0 checker = CheckExternalLinksBuilder(app) + checker.init() checker.rate_limits = {} - next_check = checker.limit_rate(FakeResponse()) + worker = HyperlinkAvailabilityCheckWorker(checker) + next_check = worker.limit_rate(FakeResponse()) assert next_check is None def test_limit_rate_doubles_previous_wait_time(app): checker = CheckExternalLinksBuilder(app) + checker.init() checker.rate_limits = {"localhost": RateLimit(60.0, 0.0)} + worker = HyperlinkAvailabilityCheckWorker(checker) with mock.patch('time.time', return_value=0.0): - next_check = checker.limit_rate(FakeResponse()) + next_check = worker.limit_rate(FakeResponse()) assert next_check == 120.0 def test_limit_rate_clips_wait_time_to_max_time(app): checker = CheckExternalLinksBuilder(app) + checker.init() app.config.linkcheck_rate_limit_timeout = 90.0 checker.rate_limits = {"localhost": RateLimit(60.0, 0.0)} + worker = HyperlinkAvailabilityCheckWorker(checker) with mock.patch('time.time', return_value=0.0): - next_check = checker.limit_rate(FakeResponse()) + next_check = worker.limit_rate(FakeResponse()) assert next_check == 90.0 def test_limit_rate_bails_out_after_waiting_max_time(app): checker = CheckExternalLinksBuilder(app) + checker.init() app.config.linkcheck_rate_limit_timeout = 90.0 checker.rate_limits = {"localhost": RateLimit(90.0, 0.0)} - next_check = checker.limit_rate(FakeResponse()) + worker = HyperlinkAvailabilityCheckWorker(checker) + next_check = worker.limit_rate(FakeResponse()) assert next_check is None |