summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Takeshi KOMIYA <i.tkomiya@gmail.com>, 2021-02-06 01:33:52 +0900
committer: GitHub <noreply@github.com>, 2021-02-06 01:33:52 +0900
commit: 163c7bbdc12411818de1ce0204ff29cd911bcaf2 (patch)
tree: f0f8c7c26b5f93798bd706d76bd258ae33e3c1bf
parent: a39b5f08e9ba853135e7833668406ce845491d71 (diff)
parent: 84130fff40102c29e4969444ae7b536c6ce4d7a3 (diff)
download: sphinx-git-163c7bbdc12411818de1ce0204ff29cd911bcaf2.tar.gz
Merge pull request #8806 from tk0miya/refactor_linkcheck5
refactor: linkcheck: Separate worker feature from builder class
-rw-r--r--  sphinx/builders/linkcheck.py  225
-rw-r--r--  tests/test_build_linkcheck.py  23
2 files changed, 148 insertions, 100 deletions
diff --git a/sphinx/builders/linkcheck.py b/sphinx/builders/linkcheck.py
index 8877e2ed5..590eec201 100644
--- a/sphinx/builders/linkcheck.py
+++ b/sphinx/builders/linkcheck.py
@@ -12,13 +12,13 @@ import json
import queue
import re
import socket
-import threading
import time
import warnings
from datetime import datetime, timezone
from email.utils import parsedate_to_datetime
from html.parser import HTMLParser
from os import path
+from threading import Thread
from typing import Any, Dict, List, NamedTuple, Optional, Set, Tuple, cast
from urllib.parse import unquote, urlparse
@@ -129,9 +129,9 @@ class CheckExternalLinksBuilder(DummyBuilder):
self.rate_limits = {} # type: Dict[str, RateLimit]
self.wqueue = queue.PriorityQueue() # type: queue.PriorityQueue
self.rqueue = queue.Queue() # type: queue.Queue
- self.workers = [] # type: List[threading.Thread]
+ self.workers = [] # type: List[Thread]
for i in range(self.config.linkcheck_workers):
- thread = threading.Thread(target=self.check_thread, daemon=True)
+ thread = HyperlinkAvailabilityCheckWorker(self)
thread.start()
self.workers.append(thread)
@@ -166,6 +166,134 @@ class CheckExternalLinksBuilder(DummyBuilder):
return self._redirected
def check_thread(self) -> None:
+ warnings.warn(
+ "%s.%s is deprecated." % (self.__class__.__name__, "check_thread"),
+ RemovedInSphinx50Warning,
+ stacklevel=2,
+ )
+ # do nothing.
+
+ def limit_rate(self, response: Response) -> Optional[float]:
+ warnings.warn(
+ "%s.%s is deprecated." % (self.__class__.__name__, "limit_rate"),
+ RemovedInSphinx50Warning,
+ stacklevel=2,
+ )
+ return HyperlinkAvailabilityCheckWorker(self).limit_rate(response)
+
+ def process_result(self, result: Tuple[str, str, int, str, str, int]) -> None:
+ uri, docname, lineno, status, info, code = result
+
+ filename = self.env.doc2path(docname, None)
+ linkstat = dict(filename=filename, lineno=lineno,
+ status=status, code=code, uri=uri,
+ info=info)
+ if status == 'unchecked':
+ self.write_linkstat(linkstat)
+ return
+ if status == 'working' and info == 'old':
+ self.write_linkstat(linkstat)
+ return
+ if lineno:
+ logger.info('(%16s: line %4d) ', docname, lineno, nonl=True)
+ if status == 'ignored':
+ if info:
+ logger.info(darkgray('-ignored- ') + uri + ': ' + info)
+ else:
+ logger.info(darkgray('-ignored- ') + uri)
+ self.write_linkstat(linkstat)
+ elif status == 'local':
+ logger.info(darkgray('-local- ') + uri)
+ self.write_entry('local', docname, filename, lineno, uri)
+ self.write_linkstat(linkstat)
+ elif status == 'working':
+ logger.info(darkgreen('ok ') + uri + info)
+ self.write_linkstat(linkstat)
+ elif status == 'broken':
+ if self.app.quiet or self.app.warningiserror:
+ logger.warning(__('broken link: %s (%s)'), uri, info,
+ location=(filename, lineno))
+ else:
+ logger.info(red('broken ') + uri + red(' - ' + info))
+ self.write_entry('broken', docname, filename, lineno, uri + ': ' + info)
+ self.write_linkstat(linkstat)
+ elif status == 'redirected':
+ try:
+ text, color = {
+ 301: ('permanently', purple),
+ 302: ('with Found', purple),
+ 303: ('with See Other', purple),
+ 307: ('temporarily', turquoise),
+ 308: ('permanently', purple),
+ }[code]
+ except KeyError:
+ text, color = ('with unknown code', purple)
+ linkstat['text'] = text
+ logger.info(color('redirect ') + uri + color(' - ' + text + ' to ' + info))
+ self.write_entry('redirected ' + text, docname, filename,
+ lineno, uri + ' to ' + info)
+ self.write_linkstat(linkstat)
+ else:
+ raise ValueError("Unknown status %s." % status)
+
+ def write_entry(self, what: str, docname: str, filename: str, line: int,
+ uri: str) -> None:
+ self.txt_outfile.write("%s:%s: [%s] %s\n" % (filename, line, what, uri))
+
+ def write_linkstat(self, data: dict) -> None:
+ self.json_outfile.write(json.dumps(data))
+ self.json_outfile.write('\n')
+
+ def finish(self) -> None:
+ logger.info('')
+
+ with open(path.join(self.outdir, 'output.txt'), 'w') as self.txt_outfile,\
+ open(path.join(self.outdir, 'output.json'), 'w') as self.json_outfile:
+ total_links = 0
+ for hyperlink in self.hyperlinks.values():
+ if self.is_ignored_uri(hyperlink.uri):
+ self.process_result(
+ CheckResult(hyperlink.uri, hyperlink.docname, hyperlink.lineno,
+ 'ignored', '', 0))
+ else:
+ self.wqueue.put(hyperlink, False)
+ total_links += 1
+
+ done = 0
+ while done < total_links:
+ self.process_result(self.rqueue.get())
+ done += 1
+
+ if self._broken:
+ self.app.statuscode = 1
+
+ self.wqueue.join()
+ # Shutdown threads.
+ for worker in self.workers:
+ self.wqueue.put((CHECK_IMMEDIATELY, None, None, None), False)
+
+
+class HyperlinkAvailabilityCheckWorker(Thread):
+ """A worker class for checking the availability of hyperlinks."""
+
+ def __init__(self, builder: CheckExternalLinksBuilder) -> None:
+ self.app = builder.app
+ self.anchors_ignore = builder.anchors_ignore
+ self.auth = builder.auth
+ self.config = builder.config
+ self.env = builder.env
+ self.rate_limits = builder.rate_limits
+ self.rqueue = builder.rqueue
+ self.to_ignore = builder.to_ignore
+ self.wqueue = builder.wqueue
+
+ self._good = builder._good
+ self._broken = builder._broken
+ self._redirected = builder._redirected
+
+ super().__init__(daemon=True)
+
+ def run(self) -> None:
kwargs = {}
if self.config.linkcheck_timeout:
kwargs['timeout'] = self.config.linkcheck_timeout
@@ -378,97 +506,6 @@ class CheckExternalLinksBuilder(DummyBuilder):
self.rate_limits[netloc] = RateLimit(delay, next_check)
return next_check
- def process_result(self, result: CheckResult) -> None:
- uri, docname, lineno, status, info, code = result
-
- filename = self.env.doc2path(docname, None)
- linkstat = dict(filename=filename, lineno=lineno,
- status=status, code=code, uri=uri,
- info=info)
- if status == 'unchecked':
- self.write_linkstat(linkstat)
- return
- if status == 'working' and info == 'old':
- self.write_linkstat(linkstat)
- return
- if lineno:
- logger.info('(%16s: line %4d) ', docname, lineno, nonl=True)
- if status == 'ignored':
- if info:
- logger.info(darkgray('-ignored- ') + uri + ': ' + info)
- else:
- logger.info(darkgray('-ignored- ') + uri)
- self.write_linkstat(linkstat)
- elif status == 'local':
- logger.info(darkgray('-local- ') + uri)
- self.write_entry('local', docname, filename, lineno, uri)
- self.write_linkstat(linkstat)
- elif status == 'working':
- logger.info(darkgreen('ok ') + uri + info)
- self.write_linkstat(linkstat)
- elif status == 'broken':
- if self.app.quiet or self.app.warningiserror:
- logger.warning(__('broken link: %s (%s)'), uri, info,
- location=(filename, lineno))
- else:
- logger.info(red('broken ') + uri + red(' - ' + info))
- self.write_entry('broken', docname, filename, lineno, uri + ': ' + info)
- self.write_linkstat(linkstat)
- elif status == 'redirected':
- try:
- text, color = {
- 301: ('permanently', purple),
- 302: ('with Found', purple),
- 303: ('with See Other', purple),
- 307: ('temporarily', turquoise),
- 308: ('permanently', purple),
- }[code]
- except KeyError:
- text, color = ('with unknown code', purple)
- linkstat['text'] = text
- logger.info(color('redirect ') + uri + color(' - ' + text + ' to ' + info))
- self.write_entry('redirected ' + text, docname, filename,
- lineno, uri + ' to ' + info)
- self.write_linkstat(linkstat)
- else:
- raise ValueError("Unknown status %s." % status)
-
- def write_entry(self, what: str, docname: str, filename: str, line: int,
- uri: str) -> None:
- self.txt_outfile.write("%s:%s: [%s] %s\n" % (filename, line, what, uri))
-
- def write_linkstat(self, data: dict) -> None:
- self.json_outfile.write(json.dumps(data))
- self.json_outfile.write('\n')
-
- def finish(self) -> None:
- logger.info('')
-
- with open(path.join(self.outdir, 'output.txt'), 'w') as self.txt_outfile,\
- open(path.join(self.outdir, 'output.json'), 'w') as self.json_outfile:
- total_links = 0
- for hyperlink in self.hyperlinks.values():
- if self.is_ignored_uri(hyperlink.uri):
- self.process_result(
- CheckResult(hyperlink.uri, hyperlink.docname, hyperlink.lineno,
- 'ignored', '', 0))
- else:
- self.wqueue.put(hyperlink, False)
- total_links += 1
-
- done = 0
- while done < total_links:
- self.process_result(self.rqueue.get())
- done += 1
-
- if self._broken:
- self.app.statuscode = 1
-
- self.wqueue.join()
- # Shutdown threads.
- for worker in self.workers:
- self.wqueue.put((CHECK_IMMEDIATELY, None, None, None), False)
-
class HyperlinkCollector(SphinxPostTransform):
builders = ('linkcheck',)
diff --git a/tests/test_build_linkcheck.py b/tests/test_build_linkcheck.py
index 60b62435c..e297d42c4 100644
--- a/tests/test_build_linkcheck.py
+++ b/tests/test_build_linkcheck.py
@@ -21,7 +21,8 @@ from unittest import mock
import pytest
import requests
-from sphinx.builders.linkcheck import CheckExternalLinksBuilder, RateLimit
+from sphinx.builders.linkcheck import (CheckExternalLinksBuilder,
+ HyperlinkAvailabilityCheckWorker, RateLimit)
from sphinx.util.console import strip_colors
from .utils import CERT_FILE, http_server, https_server
@@ -536,40 +537,50 @@ class FakeResponse:
def test_limit_rate_default_sleep(app):
checker = CheckExternalLinksBuilder(app)
+ checker.init()
checker.rate_limits = {}
+ worker = HyperlinkAvailabilityCheckWorker(checker)
with mock.patch('time.time', return_value=0.0):
- next_check = checker.limit_rate(FakeResponse())
+ next_check = worker.limit_rate(FakeResponse())
assert next_check == 60.0
def test_limit_rate_user_max_delay(app):
app.config.linkcheck_rate_limit_timeout = 0.0
checker = CheckExternalLinksBuilder(app)
+ checker.init()
checker.rate_limits = {}
- next_check = checker.limit_rate(FakeResponse())
+ worker = HyperlinkAvailabilityCheckWorker(checker)
+ next_check = worker.limit_rate(FakeResponse())
assert next_check is None
def test_limit_rate_doubles_previous_wait_time(app):
checker = CheckExternalLinksBuilder(app)
+ checker.init()
checker.rate_limits = {"localhost": RateLimit(60.0, 0.0)}
+ worker = HyperlinkAvailabilityCheckWorker(checker)
with mock.patch('time.time', return_value=0.0):
- next_check = checker.limit_rate(FakeResponse())
+ next_check = worker.limit_rate(FakeResponse())
assert next_check == 120.0
def test_limit_rate_clips_wait_time_to_max_time(app):
checker = CheckExternalLinksBuilder(app)
+ checker.init()
app.config.linkcheck_rate_limit_timeout = 90.0
checker.rate_limits = {"localhost": RateLimit(60.0, 0.0)}
+ worker = HyperlinkAvailabilityCheckWorker(checker)
with mock.patch('time.time', return_value=0.0):
- next_check = checker.limit_rate(FakeResponse())
+ next_check = worker.limit_rate(FakeResponse())
assert next_check == 90.0
def test_limit_rate_bails_out_after_waiting_max_time(app):
checker = CheckExternalLinksBuilder(app)
+ checker.init()
app.config.linkcheck_rate_limit_timeout = 90.0
checker.rate_limits = {"localhost": RateLimit(90.0, 0.0)}
- next_check = checker.limit_rate(FakeResponse())
+ worker = HyperlinkAvailabilityCheckWorker(checker)
+ next_check = worker.limit_rate(FakeResponse())
assert next_check is None