# getNTPserversNew.py # # Demonstration of the parsing module, implementing a HTML page scanner, # to extract a list of NTP time servers from the NIST web site. # # Copyright 2004-2010, by Paul McGuire # September, 2010 - updated to more current use of setResultsName, new NIST URL # import pyparsing as pp ppc = pp.pyparsing_common from urllib.request import urlopen integer = pp.Word(pp.nums) ipAddress = ppc.ipv4_address() hostname = pp.delimitedList(pp.Word(pp.alphas, pp.alphanums + "-_"), ".", combine=True) tdStart, tdEnd = pp.makeHTMLTags("td") timeServerPattern = ( tdStart + hostname("hostname") + tdEnd + tdStart + ipAddress("ipAddr") + tdEnd + tdStart + tdStart.tag_body("loc") + tdEnd ) # get list of time servers nistTimeServerURL = "https://tf.nist.gov/tf-cgi/servers.cgi#" with urlopen(nistTimeServerURL) as serverListPage: serverListHTML = serverListPage.read().decode("UTF-8") addrs = {} for srvr, startloc, endloc in timeServerPattern.scanString(serverListHTML): print("{} ({}) - {}".format(srvr.ipAddr, srvr.hostname.strip(), srvr.loc.strip())) addrs[srvr.ipAddr] = srvr.loc