diff options
Diffstat (limited to 'examples/httpServerLogParser.py')
-rw-r--r-- | examples/httpServerLogParser.py | 79 |
1 files changed, 55 insertions, 24 deletions
diff --git a/examples/httpServerLogParser.py b/examples/httpServerLogParser.py index b10678b..c84337f 100644 --- a/examples/httpServerLogParser.py +++ b/examples/httpServerLogParser.py @@ -23,39 +23,69 @@ Referer Client Software """ -from pyparsing import alphas,nums, dblQuotedString, Combine, Word, Group, delimitedList, Suppress, removeQuotes +from pyparsing import ( + alphas, + nums, + dblQuotedString, + Combine, + Word, + Group, + delimitedList, + Suppress, + removeQuotes, +) import string -def getCmdFields( s, l, t ): - t["method"],t["requestURI"],t["protocolVersion"] = t[0].strip('"').split() + +def getCmdFields(s, l, t): + t["method"], t["requestURI"], t["protocolVersion"] = t[0].strip('"').split() + logLineBNF = None + + def getLogLineBNF(): global logLineBNF if logLineBNF is None: - integer = Word( nums ) - ipAddress = delimitedList( integer, ".", combine=True ) + integer = Word(nums) + ipAddress = delimitedList(integer, ".", combine=True) - timeZoneOffset = Word("+-",nums) + timeZoneOffset = Word("+-", nums) month = Word(string.ascii_uppercase, string.ascii_lowercase, exact=3) - serverDateTime = Group( Suppress("[") + - Combine( integer + "/" + month + "/" + integer + - ":" + integer + ":" + integer + ":" + integer ) + - timeZoneOffset + - Suppress("]") ) + serverDateTime = Group( + Suppress("[") + + Combine( + integer + + "/" + + month + + "/" + + integer + + ":" + + integer + + ":" + + integer + + ":" + + integer + ) + + timeZoneOffset + + Suppress("]") + ) - logLineBNF = ( ipAddress.setResultsName("ipAddr") + - Suppress("-") + - ("-" | Word( alphas+nums+"@._" )).setResultsName("auth") + - serverDateTime.setResultsName("timestamp") + - dblQuotedString.setResultsName("cmd").setParseAction(getCmdFields) + - (integer | "-").setResultsName("statusCode") + - (integer | "-").setResultsName("numBytesSent") + - dblQuotedString.setResultsName("referrer").setParseAction(removeQuotes) + - dblQuotedString.setResultsName("clientSfw").setParseAction(removeQuotes) ) + logLineBNF = ( + ipAddress.setResultsName("ipAddr") + + Suppress("-") + + ("-" | Word(alphas + nums + "@._")).setResultsName("auth") + + serverDateTime.setResultsName("timestamp") + + dblQuotedString.setResultsName("cmd").setParseAction(getCmdFields) + + (integer | "-").setResultsName("statusCode") + + (integer | "-").setResultsName("numBytesSent") + + dblQuotedString.setResultsName("referrer").setParseAction(removeQuotes) + + dblQuotedString.setResultsName("clientSfw").setParseAction(removeQuotes) + ) return logLineBNF + testdata = """ 195.146.134.15 - - [20/Jan/2003:08:55:36 -0800] "GET /path/to/page.html HTTP/1.0" 200 4649 "http://www.somedomain.com/020602/page.html" "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)" 111.111.111.11 - - [16/Feb/2004:04:09:49 -0800] "GET /ads/redirectads/336x280redirect.htm HTTP/1.1" 304 - "http://www.foobarp.org/theme_detail.php?type=vs&cat=0&mid=27512" "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)" @@ -63,10 +93,11 @@ testdata = """ 127.0.0.1 - u.surname@domain.com [12/Sep/2006:14:13:53 +0300] "GET /skins/monobook/external.png HTTP/1.0" 304 - "http://wiki.mysite.com/skins/monobook/main.css" "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.0.6) Gecko/20060728 Firefox/1.5.0.6" """ for line in testdata.split("\n"): - if not line: continue + if not line: + continue fields = getLogLineBNF().parseString(line) print(fields.dump()) - #~ print repr(fields) - #~ for k in fields.keys(): - #~ print "fields." + k + " =", fields[k] + # ~ print repr(fields) + # ~ for k in fields.keys(): + # ~ print "fields." + k + " =", fields[k] print() |