diff options
author | Paul McGuire <ptmcg@austin.rr.com> | 2019-04-06 23:44:02 -0500 |
---|---|---|
committer | Paul McGuire <ptmcg@austin.rr.com> | 2019-04-06 23:44:02 -0500 |
commit | a2439508ba5c94546db98593cfa676de9b59babe (patch) | |
tree | 80b02178820811c09b4befc9a9b5efb092813466 /examples | |
parent | 832986ffccac943b363da43795c335eafc31b5da (diff) | |
download | pyparsing-git-a2439508ba5c94546db98593cfa676de9b59babe.tar.gz |
Fixed dict structure in makeHTMLTags expressions, and added tag_body attribute to the generated start expression giving easy access to a SkipTo(closeTag) that will parse the tag's body text; some code cleanup and removed duplication among examples
Diffstat (limited to 'examples')
-rw-r--r-- | examples/0README.html | 604 | ||||
-rw-r--r-- | examples/getNTPservers.py | 30 | ||||
-rw-r--r-- | examples/getNTPserversNew.py | 71 | ||||
-rw-r--r-- | examples/htmlStripper.py | 64 | ||||
-rw-r--r-- | examples/htmlTableParser.py | 4 | ||||
-rw-r--r-- | examples/makeHTMLTagExample.py | 21 | ||||
-rw-r--r-- | examples/scanYahoo.py | 14 | ||||
-rw-r--r-- | examples/urlExtractor.py | 60 | ||||
-rw-r--r-- | examples/urlExtractorNew.py | 62 | ||||
-rw-r--r-- | examples/withAttribute.py | 50 |
10 files changed, 452 insertions, 528 deletions
diff --git a/examples/0README.html b/examples/0README.html index adecdc8..617c16e 100644 --- a/examples/0README.html +++ b/examples/0README.html @@ -1,309 +1,295 @@ -<HTML>
-<title>pyparsing Examples</title>
-<body>
-<h1>pyparsing Examples</h1>
-<p>
-This directory contains a number of Python scripts that can get you started in learning to use pyparsing.
-
-<ul>
-<li><a href="greeting.py">greeting.py</a><br>
-Parse "Hello, World!".
-</li>
-<p>
-
-<li><a href="greetingInKorean.py">greetingInKorean.py</a> <i>~ submission by June Kim</i><br>
-Unicode example to parse "Hello, World!" in Korean.
-</li>
-<p>
-
-<li><a href="greetingInGreek.py">greetingInGreek.py</a> <i>~ submission by ???</i><br>
-Unicode example to parse "Hello, World!" in Greek.
-</li>
-<p>
-
-<li><a href="holaMundo.py">holaMundo.py</a> <i>~ submission by Marco Alfonso</i><br>
-"Hello, World!" example translated to Spanish, from Marco Alfonso's blog.
-</li>
-<p>
-
-<li><a href="chemicalFormulas.py">chemicalFormulas.py</a><br>
-Simple example to demonstrate the use of ParseResults returned from parseString().
-Parses a chemical formula (such as "H2O" or "C6H5OH"), and walks the returned list of tokens to calculate the molecular weight.
-</li>
-<p>
-
-<li><a href="wordsToNum.py">wordsToNum.py</a><br>
-A sample program that reads a number in words (such as "fifteen hundred and sixty four"), and returns the actual number (1564).
-Also demonstrates some processing of ParseExceptions, including marking where the parse failure was found.
-</li>
-<p>
-
-<li><a href="pythonGrammarparser.py">pythonGrammarparser.py</a> <i>~ suggested by JH Stovall</i><br>
-A sample program that parses the EBNF used in the Python source code to define the Python grammar. From this parser,
-one can generate Python grammar documentation tools, such as railroad track diagrams. Also demonstrates use of
-Dict class.
-</li>
-<p>
-
-<li><a href="commasep.py">commasep.py</a><br>
-Demonstration of the use of the commaSeparatedList helper. Shows examples of
-proper handling of commas within quotes, trimming of whitespace around delimited entries, and handling of consecutive commas (null arguments). Includes comparison with simple string.split(',').
-</li>
-<p>
-
-<li><a href="dictExample.py">dictExample.py</a><br>
-A demonstration of using the Dict class, to parse a table of ASCII tabulated data.
-</li>
-<p>
-
-<li><a href="dictExample2.py">dictExample2.py</a> <i>~ submission by Mike Kelly</i><br>
-An extended version of dictExample.py, in which Mike Kelly also parses the column headers, and generates a transposed version of the original table!
-</li>
-<p>
-
-<li><a href="scanExamples.py">scanExamples.py</a><br>
-Some examples of using scanString and transformString, as alternative parsing methods to parseString, to do macro substitution, and selection and/or removal of matching strings within a source file.
-</li>
-<p>
-
-<li><a href="urlExtractor.py">urlExtractor.py</a><br>
-Another example using scanString, this time to extract all HREF references found on Yahoo!'s home page, and return them as a dictionary.
-</li>
-<p>
-
-<li><a href="makeHTMLTagExample.py">makeHTMLTagExample.py</a><br>
-A sample program showing sample definitions and applications of HTML tag expressions
-created using makeHTMLTags helper function. Very useful for scraping data from HTML pages.
-</li>
-<p>
-
-<li><a href="urlExtractorNew.py">urlExtractorNew.py</a><br>
-Another updated version of urlExtractor.py, using the new makeHTMLTags() method.
-</li>
-<p>
-
-<li><a href="fourFn.py">fourFn.py</a><br>
-A simple algebraic expression parser, that performs +,-,*,/, and ^ arithmetic operations. (With suggestions and bug-fixes graciously offered by Andrea Griffini.)
-</li>
-<p>
-
-<li><a href="SimpleCalc.py">SimpleCalc.py</a> <i>~ submission by Steven Siew</i><br>
-An interactive version of fourFn.py, with support for variables.
-</li>
-<p>
-
-<li><a href="LAParser.py">LAParser.py</a> <i>~ submission by Mike Ellis</i><br>
-An interactive Linear Algebra Parser, an extension of SimpleCalc.py. Supports linear algebra (LA) notation for vectors, matrices, and scalars,
-including matrix operations such as inversion and determinants. Converts LA expressions to C code - uses a separate C library for runtime
-evaluation of results.
-</li>
-<p>
-
-<li><a href="configParse.py">configParse.py</a><br>
-A simple alternative to Python's ConfigParse module, demonstrating the use of the Dict class to return nested dictionary access to configuration values.
-</li>
-<p>
-
-<li><a href="getNTPservers.py">getNTPservers.py</a><br>
-Yet another scanString example, to read/extract the list of NTP servers from NIST's web site.
-</li>
-<p>
-
-<li><a href="getNTPserversNew.py">getNTPserversNew.py</a><br>
-An updated version of getNTPservers.py, using the new makeHTMLTags() method.
-</li>
-<p>
-
-<li><a href="httpServerLogParser.py">httpServerLogParser.py</a><br>
-Parser for Apache server log files.
-</li>
-<p>
-
-<li><a href="idlParse.py">idlParse.py</a><br>
-Parser for CORBA IDL files.
-</li>
-<p>
-
-<li><a href="mozillaCalendarParser.py">mozillaCalendarParser.py</a>
-<i>~ submission by Petri Savolainen</i><br>
-Parser for Mozilla calendar (*.ics) files.
-</li>
-<p>
-
-<li><a href="pgn.py">pgn.py</a> <i>~ submission by Alberto Santini</i><br>
-Parser for PGN (Portable Game Notation) files, the standard form for documenting the moves in chess games.
-</li>
-<p>
-
-<li><a href="simpleSQL.py">simpleSQL.py</a><br>
-A simple parser that will extract table and column names from SQL SELECT statements..
-</li>
-<p>
-
-<li><a href="dfmparse.py">dfmparse.py</a> <i>~ submission by Dan Griffith</i><br>
-Parser for Delphi forms.
-</li>
-<p>
-
-<li><a href="ebnf.py">ebnf.py / ebnftest.py</a> <i>~ submission by Seo Sanghyeon</i><br>
-An EBNF-compiler that reads EBNF and generates a pyparsing grammar! Including a test that compiles... EBNF itself!
-</li>
-<p>
-
-<li><a href="searchparser.py">searchparser.py</a> <i>~ submission by Steven Mooij and Rudolph Froger</i><br>
-An expression parser that parses search strings, with special keyword and expression operations using (), not, and, or, and quoted strings.
-</li>
-<p>
-
-<li><a href="sparser.py">sparser.py</a> <i>~ submission by Tim Cera</i><br>
-A configurable parser module that can be configured with a list of tuples, giving a high-level definition for parsing common sets
-of water table data files. Tim had to contend with several different styles of data file formats, each with slight variations of its own.
-Tim created a configurable parser (or "SPECIFIED parser" - hence the name "sparser"), that simply works from a config variable listing
-the field names and data types, and implicitly, their order in the source data file.
-<p>
-See <a href="mayport_florida_8720220_data_def.txt">mayport_florida_8720220_data_def.txt</a> for an
-example configuration file.
-</li>
-<p>
-
-<li><a href="romanNumerals.py">romanNumerals.py</a><br>
-A Roman numeral generator and parser example, showing the power of parse actions
-to compile Roman numerals into their integer values.
-</li>
-<p>
-
-<li><a href="removeLineBreaks.py">removeLineBreaks.py</a><br>
-A string transformer that converts text files with hard line-breaks into one with line breaks
-only between paragraphs. Useful when converting downloads from
-<a href="https://www.gutenberg.org/">Project Gutenberg</a> to import to word processing apps
-that can reformat paragraphs once hard line-breaks are removed, or for loading into your Palm Pilot for portable perusal.
-<p>
-See <a href="Successful Methods of Public Speaking.txt">Successful Methods of Public Speaking.txt</a> and
-<a href="Successful Methods of Public Speaking(2).txt">Successful Methods of Public Speaking(2).txt</a> for a sample
-before and after (text file courtesy of Project Gutenberg).
-</li>
-<p>
-
-<li><a href="listAllMatches.py">listAllMatches.py</a><br>
-An example program showing the utility of the listAllMatches option when specifying results naming.
-</li>
-<p>
-
-<li><a href="linenoExample.py">linenoExample.py</a><br>
-An example program showing how to use the string location to extract line and column numbers, or the
-source line of text.
-</li>
-<p>
-
-<li><a href="parseListString.py">parseListString.py</a><br>
-An example program showing a progression of steps, how to parse a string representation of a Python
-list back into a true list.
-</li>
-<p>
-
-<li><a href="parsePythonValue.py">parsePythonValue.py</a><br>
-An extension of parseListString.py to parse tuples and dicts, including nested values,
-returning a Python value of the original type.
-</li>
-<p>
-
-<li><a href="indentedGrammarExample.py">indentedGrammarExample.py</a><br>
-An example program showing how to parse a grammar using indentation for grouping,
-such as is done in Python.
-</li>
-<p>
-
-<li><a href="simpleArith.py">simpleArith.py</a><br>
-An example program showing how to use the new operatorPrecedence helper method to define a 6-function
-(+, -, *, /, ^, and !) arithmetic expression parser, with unary plus and minus signs.
-</li>
-<p>
-
-<li><a href="simpleBool.py">simpleBool.py</a><br>
-An example program showing how to use the new operatorPrecedence helper method to define a
-boolean expression parser, with parse actions associated with each operator to "compile" the expression
-into a data structure that will evaluate the expression's boolean value.
-</li>
-<p>
-
-<li><a href="simpleWiki.py">simpleWiki.py</a><br>
-An example program showing how to use transformString to implement a simple Wiki markup parser.
-</li>
-<p>
-
-<li><a href="sql2dot.py">sql2dot.py</a><i>~ submission by EnErGy [CSDX]</i><br>
-A nice graphing program that generates schema diagrams from SQL table definition statements.
-</li>
-<p>
-
-<li><a href="htmlStripper.py">htmlStripper.py</a><br>
-An example implementation of a common application, removing HTML markup tags from an HTML page,
-leaving just the text content.
-</li>
-<p>
-
-<li><a href="macroExpansion.py">macroExpansion.py</a><br>
-An example implementation of a simple preprocessor, that will read embedded macro definitions
-and replace macro references with the defined substitution string.
-</li>
-<p>
-
-<li><a href="sexpParser.py">sexpParser.py</a><br>
-A parser that uses a recursive grammar to parse S-expressions.
-</li>
-<p>
-
-<li><a href="nested.py">nested.py</a><br>
-An example using nestedExpr, a helper method to simplify definitions of expressions of nested lists.
-</li>
-<p>
-
-<li><a href="withAttribute.py">withAttribute.py</a><br>
-An example using withAttribute, a helper method to define parse actions to validate matched HTML tags
-using additional attributes. Especially helpful for matching common tags such as <DIV> and <TD>.
-</li>
-<p>
-
-<li><a href="stackish.py">stackish.py</a><br>
-A parser for the data representation format, Stackish.
-</li>
-<p>
-
-<li><a href="builtin_parse_action_demo.py">builtin_parse_action_demo.py</a><br>
-<b>New in version 1.5.7</b><br>
-Demonstration of using builtins (min, max, sum, len, etc.) as parse actions.
-</li>
-<p>
-
-<li><a href="antlr_grammar.py">antlr_grammar.py</a><i>~ submission by Luca DellOlio</i><br>
-<b>New in version 1.5.7</b><br>
-Pyparsing example parsing ANTLR .a files and generating a working pyparsing parser.
-</li>
-<p>
-
-<li><a href="shapes.py">shapes.py</a><br>
-<b>New in version 1.5.7</b><br>
-Parse actions example simple shape definition syntax, and returning the matched tokens as
-domain objects instead of just strings.
-</li>
-<p>
-
-<li><a href="datetimeParseActions.py">datetimeParseActions.py</a><br>
-<b>New in version 1.5.7</b><br>
-Parse actions example showing a parse action returning a datetime object instead of
-string tokens, and doing validation of the tokens, raising a ParseException if the
-given YYYY/MM/DD string does not represent a valid date.
-</li>
-<p>
-
-<li><a href="position.py">position.py</a><br>
-<b>New in version 1.5.7</b><br>
-Demonstration of a couple of different ways to capture the location a particular
-expression was found within the overall input string.
-</li>
-<p>
-
-
-</ul>
-
-</body></html>
+<HTML> +<title>pyparsing Examples</title> +<body> +<h1>pyparsing Examples</h1> +<p> +This directory contains a number of Python scripts that can get you started in learning to use pyparsing. + +<ul> +<li><a href="greeting.py">greeting.py</a><br> +Parse "Hello, World!". +</li> +<p> + +<li><a href="greetingInKorean.py">greetingInKorean.py</a> <i>~ submission by June Kim</i><br> +Unicode example to parse "Hello, World!" in Korean. +</li> +<p> + +<li><a href="greetingInGreek.py">greetingInGreek.py</a> <i>~ submission by ???</i><br> +Unicode example to parse "Hello, World!" in Greek. +</li> +<p> + +<li><a href="holaMundo.py">holaMundo.py</a> <i>~ submission by Marco Alfonso</i><br> +"Hello, World!" example translated to Spanish, from Marco Alfonso's blog. +</li> +<p> + +<li><a href="chemicalFormulas.py">chemicalFormulas.py</a><br> +Simple example to demonstrate the use of ParseResults returned from parseString(). +Parses a chemical formula (such as "H2O" or "C6H5OH"), and walks the returned list of tokens to calculate the molecular weight. +</li> +<p> + +<li><a href="wordsToNum.py">wordsToNum.py</a><br> +A sample program that reads a number in words (such as "fifteen hundred and sixty four"), and returns the actual number (1564). +Also demonstrates some processing of ParseExceptions, including marking where the parse failure was found. +</li> +<p> + +<li><a href="pythonGrammarparser.py">pythonGrammarparser.py</a> <i>~ suggested by JH Stovall</i><br> +A sample program that parses the EBNF used in the Python source code to define the Python grammar. From this parser, +one can generate Python grammar documentation tools, such as railroad track diagrams. Also demonstrates use of +Dict class. +</li> +<p> + +<li><a href="commasep.py">commasep.py</a><br> +Demonstration of the use of the commaSeparatedList helper. Shows examples of +proper handling of commas within quotes, trimming of whitespace around delimited entries, and handling of consecutive commas (null arguments). Includes comparison with simple string.split(','). +</li> +<p> + +<li><a href="dictExample.py">dictExample.py</a><br> +A demonstration of using the Dict class, to parse a table of ASCII tabulated data. +</li> +<p> + +<li><a href="dictExample2.py">dictExample2.py</a> <i>~ submission by Mike Kelly</i><br> +An extended version of dictExample.py, in which Mike Kelly also parses the column headers, and generates a transposed version of the original table! +</li> +<p> + +<li><a href="scanExamples.py">scanExamples.py</a><br> +Some examples of using scanString and transformString, as alternative parsing methods to parseString, to do macro substitution, and selection and/or removal of matching strings within a source file. +</li> +<p> + +<li><a href="urlExtractorNew.py">urlExtractorNew.py</a><br> +A sample program showing sample definitions and applications of HTML tag expressions +created using makeHTMLTags helper function. Very useful for scraping data from HTML pages. +</li> +<p> + +<li><a href="fourFn.py">fourFn.py</a><br> +A simple algebraic expression parser, that performs +,-,*,/, and ^ arithmetic operations. (With suggestions and bug-fixes graciously offered by Andrea Griffini.) +</li> +<p> + +<li><a href="SimpleCalc.py">SimpleCalc.py</a> <i>~ submission by Steven Siew</i><br> +An interactive version of fourFn.py, with support for variables. +</li> +<p> + +<li><a href="LAParser.py">LAParser.py</a> <i>~ submission by Mike Ellis</i><br> +An interactive Linear Algebra Parser, an extension of SimpleCalc.py. Supports linear algebra (LA) notation for vectors, matrices, and scalars, +including matrix operations such as inversion and determinants. Converts LA expressions to C code - uses a separate C library for runtime +evaluation of results. +</li> +<p> + +<li><a href="configParse.py">configParse.py</a><br> +A simple alternative to Python's ConfigParse module, demonstrating the use of the Dict class to return nested dictionary access to configuration values. +</li> +<p> + +<li><a href="getNTPserversNew.py">getNTPserversNew.py</a><br> +Yet another scanString example, to read/extract the list of NTP servers from NIST's web site. +Uses the new makeHTMLTags() method. +</li> +<p> + +<li><a href="httpServerLogParser.py">httpServerLogParser.py</a><br> +Parser for Apache server log files. +</li> +<p> + +<li><a href="idlParse.py">idlParse.py</a><br> +Parser for CORBA IDL files. +</li> +<p> + +<li><a href="mozillaCalendarParser.py">mozillaCalendarParser.py</a> +<i>~ submission by Petri Savolainen</i><br> +Parser for Mozilla calendar (*.ics) files. +</li> +<p> + +<li><a href="pgn.py">pgn.py</a> <i>~ submission by Alberto Santini</i><br> +Parser for PGN (Portable Game Notation) files, the standard form for documenting the moves in chess games. +</li> +<p> + +<li><a href="simpleSQL.py">simpleSQL.py</a><br> +A simple parser that will extract table and column names from SQL SELECT statements.. +</li> +<p> + +<li><a href="dfmparse.py">dfmparse.py</a> <i>~ submission by Dan Griffith</i><br> +Parser for Delphi forms. +</li> +<p> + +<li><a href="ebnf.py">ebnf.py / ebnftest.py</a> <i>~ submission by Seo Sanghyeon</i><br> +An EBNF-compiler that reads EBNF and generates a pyparsing grammar! Including a test that compiles... EBNF itself! +</li> +<p> + +<li><a href="searchparser.py">searchparser.py</a> <i>~ submission by Steven Mooij and Rudolph Froger</i><br> +An expression parser that parses search strings, with special keyword and expression operations using (), not, and, or, and quoted strings. +</li> +<p> + +<li><a href="sparser.py">sparser.py</a> <i>~ submission by Tim Cera</i><br> +A configurable parser module that can be configured with a list of tuples, giving a high-level definition for parsing common sets +of water table data files. Tim had to contend with several different styles of data file formats, each with slight variations of its own. +Tim created a configurable parser (or "SPECIFIED parser" - hence the name "sparser"), that simply works from a config variable listing +the field names and data types, and implicitly, their order in the source data file. +<p> +See <a href="mayport_florida_8720220_data_def.txt">mayport_florida_8720220_data_def.txt</a> for an +example configuration file. +</li> +<p> + +<li><a href="romanNumerals.py">romanNumerals.py</a><br> +A Roman numeral generator and parser example, showing the power of parse actions +to compile Roman numerals into their integer values. +</li> +<p> + +<li><a href="removeLineBreaks.py">removeLineBreaks.py</a><br> +A string transformer that converts text files with hard line-breaks into one with line breaks +only between paragraphs. Useful when converting downloads from +<a href="https://www.gutenberg.org/">Project Gutenberg</a> to import to word processing apps +that can reformat paragraphs once hard line-breaks are removed, or for loading into your Palm Pilot for portable perusal. +<p> +See <a href="Successful Methods of Public Speaking.txt">Successful Methods of Public Speaking.txt</a> and +<a href="Successful Methods of Public Speaking(2).txt">Successful Methods of Public Speaking(2).txt</a> for a sample +before and after (text file courtesy of Project Gutenberg). +</li> +<p> + +<li><a href="listAllMatches.py">listAllMatches.py</a><br> +An example program showing the utility of the listAllMatches option when specifying results naming. +</li> +<p> + +<li><a href="linenoExample.py">linenoExample.py</a><br> +An example program showing how to use the string location to extract line and column numbers, or the +source line of text. +</li> +<p> + +<li><a href="parseListString.py">parseListString.py</a><br> +An example program showing a progression of steps, how to parse a string representation of a Python +list back into a true list. +</li> +<p> + +<li><a href="parsePythonValue.py">parsePythonValue.py</a><br> +An extension of parseListString.py to parse tuples and dicts, including nested values, +returning a Python value of the original type. +</li> +<p> + +<li><a href="indentedGrammarExample.py">indentedGrammarExample.py</a><br> +An example program showing how to parse a grammar using indentation for grouping, +such as is done in Python. +</li> +<p> + +<li><a href="simpleArith.py">simpleArith.py</a><br> +An example program showing how to use the new operatorPrecedence helper method to define a 6-function +(+, -, *, /, ^, and !) arithmetic expression parser, with unary plus and minus signs. +</li> +<p> + +<li><a href="simpleBool.py">simpleBool.py</a><br> +An example program showing how to use the new operatorPrecedence helper method to define a +boolean expression parser, with parse actions associated with each operator to "compile" the expression +into a data structure that will evaluate the expression's boolean value. +</li> +<p> + +<li><a href="simpleWiki.py">simpleWiki.py</a><br> +An example program showing how to use transformString to implement a simple Wiki markup parser. +</li> +<p> + +<li><a href="sql2dot.py">sql2dot.py</a><i>~ submission by EnErGy [CSDX]</i><br> +A nice graphing program that generates schema diagrams from SQL table definition statements. +</li> +<p> + +<li><a href="htmlStripper.py">htmlStripper.py</a><br> +An example implementation of a common application, removing HTML markup tags from an HTML page, +leaving just the text content. +</li> +<p> + +<li><a href="macroExpansion.py">macroExpansion.py</a><br> +An example implementation of a simple preprocessor, that will read embedded macro definitions +and replace macro references with the defined substitution string. +</li> +<p> + +<li><a href="sexpParser.py">sexpParser.py</a><br> +A parser that uses a recursive grammar to parse S-expressions. +</li> +<p> + +<li><a href="nested.py">nested.py</a><br> +An example using nestedExpr, a helper method to simplify definitions of expressions of nested lists. +</li> +<p> + +<li><a href="withAttribute.py">withAttribute.py</a><br> +An example using withAttribute, a helper method to define parse actions to validate matched HTML tags +using additional attributes. Especially helpful for matching common tags such as <DIV> and <TD>. +</li> +<p> + +<li><a href="stackish.py">stackish.py</a><br> +A parser for the data representation format, Stackish. +</li> +<p> + +<li><a href="builtin_parse_action_demo.py">builtin_parse_action_demo.py</a><br> +<b>New in version 1.5.7</b><br> +Demonstration of using builtins (min, max, sum, len, etc.) as parse actions. +</li> +<p> + +<li><a href="antlr_grammar.py">antlr_grammar.py</a><i>~ submission by Luca DellOlio</i><br> +<b>New in version 1.5.7</b><br> +Pyparsing example parsing ANTLR .a files and generating a working pyparsing parser. +</li> +<p> + +<li><a href="shapes.py">shapes.py</a><br> +<b>New in version 1.5.7</b><br> +Parse actions example simple shape definition syntax, and returning the matched tokens as +domain objects instead of just strings. +</li> +<p> + +<li><a href="datetimeParseActions.py">datetimeParseActions.py</a><br> +<b>New in version 1.5.7</b><br> +Parse actions example showing a parse action returning a datetime object instead of +string tokens, and doing validation of the tokens, raising a ParseException if the +given YYYY/MM/DD string does not represent a valid date. +</li> +<p> + +<li><a href="position.py">position.py</a><br> +<b>New in version 1.5.7</b><br> +Demonstration of a couple of different ways to capture the location a particular +expression was found within the overall input string. +</li> +<p> + + +</ul> + +</body></html> diff --git a/examples/getNTPservers.py b/examples/getNTPservers.py deleted file mode 100644 index bbf1d60..0000000 --- a/examples/getNTPservers.py +++ /dev/null @@ -1,30 +0,0 @@ -# getNTPservers.py
-#
-# Demonstration of the parsing module, implementing a HTML page scanner,
-# to extract a list of NTP time servers from the NIST web site.
-#
-# Copyright 2004, by Paul McGuire
-#
-from pyparsing import Word, Combine, Suppress, CharsNotIn, nums
-import urllib.request, urllib.parse, urllib.error
-
-integer = Word(nums)
-ipAddress = Combine( integer + "." + integer + "." + integer + "." + integer )
-tdStart = Suppress("<td>")
-tdEnd = Suppress("</td>")
-timeServerPattern = tdStart + ipAddress.setResultsName("ipAddr") + tdEnd + \
- tdStart + CharsNotIn("<").setResultsName("loc") + tdEnd
-
-# get list of time servers
-nistTimeServerURL = "http://www.boulder.nist.gov/timefreq/service/time-servers.html"
-serverListPage = urllib.request.urlopen( nistTimeServerURL )
-serverListHTML = serverListPage.read()
-serverListPage.close()
-
-addrs = {}
-for srvr,startloc,endloc in timeServerPattern.scanString( serverListHTML ):
- print(srvr.ipAddr, "-", srvr.loc)
- addrs[srvr.ipAddr] = srvr.loc
- # or do this:
- #~ addr,loc = srvr
- #~ print addr, "-", loc
diff --git a/examples/getNTPserversNew.py b/examples/getNTPserversNew.py index c87c0ae..c86e756 100644 --- a/examples/getNTPserversNew.py +++ b/examples/getNTPserversNew.py @@ -1,35 +1,36 @@ -# getNTPserversNew.py
-#
-# Demonstration of the parsing module, implementing a HTML page scanner,
-# to extract a list of NTP time servers from the NIST web site.
-#
-# Copyright 2004-2010, by Paul McGuire
-# September, 2010 - updated to more current use of setResultsName, new NIST URL
-#
-from pyparsing import (Word, Combine, SkipTo, nums, makeHTMLTags,
- delimitedList, alphas, alphanums)
-try:
- import urllib.request
- urlopen = urllib.request.urlopen
-except ImportError:
- import urllib
- urlopen = urllib.urlopen
-
-integer = Word(nums)
-ipAddress = Combine( integer + "." + integer + "." + integer + "." + integer )
-hostname = delimitedList(Word(alphas,alphanums+"-_"),".",combine=True)
-tdStart,tdEnd = makeHTMLTags("td")
-timeServerPattern = (tdStart + hostname("hostname") + tdEnd +
- tdStart + ipAddress("ipAddr") + tdEnd +
- tdStart + SkipTo(tdEnd)("loc") + tdEnd)
-
-# get list of time servers
-nistTimeServerURL = "https://tf.nist.gov/tf-cgi/servers.cgi#"
-serverListPage = urlopen( nistTimeServerURL )
-serverListHTML = serverListPage.read().decode("UTF-8")
-serverListPage.close()
-
-addrs = {}
-for srvr,startloc,endloc in timeServerPattern.scanString( serverListHTML ):
- print("{0} ({1}) - {2}".format(srvr.ipAddr, srvr.hostname.strip(), srvr.loc.strip()))
- addrs[srvr.ipAddr] = srvr.loc
+# getNTPserversNew.py +# +# Demonstration of the parsing module, implementing a HTML page scanner, +# to extract a list of NTP time servers from the NIST web site. +# +# Copyright 2004-2010, by Paul McGuire +# September, 2010 - updated to more current use of setResultsName, new NIST URL +# +import pyparsing as pp +ppc = pp.pyparsing_common +from contextlib import closing + +try: + import urllib.request + urlopen = urllib.request.urlopen +except ImportError: + import urllib + urlopen = urllib.urlopen + +integer = pp.Word(pp.nums) +ipAddress = ppc.ipv4_address() +hostname = pp.delimitedList(pp.Word(pp.alphas, pp.alphanums+"-_"), ".", combine=True) +tdStart, tdEnd = pp.makeHTMLTags("td") +timeServerPattern = (tdStart + hostname("hostname") + tdEnd + + tdStart + ipAddress("ipAddr") + tdEnd + + tdStart + tdStart.tag_body("loc") + tdEnd) + +# get list of time servers +nistTimeServerURL = "https://tf.nist.gov/tf-cgi/servers.cgi#" +with closing(urlopen(nistTimeServerURL)) as serverListPage: + serverListHTML = serverListPage.read().decode("UTF-8") + +addrs = {} +for srvr, startloc, endloc in timeServerPattern.scanString(serverListHTML): + print("{0} ({1}) - {2}".format(srvr.ipAddr, srvr.hostname.strip(), srvr.loc.strip())) + addrs[srvr.ipAddr] = srvr.loc diff --git a/examples/htmlStripper.py b/examples/htmlStripper.py index c3dbcf1..18f3395 100644 --- a/examples/htmlStripper.py +++ b/examples/htmlStripper.py @@ -1,32 +1,32 @@ -#
-# htmlStripper.py
-#
-# Sample code for stripping HTML markup tags and scripts from
-# HTML source files.
-#
-# Copyright (c) 2006, 2016, Paul McGuire
-#
-from contextlib import closing
-import urllib.request, urllib.parse, urllib.error
-from pyparsing import (makeHTMLTags, SkipTo, commonHTMLEntity, replaceHTMLEntity,
- htmlComment, anyOpenTag, anyCloseTag, LineEnd, OneOrMore, replaceWith)
-
-scriptOpen,scriptClose = makeHTMLTags("script")
-scriptBody = scriptOpen + SkipTo(scriptClose) + scriptClose
-commonHTMLEntity.setParseAction(replaceHTMLEntity)
-
-# get some HTML
-targetURL = "https://wiki.python.org/moin/PythonDecoratorLibrary"
-with closing(urllib.request.urlopen( targetURL )) as targetPage:
- targetHTML = targetPage.read().decode("UTF-8")
-
-# first pass, strip out tags and translate entities
-firstPass = (htmlComment | scriptBody | commonHTMLEntity |
- anyOpenTag | anyCloseTag ).suppress().transformString(targetHTML)
-
-# first pass leaves many blank lines, collapse these down
-repeatedNewlines = LineEnd() + OneOrMore(LineEnd())
-repeatedNewlines.setParseAction(replaceWith("\n\n"))
-secondPass = repeatedNewlines.transformString(firstPass)
-
-print(secondPass)
+# +# htmlStripper.py +# +# Sample code for stripping HTML markup tags and scripts from +# HTML source files. +# +# Copyright (c) 2006, 2016, Paul McGuire +# +from contextlib import closing +import urllib.request, urllib.parse, urllib.error +from pyparsing import (makeHTMLTags, commonHTMLEntity, replaceHTMLEntity, + htmlComment, anyOpenTag, anyCloseTag, LineEnd, OneOrMore, replaceWith) + +scriptOpen, scriptClose = makeHTMLTags("script") +scriptBody = scriptOpen + scriptOpen.tag_body + scriptClose +commonHTMLEntity.setParseAction(replaceHTMLEntity) + +# get some HTML +targetURL = "https://wiki.python.org/moin/PythonDecoratorLibrary" +with closing(urllib.request.urlopen( targetURL )) as targetPage: + targetHTML = targetPage.read().decode("UTF-8") + +# first pass, strip out tags and translate entities +firstPass = (htmlComment | scriptBody | commonHTMLEntity | + anyOpenTag | anyCloseTag ).suppress().transformString(targetHTML) + +# first pass leaves many blank lines, collapse these down +repeatedNewlines = LineEnd()*(2,) +repeatedNewlines.setParseAction(replaceWith("\n\n")) +secondPass = repeatedNewlines.transformString(firstPass) + +print(secondPass) diff --git a/examples/htmlTableParser.py b/examples/htmlTableParser.py index 5fe8e5f..35cdd03 100644 --- a/examples/htmlTableParser.py +++ b/examples/htmlTableParser.py @@ -23,12 +23,12 @@ a, a_end = pp.makeHTMLTags('a') strip_html = (pp.anyOpenTag | pp.anyCloseTag).suppress().transformString # expression for parsing <a href="url">text</a> links, returning a (text, url) tuple -link = pp.Group(a + pp.SkipTo(a_end)('text') + a_end.suppress()) +link = pp.Group(a + a.tag_body('text') + a_end.suppress()) link.addParseAction(lambda t: (t[0].text, t[0].href)) # method to create table rows of header and data tags def table_row(start_tag, end_tag): - body = pp.SkipTo(end_tag) + body = start_tag.tag_body body.addParseAction(pp.tokenMap(str.strip), pp.tokenMap(strip_html)) row = pp.Group(tr.suppress() diff --git a/examples/makeHTMLTagExample.py b/examples/makeHTMLTagExample.py deleted file mode 100644 index 76774bf..0000000 --- a/examples/makeHTMLTagExample.py +++ /dev/null @@ -1,21 +0,0 @@ -import urllib.request, urllib.parse, urllib.error
-
-from pyparsing import makeHTMLTags, SkipTo
-
-# read HTML from a web page
-serverListPage = urllib.request.urlopen( "https://www.yahoo.com/" )
-htmlText = serverListPage.read()
-serverListPage.close()
-
-# using makeHTMLTags to define opening and closing tags
-anchorStart,anchorEnd = makeHTMLTags("a")
-
-# compose an expression for an anchored reference
-anchor = anchorStart + SkipTo(anchorEnd)("body") + anchorEnd
-
-# use scanString to scan through the HTML source, extracting
-# just the anchor tags and their associated body text
-# (note the href attribute of the opening A tag is available
-# as an attribute in the returned parse results)
-for tokens,start,end in anchor.scanString(htmlText):
- print(tokens.body,'->',tokens.href)
diff --git a/examples/scanYahoo.py b/examples/scanYahoo.py deleted file mode 100644 index 9ecb5e9..0000000 --- a/examples/scanYahoo.py +++ /dev/null @@ -1,14 +0,0 @@ -from pyparsing import makeHTMLTags,SkipTo,htmlComment
-import urllib.request, urllib.parse, urllib.error
-
-serverListPage = urllib.request.urlopen( "https://www.yahoo.com/" )
-htmlText = serverListPage.read()
-serverListPage.close()
-
-aStart,aEnd = makeHTMLTags("A")
-
-link = aStart + SkipTo(aEnd).setResultsName("link") + aEnd
-link.ignore(htmlComment)
-
-for toks,start,end in link.scanString(htmlText):
- print(toks.link, "->", toks.startA.href)
\ No newline at end of file diff --git a/examples/urlExtractor.py b/examples/urlExtractor.py index e4299b9..fbc2fa6 100644 --- a/examples/urlExtractor.py +++ b/examples/urlExtractor.py @@ -1,30 +1,30 @@ -# URL extractor
-# Copyright 2004, Paul McGuire
-from pyparsing import makeHTMLTags, SkipTo, pyparsing_common as ppc
-import urllib.request
-from contextlib import closing
-import pprint
-
-linkOpenTag, linkCloseTag = makeHTMLTags('a')
-
-linkBody = SkipTo(linkCloseTag)
-linkBody.setParseAction(ppc.stripHTMLTags)
-linkBody.addParseAction(lambda toks: ' '.join(toks[0].strip().split()))
-
-link = linkOpenTag + linkBody("body") + linkCloseTag.suppress()
-
-# Go get some HTML with some links in it.
-with closing(urllib.request.urlopen("https://www.yahoo.com/")) as serverListPage:
- htmlText = serverListPage.read().decode("UTF-8")
-
-# scanString is a generator that loops through the input htmlText, and for each
-# match yields the tokens and start and end locations (for this application, we are
-# not interested in the start and end values).
-for toks,strt,end in link.scanString(htmlText):
- print(toks.asList())
-
-# Create dictionary from list comprehension, assembled from each pair of tokens returned
-# from a matched URL.
-pprint.pprint(
- {toks.body: toks.href for toks,strt,end in link.scanString(htmlText)}
- )
+# URL extractor +# Copyright 2004, Paul McGuire +from pyparsing import makeHTMLTags, pyparsing_common as ppc +import urllib.request +from contextlib import closing +import pprint + +linkOpenTag, linkCloseTag = makeHTMLTags('a') + +linkBody = linkOpenTag.tag_body +linkBody.setParseAction(ppc.stripHTMLTags) +linkBody.addParseAction(lambda toks: ' '.join(toks[0].strip().split())) + +link = linkOpenTag + linkBody("body") + linkCloseTag.suppress() + +# Go get some HTML with some links in it. +with closing(urllib.request.urlopen("https://www.cnn.com/")) as serverListPage: + htmlText = serverListPage.read().decode("UTF-8") + +# scanString is a generator that loops through the input htmlText, and for each +# match yields the tokens and start and end locations (for this application, we are +# not interested in the start and end values). +for toks, strt, end in link.scanString(htmlText): + print(toks.asList()) + +# Create dictionary from list comprehension, assembled from each pair of tokens returned +# from a matched URL. +pprint.pprint( + {toks.body: toks.href for toks, strt, end in link.scanString(htmlText)} + ) diff --git a/examples/urlExtractorNew.py b/examples/urlExtractorNew.py index a21b2ab..d876eea 100644 --- a/examples/urlExtractorNew.py +++ b/examples/urlExtractorNew.py @@ -1,31 +1,31 @@ -# URL extractor
-# Copyright 2004, Paul McGuire
-from pyparsing import SkipTo, makeHTMLTags
-import urllib.request, urllib.parse, urllib.error
-import pprint
-
-# Define the pyparsing grammar for a URL, that is:
-# URLlink ::= <a href= URL>linkText</a>
-# URL ::= doubleQuotedString | alphanumericWordPath
-# Note that whitespace may appear just about anywhere in the link. Note also
-# that it is not necessary to explicitly show this in the pyparsing grammar; by default,
-# pyparsing skips over whitespace between tokens.
-linkOpenTag,linkCloseTag = makeHTMLTags("a")
-link = linkOpenTag + SkipTo(linkCloseTag)("body") + linkCloseTag.suppress()
-
-# Go get some HTML with some links in it.
-serverListPage = urllib.request.urlopen( "https://www.google.com/" )
-htmlText = serverListPage.read()
-serverListPage.close()
-
-# scanString is a generator that loops through the input htmlText, and for each
-# match yields the tokens and start and end locations (for this application, we are
-# not interested in the start and end values).
-for toks,strt,end in link.scanString(htmlText):
- print(toks.startA.href,"->",toks.body)
-
-# Create dictionary from list comprehension, assembled from each pair of tokens returned
-# from a matched URL.
-pprint.pprint(
- { toks.body:toks.startA.href for toks,strt,end in link.scanString(htmlText) }
- )
+# URL extractor +# Copyright 2004, Paul McGuire +from pyparsing import makeHTMLTags +from contextlib import closing +import urllib.request, urllib.parse, urllib.error +import pprint + +# Define the pyparsing grammar for a URL, that is: +# URLlink ::= <a href= URL>linkText</a> +# URL ::= doubleQuotedString | alphanumericWordPath +# Note that whitespace may appear just about anywhere in the link. Note also +# that it is not necessary to explicitly show this in the pyparsing grammar; by default, +# pyparsing skips over whitespace between tokens. +linkOpenTag, linkCloseTag = makeHTMLTags("a") +link = linkOpenTag + linkOpenTag.tag_body("body") + linkCloseTag.suppress() + +# Go get some HTML with some links in it. +with closing(urllib.request.urlopen("https://www.cnn.com/")) as serverListPage: + htmlText = serverListPage.read() + +# scanString is a generator that loops through the input htmlText, and for each +# match yields the tokens and start and end locations (for this application, we are +# not interested in the start and end values). +for toks, strt, end in link.scanString(htmlText): + print(toks.startA.href, "->", toks.body) + +# Create dictionary from list comprehension, assembled from each pair of tokens returned +# from a matched URL. +pprint.pprint( + {toks.body: toks.startA.href for toks, strt, end in link.scanString(htmlText)} + ) diff --git a/examples/withAttribute.py b/examples/withAttribute.py index 7fa2bc8..fc0c64a 100644 --- a/examples/withAttribute.py +++ b/examples/withAttribute.py @@ -1,24 +1,26 @@ -#
-# withAttribute.py
-# Copyright, 2007 - Paul McGuire
-#
-# Simple example of using withAttribute parse action helper
-# to define
-#
-data = """\
- <td align=right width=80><font size=2 face="New Times Roman,Times,Serif"> 49.950 </font></td>
- <td align=left width=80><font size=2 face="New Times Roman,Times,Serif"> 50.950 </font></td>
- <td align=right width=80><font size=2 face="New Times Roman,Times,Serif"> 51.950 </font></td>
- """
-
-from pyparsing import *
-
-tdS,tdE = makeHTMLTags("TD")
-fontS,fontE = makeHTMLTags("FONT")
-realNum = Combine( Word(nums) + "." + Word(nums) ).setParseAction(lambda t:float(t[0]))
-NBSP = Literal(" ")
-patt = tdS + fontS + NBSP + realNum("value") + NBSP + fontE + tdE
-
-tdS.setParseAction( withAttribute(align="right",width="80") )
-for s in patt.searchString(data):
- print(s.value)
+# +# withAttribute.py +# Copyright, 2007 - Paul McGuire +# +# Simple example of using withAttribute parse action helper +# to define +# +import pyparsing as pp + +data = """\ + <td align=right width=80><font size=2 face="New Times Roman,Times,Serif"> 49.950 </font></td> + <td align=left width=80><font size=2 face="New Times Roman,Times,Serif"> 50.950 </font></td> + <td align=right width=80><font size=2 face="New Times Roman,Times,Serif"> 51.950 </font></td> + """ + +td, tdEnd = pp.makeHTMLTags("TD") +font, fontEnd = pp.makeHTMLTags("FONT") +realNum = pp.pyparsing_common.real +NBSP = pp.Literal(" ") +patt = td + font + NBSP + realNum("value") + NBSP + fontEnd + tdEnd + +# always use addParseAction when adding withAttribute as a parse action to a start tag +td.addParseAction(pp.withAttribute(align="right", width="80")) + +for s in patt.searchString(data): + print(s.value) |