diff options
| author | Lorry Tar Creator <lorry-tar-importer@lorry> | 2013-05-08 22:21:52 +0000 |
|---|---|---|
| committer | Lorry Tar Creator <lorry-tar-importer@lorry> | 2013-05-08 22:21:52 +0000 |
| commit | 2f253cfc85ffd55a8acb988e91f0bc5ab348124c (patch) | |
| tree | 4734ccd522c71dd455879162006742002f8c1565 /eg/htext | |
| download | HTML-Parser-tarball-master.tar.gz | |
HTML-Parser-3.71 · HEAD · HTML-Parser-3.71 · master
Diffstat (limited to 'eg/htext')
| -rwxr-xr-x | eg/htext | 29 |
1 files changed, 29 insertions, 0 deletions
```diff
diff --git a/eg/htext b/eg/htext
new file mode 100755
index 0000000..e4d276d
--- /dev/null
+++ b/eg/htext
@@ -0,0 +1,29 @@
+#!/usr/bin/perl -w
+
+# Extract all plain text from an HTML file
+
+use strict;
+use HTML::Parser 3.00 ();
+
+my %inside;
+
+sub tag
+{
+    my($tag, $num) = @_;
+    $inside{$tag} += $num;
+    print " "; # not for all tags
+}
+
+sub text
+{
+    return if $inside{script} || $inside{style};
+    print $_[0];
+}
+
+HTML::Parser->new(api_version => 3,
+                  handlers => [start => [\&tag, "tagname, '+1'"],
+                               end => [\&tag, "tagname, '-1'"],
+                               text => [\&text, "dtext"],
+                              ],
+                  marked_sections => 1,
+                 )->parse_file(shift) || die "Can't open file: $!\n";;
```
