diff options
Diffstat (limited to 'Parser/Style')
-rw-r--r-- | Parser/Style/Debug.pm | 52 | ||||
-rw-r--r-- | Parser/Style/Objects.pm | 78 | ||||
-rw-r--r-- | Parser/Style/Stream.pm | 184 | ||||
-rw-r--r-- | Parser/Style/Subs.pm | 58 | ||||
-rw-r--r-- | Parser/Style/Tree.pm | 90 |
5 files changed, 462 insertions, 0 deletions
diff --git a/Parser/Style/Debug.pm b/Parser/Style/Debug.pm new file mode 100644 index 0000000..89fcd8b --- /dev/null +++ b/Parser/Style/Debug.pm @@ -0,0 +1,52 @@ +# $Id: Debug.pm,v 1.1 2003/07/27 16:07:49 matt Exp $ + +package XML::Parser::Style::Debug; +use strict; + +sub Start { + my $expat = shift; + my $tag = shift; + print STDERR "@{$expat->{Context}} \\\\ (@_)\n"; +} + +sub End { + my $expat = shift; + my $tag = shift; + print STDERR "@{$expat->{Context}} //\n"; +} + +sub Char { + my $expat = shift; + my $text = shift; + $text =~ s/([\x80-\xff])/sprintf "#x%X;", ord $1/eg; + $text =~ s/([\t\n])/sprintf "#%d;", ord $1/eg; + print STDERR "@{$expat->{Context}} || $text\n"; +} + +sub Proc { + my $expat = shift; + my $target = shift; + my $text = shift; + my @foo = @{$expat->{Context}}; + print STDERR "@foo $target($text)\n"; +} + +1; +__END__ + +=head1 NAME + +XML::Parser::Style::Debug - Debug style for XML::Parser + +=head1 SYNOPSIS + + use XML::Parser; + my $p = XML::Parser->new(Style => 'Debug'); + $p->parsefile('foo.xml'); + +=head1 DESCRIPTION + +This just prints out the document in outline form to STDERR. Nothing special is +returned by parse. + +=cut
\ No newline at end of file diff --git a/Parser/Style/Objects.pm b/Parser/Style/Objects.pm new file mode 100644 index 0000000..8603db0 --- /dev/null +++ b/Parser/Style/Objects.pm @@ -0,0 +1,78 @@ +# $Id: Objects.pm,v 1.1 2003/08/18 20:20:51 matt Exp $ + +package XML::Parser::Style::Objects; +use strict; + +sub Init { + my $expat = shift; + $expat->{Lists} = []; + $expat->{Curlist} = $expat->{Tree} = []; +} + +sub Start { + my $expat = shift; + my $tag = shift; + my $newlist = [ ]; + my $class = "${$expat}{Pkg}::$tag"; + my $newobj = bless { @_, Kids => $newlist }, $class; + push @{ $expat->{Lists} }, $expat->{Curlist}; + push @{ $expat->{Curlist} }, $newobj; + $expat->{Curlist} = $newlist; +} + +sub End { + my $expat = shift; + my $tag = shift; + $expat->{Curlist} = pop @{ $expat->{Lists} }; +} + +sub Char { + my $expat = shift; + my $text = shift; + my $class = "${$expat}{Pkg}::Characters"; + my $clist = $expat->{Curlist}; + my $pos = $#$clist; + + if ($pos >= 0 and ref($clist->[$pos]) eq $class) { + $clist->[$pos]->{Text} .= $text; + } else { + push @$clist, bless { Text => $text }, $class; + } +} + +sub Final { + my $expat = shift; + delete $expat->{Curlist}; + delete $expat->{Lists}; + $expat->{Tree}; +} + +1; +__END__ + +=head1 NAME + +XML::Parser::Style::Objects + +=head1 SYNOPSIS + + use XML::Parser; + my $p = XML::Parser->new(Style => 'Objects', Pkg => 'MyNode'); + my $tree = $p->parsefile('foo.xml'); + +=head1 DESCRIPTION + +This module implements XML::Parser's Objects style parser. + +This is similar to the Tree style, except that a hash object is created for +each element. The corresponding object will be in the class whose name +is created by appending "::" and the element name to the package set with +the Pkg option. Non-markup text will be in the ::Characters class. The +contents of the corresponding object will be in an anonymous array that +is the value of the Kids property for that object. + +=head1 SEE ALSO + +L<XML::Parser::Style::Tree> + +=cut
\ No newline at end of file diff --git a/Parser/Style/Stream.pm b/Parser/Style/Stream.pm new file mode 100644 index 0000000..1e2e3f7 --- /dev/null +++ b/Parser/Style/Stream.pm @@ -0,0 +1,184 @@ +# $Id: Stream.pm,v 1.1 2003/07/27 16:07:49 matt Exp $ + +package XML::Parser::Style::Stream; +use strict; + +# This style invented by Tim Bray <tbray@textuality.com> + +sub Init { + no strict 'refs'; + my $expat = shift; + $expat->{Text} = ''; + my $sub = $expat->{Pkg} ."::StartDocument"; + &$sub($expat) + if defined(&$sub); +} + +sub Start { + no strict 'refs'; + my $expat = shift; + my $type = shift; + + doText($expat); + $_ = "<$type"; + + %_ = @_; + while (@_) { + $_ .= ' ' . shift() . '="' . shift() . '"'; + } + $_ .= '>'; + + my $sub = $expat->{Pkg} . "::StartTag"; + if (defined(&$sub)) { + &$sub($expat, $type); + } else { + print; + } +} + +sub End { + no strict 'refs'; + my $expat = shift; + my $type = shift; + + # Set right context for Text handler + push(@{$expat->{Context}}, $type); + doText($expat); + pop(@{$expat->{Context}}); + + $_ = "</$type>"; + + my $sub = $expat->{Pkg} . "::EndTag"; + if (defined(&$sub)) { + &$sub($expat, $type); + } else { + print; + } +} + +sub Char { + my $expat = shift; + $expat->{Text} .= shift; +} + +sub Proc { + no strict 'refs'; + my $expat = shift; + my $target = shift; + my $text = shift; + + doText($expat); + + $_ = "<?$target $text?>"; + + my $sub = $expat->{Pkg} . "::PI"; + if (defined(&$sub)) { + &$sub($expat, $target, $text); + } else { + print; + } +} + +sub Final { + no strict 'refs'; + my $expat = shift; + my $sub = $expat->{Pkg} . "::EndDocument"; + &$sub($expat) + if defined(&$sub); +} + +sub doText { + no strict 'refs'; + my $expat = shift; + $_ = $expat->{Text}; + + if (length($_)) { + my $sub = $expat->{Pkg} . "::Text"; + if (defined(&$sub)) { + &$sub($expat); + } else { + print; + } + + $expat->{Text} = ''; + } +} + +1; +__END__ + +=head1 NAME + +XML::Parser::Style::Stream - Stream style for XML::Parser + +=head1 SYNOPSIS + + use XML::Parser; + my $p = XML::Parser->new(Style => 'Stream', Pkg => 'MySubs'); + $p->parsefile('foo.xml'); + + { + package MySubs; + + sub StartTag { + my ($e, $name) = @_; + # do something with start tags + } + + sub EndTag { + my ($e, $name) = @_; + # do something with end tags + } + + sub Characters { + my ($e, $data) = @_; + # do something with text nodes + } + } + +=head1 DESCRIPTION + +This style uses the Pkg option to find subs in a given package to call for each event. +If none of the subs that this +style looks for is there, then the effect of parsing with this style is +to print a canonical copy of the document without comments or declarations. +All the subs receive as their 1st parameter the Expat instance for the +document they're parsing. + +It looks for the following routines: + +=over 4 + +=item * StartDocument + +Called at the start of the parse . + +=item * StartTag + +Called for every start tag with a second parameter of the element type. The $_ +variable will contain a copy of the tag and the %_ variable will contain +attribute values supplied for that element. + +=item * EndTag + +Called for every end tag with a second parameter of the element type. The $_ +variable will contain a copy of the end tag. + +=item * Text + +Called just before start or end tags with accumulated non-markup text in +the $_ variable. + +=item * PI + +Called for processing instructions. The $_ variable will contain a copy of +the PI and the target and data are sent as 2nd and 3rd parameters +respectively. + +=item * EndDocument + +Called at conclusion of the parse. + +=back + +=cut
\ No newline at end of file diff --git a/Parser/Style/Subs.pm b/Parser/Style/Subs.pm new file mode 100644 index 0000000..15a2143 --- /dev/null +++ b/Parser/Style/Subs.pm @@ -0,0 +1,58 @@ +# $Id: Subs.pm,v 1.1 2003/07/27 16:07:49 matt Exp $ + +package XML::Parser::Style::Subs; + +sub Start { + no strict 'refs'; + my $expat = shift; + my $tag = shift; + my $sub = $expat->{Pkg} . "::$tag"; + eval { &$sub($expat, $tag, @_) }; +} + +sub End { + no strict 'refs'; + my $expat = shift; + my $tag = shift; + my $sub = $expat->{Pkg} . "::${tag}_"; + eval { &$sub($expat, $tag) }; +} + +1; +__END__ + +=head1 NAME + +XML::Parser::Style::Subs + +=head1 SYNOPSIS + + use XML::Parser; + my $p = XML::Parser->new(Style => 'Subs', Pkg => 'MySubs'); + $p->parsefile('foo.xml'); + + { + package MySubs; + + sub foo { + # start of foo tag + } + + sub foo_ { + # end of foo tag + } + } + +=head1 DESCRIPTION + +Each time an element starts, a sub by that name in the package specified +by the Pkg option is called with the same parameters that the Start +handler gets called with. + +Each time an element ends, a sub with that name appended with an underscore +("_"), is called with the same parameters that the End handler gets called +with. + +Nothing special is returned by parse. + +=cut
\ No newline at end of file diff --git a/Parser/Style/Tree.pm b/Parser/Style/Tree.pm new file mode 100644 index 0000000..c0e69f1 --- /dev/null +++ b/Parser/Style/Tree.pm @@ -0,0 +1,90 @@ +# $Id: Tree.pm,v 1.2 2003/07/31 07:54:51 matt Exp $ + +package XML::Parser::Style::Tree; +$XML::Parser::Built_In_Styles{Tree} = 1; + +sub Init { + my $expat = shift; + $expat->{Lists} = []; + $expat->{Curlist} = $expat->{Tree} = []; +} + +sub Start { + my $expat = shift; + my $tag = shift; + my $newlist = [ { @_ } ]; + push @{ $expat->{Lists} }, $expat->{Curlist}; + push @{ $expat->{Curlist} }, $tag => $newlist; + $expat->{Curlist} = $newlist; +} + +sub End { + my $expat = shift; + my $tag = shift; + $expat->{Curlist} = pop @{ $expat->{Lists} }; +} + +sub Char { + my $expat = shift; + my $text = shift; + my $clist = $expat->{Curlist}; + my $pos = $#$clist; + + if ($pos > 0 and $clist->[$pos - 1] eq '0') { + $clist->[$pos] .= $text; + } else { + push @$clist, 0 => $text; + } +} + +sub Final { + my $expat = shift; + delete $expat->{Curlist}; + delete $expat->{Lists}; + $expat->{Tree}; +} + +1; +__END__ + +=head1 NAME + +XML::Parser::Style::Tree + +=head1 SYNOPSIS + + use XML::Parser; + my $p = XML::Parser->new(Style => 'Tree'); + my $tree = $p->parsefile('foo.xml'); + +=head1 DESCRIPTION + +This module implements XML::Parser's Tree style parser. + +When parsing a document, C<parse()> will return a parse tree for the +document. Each node in the tree +takes the form of a tag, content pair. Text nodes are represented with +a pseudo-tag of "0" and the string that is their content. For elements, +the content is an array reference. The first item in the array is a +(possibly empty) hash reference containing attributes. The remainder of +the array is a sequence of tag-content pairs representing the content +of the element. + +So for example the result of parsing: + + <foo><head id="a">Hello <em>there</em></head><bar>Howdy<ref/></bar>do</foo> + +would be: + Tag Content + ================================================================== + [foo, [{}, head, [{id => "a"}, 0, "Hello ", em, [{}, 0, "there"]], + bar, [ {}, 0, "Howdy", ref, [{}]], + 0, "do" + ] + ] + +The root document "foo", has 3 children: a "head" element, a "bar" +element and the text "do". After the empty attribute hash, these are +represented in it's contents by 3 tag-content pairs. + +=cut |