diff options
Diffstat (limited to 't')
-rw-r--r-- | t/00-all_prereqs.t | 95 | ||||
-rw-r--r-- | t/00-load.t | 10 | ||||
-rw-r--r-- | t/10-find.t | 133 | ||||
-rw-r--r-- | t/20-open.t | 150 | ||||
-rw-r--r-- | t/30-outfile.t | 61 |
5 files changed, 449 insertions, 0 deletions
diff --git a/t/00-all_prereqs.t b/t/00-all_prereqs.t new file mode 100644 index 0000000..668f447 --- /dev/null +++ b/t/00-all_prereqs.t @@ -0,0 +1,95 @@ +#!perl + +use strict; +use warnings; + +# This doesn't use Test::More because I don't want to clutter %INC +# with modules that aren't prerequisites. + +my $test = 0; + +sub ok ($$) +{ + my ($ok, $name) = @_; + + printf "%sok %d - %s\n", ($ok ? '' : 'not '), ++$test, $name; + + return $ok; +} # end ok + +END { + ok(0, 'unknown failure') unless $test; + print "1..$test\n"; +} + +sub get_version +{ + my ($package) = @_; + + local $@; + my $version = eval { $package->VERSION }; + + defined $version ? $version : 'undef'; +} # end get_version + +TEST: { + ok(open(META, '<META.json'), 'opened META.json') or last TEST; + + while (<META>) { + last if /^\s*"prereqs" : \{\s*\z/; + } # end while <META> + + ok(defined $_, 'found prereqs') or last TEST; + + while (<META>) { + last if /^\s*\},?\s*\z/; + ok(/^\s*"(.+)" : \{\s*\z/, "found phase $1") or last TEST; + my $phase = $1; + + while (<META>) { + last if /^\s*\},?\s*\z/; + next if /^\s*"[^"]+"\s*:\s*\{\s*\},?\s*\z/; + ok(/^\s*"(.+)" : \{\s*\z/, "found relationship $phase $1") or last TEST; + my $rel = $1; + + while (<META>) { + last if /^\s*\},?\s*\z/; + ok(/^\s*"([^"]+)"\s*:\s*(\S+?),?\s*\z/, "found prereq $1") + or last TEST; + my ($prereq, $version) = ($1, $2); + + next if $phase ne 'runtime' or $prereq eq 'perl'; + + # Need a special case for if.pm, because "require if;" is a syntax error. + my $loaded = ($prereq eq 'if') + ? eval "require '$prereq.pm'; '$prereq'->VERSION($version); 1" + : eval "require $prereq; $prereq->VERSION($version); 1"; + if ($rel eq 'requires') { + ok($loaded, "loaded $prereq $version") + or printf STDERR "\n# Got: %s %s\n# Wanted: %s %s\n", + $prereq, get_version($prereq), $prereq, $version; + } else { + ok(1, ($loaded ? 'loaded' : 'failed to load') . " $prereq $version"); + } + } # end while <META> in prerequisites + } # end while <META> in relationship + } # end while <META> in phase + + close META; + + # Print version of all loaded modules: + if ($ENV{AUTOMATED_TESTING}) { + print STDERR "# Listing %INC\n"; + + my @packages = grep { s/\.pm\Z// and do { s![\\/]!::!g; 1 } } sort keys %INC; + + my $len = 0; + for (@packages) { $len = length if length > $len } + $len = 68 if $len > 68; + + for my $package (@packages) { + printf STDERR "# %${len}s %s\n", $package, get_version($package); + } + } # end if AUTOMATED_TESTING +} # end TEST + diff --git a/t/00-load.t b/t/00-load.t new file mode 100644 index 0000000..d72f2a0 --- /dev/null +++ b/t/00-load.t @@ -0,0 +1,10 @@ +#! /usr/bin/perl +#--------------------------------------------------------------------- + +use Test::More tests => 1; + +BEGIN { + use_ok('IO::HTML'); +} + +diag("Testing IO::HTML $IO::HTML::VERSION"); diff --git a/t/10-find.t b/t/10-find.t new file mode 100644 index 0000000..ef32b1f --- /dev/null +++ b/t/10-find.t @@ -0,0 +1,133 @@ +#! /usr/bin/perl +#--------------------------------------------------------------------- +# 10-find.t +# Copyright 2012 Christopher J. Madsen +# +# Test the find_charset_in function +#--------------------------------------------------------------------- + +use strict; +use warnings; + +use Test::More 0.88; # done_testing +use Scalar::Util 'blessed'; + +use IO::HTML 'find_charset_in'; + +plan tests => 23; + +sub test +{ + my $charset = shift; + my @data = shift; + push @data, shift if ref $_[0]; # options for find_charset_in + my $name = shift; + + local $Test::Builder::Level = $Test::Builder::Level + 1; + + is(scalar find_charset_in(@data), $charset, $name); +} # end test + +#--------------------------------------------------------------------- +test 'utf-8-strict' => <<''; +<meta charset="UTF-8"> + +test 'utf-8-strict' => <<''; +<!-- UTF-16 is recognized only with a BOM --> +<meta charset="UTF-16BE"> + +test 'iso-8859-15' => <<''; +<meta charset ="ISO-8859-15"> + +test 'iso-8859-15' => <<''; +<meta charset= "ISO-8859-15"> + +test 'iso-8859-15' => <<''; +<meta charset = + "ISO-8859-15"> + +test 'utf-8-strict' => <<''; +<meta foo=bar some=" charset = + "ISO-8859-15"> +<meta charset="UTF-8"> + +test 'cp1252' => <<''; +<meta charset="Windows-1252"> + +test undef, <<'', 'misspelled charset'; +<meta charseat="Windows-1252"> + +test 'utf-8-strict' => <<''; +<meta charset="UTF-8"> +<meta charset="Windows-1252"> +<meta charseat="Windows-1252"> + +test 'cp1252' => <<''; +<html> +<head> +<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1" /> +<title>Title</title> + +test 'iso-8859-15' => <<''; +<html> +<head><!-- somebody forgot the quotes --> +<meta http-equiv=Content-Type content=text/html; charset=ISO-8859-15 /> +<title>Title</title> + +test 'iso-8859-15' => <<''; +<html> +<head><!-- somebody forgot the quotes --> +<meta http-equiv +=Content-Type content=text/html; charset=ISO-8859-15 /> +<title>Title</title> + +test 'iso-8859-15' => <<''; +<html> +<head><!-- different order --> +<meta content=text/html; charset=ISO-8859-15 http-equiv=Content-Type> +<title>Title</title> + +test 'cp1252' => <<''; +<html> +<head> +<meta content="text/html;charset=ISO-8859-1" http-equiv=Content-Type> +<title>Title</title> + +test undef, <<'', 'incomplete attribute'; +<html> +<foo href="c06. + +test 'iso-8859-15' => <<'', 'short comment'; +<!--><meta charset="ISO-8859-15">--> + +test 'iso-8859-15' => <<'', 'strange comment'; +<!---><meta charset="ISO-8859-15">--> + +test undef, <<'', 'inside comment'; +<!-- ><meta charset="ISO-8859-15">--> + +test undef, <<'', 'wrong pragma'; +<html> +<head> +<meta http-equiv="X-Content-Type" content="text/html; charset=UTF-8" /> +<title>Title</title> + +test 'utf-8-strict', <<'', {need_pragma => 0}, 'need_pragma 0'; +<html> +<head> +<meta http-equiv="X-Content-Type" content="text/html; charset=UTF-8" /> +<title>Title</title> + +test 'iso-8859-15' => <<'', 'bogus encoding'; +<meta charset="Totally-Bogus-Encoding-That-Doesnt-Exist"> +<meta charset=ISO-8859-15> + +{ + my $encoding = find_charset_in('<meta charset="UTF-8">', { encoding => 1 }); + + ok(blessed($encoding), 'encoding is an object'); + + is(eval { $encoding->name }, 'utf-8-strict', 'encoding is UTF-8'); +} + +done_testing; diff --git a/t/20-open.t b/t/20-open.t new file mode 100644 index 0000000..64d6900 --- /dev/null +++ b/t/20-open.t @@ -0,0 +1,150 @@ +#! /usr/bin/perl +#--------------------------------------------------------------------- +# 20-open.t +# Copyright 2012 Christopher J. Madsen +# +# Actually open files and check the encoding +#--------------------------------------------------------------------- + +use strict; +use warnings; + +use Test::More 0.88; + +plan tests => 85; + +use IO::HTML; +use File::Temp; +use Scalar::Util 'blessed'; + +#--------------------------------------------------------------------- +sub test +{ + my ($expected, $out, $data, $name, $nextArg) = @_; + + local $Test::Builder::Level = $Test::Builder::Level + 1; + + my $options; + if (ref $name) { + $options = $name; + $name = $nextArg; + } + + unless ($name) { + $name = 'test ' . ($expected || 'cp1252'); + } + + my $tmp = File::Temp->new(UNLINK => 1); + open(my $mem, '>', \(my $buf)) or die; + + if ($out) { + $out = ":encoding($out)" unless $out =~ /^:/; + binmode $tmp, $out; + binmode $mem, $out; + } + + print $mem $data; + print $tmp $data; + close $mem; + $tmp->close; + + my ($fh, $encoding, $bom) = IO::HTML::file_and_encoding("$tmp", $options); + + if ($options and $options->{encoding}) { + ok(blessed($encoding), 'returned an object'); + + $encoding = eval { $encoding->name }; + } + + is($encoding, $expected || 'cp1252', $name); + + my $firstLine = <$fh>; + like($firstLine, qr/^<html/i); + + close $fh; + + $fh = html_file("$tmp", $options); + + is(<$fh>, $firstLine); + + close $fh; + + # Test sniff_encoding: + undef $mem; + open($mem, '<', \$buf) or die "Can't open in-memory file: $!"; + + delete $options->{encoding} if $options; + + ($encoding, $bom) = IO::HTML::sniff_encoding($mem, undef, $options); + + is($encoding, $expected); + + seek $mem, 0, 0; + + $options->{encoding} = 1; + + ($encoding, $bom) = IO::HTML::sniff_encoding($mem, undef, $options); + + if (defined $expected) { + ok(blessed($encoding), 'encoding is an object'); + + is(eval { $encoding->name }, $expected); + } else { + is($encoding, undef); + } +} # end test + +#--------------------------------------------------------------------- +test 'utf-8-strict' => '' => <<''; +<html><meta charset="UTF-8"> + +test 'utf-8-strict' => ':utf8' => <<""; +<html><head><title>Foo\xA0Bar</title> + +test undef, latin1 => <<""; +<html><head><title>Foo\xA0Bar</title> + +test 'UTF-16BE' => 'UTF-16BE' => <<""; +\x{FeFF}<html><head><title>Foo\xA0Bar</title> + +test 'utf-8-strict' => ':utf8' => <<""; +\x{FeFF}<html><meta charset="UTF-16"> + +test 'utf-8-strict' => ':utf8' => <<""; +<html><meta charset="UTF-16BE"> + +test 'UTF-16LE' => 'UTF-16LE' => <<""; +\x{FeFF}<html><meta charset="UTF-16"> + +test 'UTF-16LE' => 'UTF-16LE' => <<"", { encoding => 1 }; +\x{FeFF}<html><meta charset="UTF-16"> + +test 'utf-8-strict' => ':utf8' => <<"", { encoding => 1, need_pragma => 0 }; +<html><meta charset="UTF-16BE"> + +test 'utf-8-strict' => ':utf8' => + "<html><title>Foo\xA0Bar" . ("\x{2014}" x 512) . "</title>\n", + 'UTF-8 character crosses boundary'; + +test 'utf-8-strict' => ':utf8' => + "<html><title>Foo Bar" . ("\x{2014}" x 512) . "</title>\n", + 'UTF-8 character crosses boundary 2'; + +test undef, '', <<'', 'wrong pragma'; +<html> +<head> +<meta http-equiv="X-Content-Type" content="text/html; charset=UTF-8" /> +<title>Title</title> + +test 'utf-8-strict', '', <<'', {need_pragma => 0}, 'need_pragma 0'; +<html> +<head> +<meta http-equiv="X-Content-Type" content="text/html; charset=UTF-8" /> +<title>Title</title> + +test 'iso-8859-15', '', <<"", { encoding => 1, need_pragma => 0 }; +<html> +<meta content="text/html; charset=ISO-8859-15"> +<meta charset="UTF-16BE"> + +done_testing; diff --git a/t/30-outfile.t b/t/30-outfile.t new file mode 100644 index 0000000..64e744b --- /dev/null +++ b/t/30-outfile.t @@ -0,0 +1,61 @@ +#! /usr/bin/perl +#--------------------------------------------------------------------- +# 20-open.t +# Copyright 2012 Christopher J. Madsen +# +# Test the html_outfile function +#--------------------------------------------------------------------- + +use strict; +use warnings; + +use Test::More 0.88; + +plan tests => 6; + +use IO::HTML ':rw'; +use Encode 'find_encoding'; +use File::Temp; + +#--------------------------------------------------------------------- +sub test +{ + my ($encoding, $bom, $expected) = @_; + + my $name = ref $encoding ? $encoding->name . " object" : $encoding; + $name .= ($bom ? ' with BOM' : ' without BOM') if defined $bom; + + local $Test::Builder::Level = $Test::Builder::Level + 1; + + my $tmp = File::Temp->new(UNLINK => 1); + $tmp->close; + + my $fh = html_outfile("$tmp", $encoding, $bom); + + print $fh "\xA0\x{2014}"; + + close $fh; + + open(my $in, '<:raw', "$tmp") or die $!; + + my $got = do { local $/; <$in> }; + + close $in; + + is(unpack('H*', $got), $expected, $name); +} # end test + +#--------------------------------------------------------------------- +test 'utf-8-strict', 0, 'c2a0e28094'; + +test 'utf-8-strict', 1, 'efbbbfc2a0e28094'; + +test cp1252 => undef, 'a097'; + +test 'UTF-16BE', 1, 'feff00a02014'; + +test 'UTF-16LE', 1, 'fffea0001420'; + +test find_encoding('UTF-8'), 0, 'c2a0e28094'; + +done_testing; |