summaryrefslogtreecommitdiff
path: root/t
diff options
context:
space:
mode:
Diffstat (limited to 't')
-rw-r--r--t/00-all_prereqs.t95
-rw-r--r--t/00-load.t10
-rw-r--r--t/10-find.t133
-rw-r--r--t/20-open.t150
-rw-r--r--t/30-outfile.t61
5 files changed, 449 insertions, 0 deletions
diff --git a/t/00-all_prereqs.t b/t/00-all_prereqs.t
new file mode 100644
index 0000000..668f447
--- /dev/null
+++ b/t/00-all_prereqs.t
@@ -0,0 +1,95 @@
+#!perl
+
+use strict;
+use warnings;
+
+# This doesn't use Test::More because I don't want to clutter %INC
+# with modules that aren't prerequisites.
+
+my $test = 0;
+
+sub ok ($$)
+{
+ my ($ok, $name) = @_;
+
+ printf "%sok %d - %s\n", ($ok ? '' : 'not '), ++$test, $name;
+
+ return $ok;
+} # end ok
+
+END {
+ ok(0, 'unknown failure') unless $test;
+ print "1..$test\n";
+}
+
+sub get_version
+{
+ my ($package) = @_;
+
+ local $@;
+ my $version = eval { $package->VERSION };
+
+ defined $version ? $version : 'undef';
+} # end get_version
+
+TEST: {
+ ok(open(META, '<META.json'), 'opened META.json') or last TEST;
+
+ while (<META>) {
+ last if /^\s*"prereqs" : \{\s*\z/;
+ } # end while <META>
+
+ ok(defined $_, 'found prereqs') or last TEST;
+
+ while (<META>) {
+ last if /^\s*\},?\s*\z/;
+ ok(/^\s*"(.+)" : \{\s*\z/, "found phase $1") or last TEST;
+ my $phase = $1;
+
+ while (<META>) {
+ last if /^\s*\},?\s*\z/;
+ next if /^\s*"[^"]+"\s*:\s*\{\s*\},?\s*\z/;
+ ok(/^\s*"(.+)" : \{\s*\z/, "found relationship $phase $1") or last TEST;
+ my $rel = $1;
+
+ while (<META>) {
+ last if /^\s*\},?\s*\z/;
+ ok(/^\s*"([^"]+)"\s*:\s*(\S+?),?\s*\z/, "found prereq $1")
+ or last TEST;
+ my ($prereq, $version) = ($1, $2);
+
+ next if $phase ne 'runtime' or $prereq eq 'perl';
+
+ # Need a special case for if.pm, because "require if;" is a syntax error.
+ my $loaded = ($prereq eq 'if')
+ ? eval "require '$prereq.pm'; '$prereq'->VERSION($version); 1"
+ : eval "require $prereq; $prereq->VERSION($version); 1";
+ if ($rel eq 'requires') {
+ ok($loaded, "loaded $prereq $version")
+ or printf STDERR "\n# Got: %s %s\n# Wanted: %s %s\n",
+ $prereq, get_version($prereq), $prereq, $version;
+ } else {
+ ok(1, ($loaded ? 'loaded' : 'failed to load') . " $prereq $version");
+ }
+ } # end while <META> in prerequisites
+ } # end while <META> in relationship
+ } # end while <META> in phase
+
+ close META;
+
+ # Print version of all loaded modules:
+ if ($ENV{AUTOMATED_TESTING}) {
+ print STDERR "# Listing %INC\n";
+
+ my @packages = grep { s/\.pm\Z// and do { s![\\/]!::!g; 1 } } sort keys %INC;
+
+ my $len = 0;
+ for (@packages) { $len = length if length > $len }
+ $len = 68 if $len > 68;
+
+ for my $package (@packages) {
+ printf STDERR "# %${len}s %s\n", $package, get_version($package);
+ }
+ } # end if AUTOMATED_TESTING
+} # end TEST
+
diff --git a/t/00-load.t b/t/00-load.t
new file mode 100644
index 0000000..d72f2a0
--- /dev/null
+++ b/t/00-load.t
@@ -0,0 +1,10 @@
+#! /usr/bin/perl
+#---------------------------------------------------------------------
+
+use Test::More tests => 1;
+
+BEGIN {
+ use_ok('IO::HTML');
+}
+
+diag("Testing IO::HTML $IO::HTML::VERSION");
diff --git a/t/10-find.t b/t/10-find.t
new file mode 100644
index 0000000..ef32b1f
--- /dev/null
+++ b/t/10-find.t
@@ -0,0 +1,133 @@
+#! /usr/bin/perl
+#---------------------------------------------------------------------
+# 10-find.t
+# Copyright 2012 Christopher J. Madsen
+#
+# Test the find_charset_in function
+#---------------------------------------------------------------------
+
+use strict;
+use warnings;
+
+use Test::More 0.88; # done_testing
+use Scalar::Util 'blessed';
+
+use IO::HTML 'find_charset_in';
+
+plan tests => 23;
+
+sub test
+{
+ my $charset = shift;
+ my @data = shift;
+ push @data, shift if ref $_[0]; # options for find_charset_in
+ my $name = shift;
+
+ local $Test::Builder::Level = $Test::Builder::Level + 1;
+
+ is(scalar find_charset_in(@data), $charset, $name);
+} # end test
+
+#---------------------------------------------------------------------
+test 'utf-8-strict' => <<'';
+<meta charset="UTF-8">
+
+test 'utf-8-strict' => <<'';
+<!-- UTF-16 is recognized only with a BOM -->
+<meta charset="UTF-16BE">
+
+test 'iso-8859-15' => <<'';
+<meta charset ="ISO-8859-15">
+
+test 'iso-8859-15' => <<'';
+<meta charset= "ISO-8859-15">
+
+test 'iso-8859-15' => <<'';
+<meta charset =
+ "ISO-8859-15">
+
+test 'utf-8-strict' => <<'';
+<meta foo=bar some=" charset =
+ "ISO-8859-15">
+<meta charset="UTF-8">
+
+test 'cp1252' => <<'';
+<meta charset="Windows-1252">
+
+test undef, <<'', 'misspelled charset';
+<meta charseat="Windows-1252">
+
+test 'utf-8-strict' => <<'';
+<meta charset="UTF-8">
+<meta charset="Windows-1252">
+<meta charseat="Windows-1252">
+
+test 'cp1252' => <<'';
+<html>
+<head>
+<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1" />
+<title>Title</title>
+
+test 'iso-8859-15' => <<'';
+<html>
+<head><!-- somebody forgot the quotes -->
+<meta http-equiv=Content-Type content=text/html; charset=ISO-8859-15 />
+<title>Title</title>
+
+test 'iso-8859-15' => <<'';
+<html>
+<head><!-- somebody forgot the quotes -->
+<meta http-equiv
+=Content-Type content=text/html; charset=ISO-8859-15 />
+<title>Title</title>
+
+test 'iso-8859-15' => <<'';
+<html>
+<head><!-- different order -->
+<meta content=text/html; charset=ISO-8859-15 http-equiv=Content-Type>
+<title>Title</title>
+
+test 'cp1252' => <<'';
+<html>
+<head>
+<meta content="text/html;charset=ISO-8859-1" http-equiv=Content-Type>
+<title>Title</title>
+
+test undef, <<'', 'incomplete attribute';
+<html>
+<foo href="c06.
+
+test 'iso-8859-15' => <<'', 'short comment';
+<!--><meta charset="ISO-8859-15">-->
+
+test 'iso-8859-15' => <<'', 'strange comment';
+<!---><meta charset="ISO-8859-15">-->
+
+test undef, <<'', 'inside comment';
+<!-- ><meta charset="ISO-8859-15">-->
+
+test undef, <<'', 'wrong pragma';
+<html>
+<head>
+<meta http-equiv="X-Content-Type" content="text/html; charset=UTF-8" />
+<title>Title</title>
+
+test 'utf-8-strict', <<'', {need_pragma => 0}, 'need_pragma 0';
+<html>
+<head>
+<meta http-equiv="X-Content-Type" content="text/html; charset=UTF-8" />
+<title>Title</title>
+
+test 'iso-8859-15' => <<'', 'bogus encoding';
+<meta charset="Totally-Bogus-Encoding-That-Doesnt-Exist">
+<meta charset=ISO-8859-15>
+
+{
+ my $encoding = find_charset_in('<meta charset="UTF-8">', { encoding => 1 });
+
+ ok(blessed($encoding), 'encoding is an object');
+
+ is(eval { $encoding->name }, 'utf-8-strict', 'encoding is UTF-8');
+}
+
+done_testing;
diff --git a/t/20-open.t b/t/20-open.t
new file mode 100644
index 0000000..64d6900
--- /dev/null
+++ b/t/20-open.t
@@ -0,0 +1,150 @@
+#! /usr/bin/perl
+#---------------------------------------------------------------------
+# 20-open.t
+# Copyright 2012 Christopher J. Madsen
+#
+# Actually open files and check the encoding
+#---------------------------------------------------------------------
+
+use strict;
+use warnings;
+
+use Test::More 0.88;
+
+plan tests => 85;
+
+use IO::HTML;
+use File::Temp;
+use Scalar::Util 'blessed';
+
+#---------------------------------------------------------------------
+sub test
+{
+ my ($expected, $out, $data, $name, $nextArg) = @_;
+
+ local $Test::Builder::Level = $Test::Builder::Level + 1;
+
+ my $options;
+ if (ref $name) {
+ $options = $name;
+ $name = $nextArg;
+ }
+
+ unless ($name) {
+ $name = 'test ' . ($expected || 'cp1252');
+ }
+
+ my $tmp = File::Temp->new(UNLINK => 1);
+ open(my $mem, '>', \(my $buf)) or die;
+
+ if ($out) {
+ $out = ":encoding($out)" unless $out =~ /^:/;
+ binmode $tmp, $out;
+ binmode $mem, $out;
+ }
+
+ print $mem $data;
+ print $tmp $data;
+ close $mem;
+ $tmp->close;
+
+ my ($fh, $encoding, $bom) = IO::HTML::file_and_encoding("$tmp", $options);
+
+ if ($options and $options->{encoding}) {
+ ok(blessed($encoding), 'returned an object');
+
+ $encoding = eval { $encoding->name };
+ }
+
+ is($encoding, $expected || 'cp1252', $name);
+
+ my $firstLine = <$fh>;
+ like($firstLine, qr/^<html/i);
+
+ close $fh;
+
+ $fh = html_file("$tmp", $options);
+
+ is(<$fh>, $firstLine);
+
+ close $fh;
+
+ # Test sniff_encoding:
+ undef $mem;
+ open($mem, '<', \$buf) or die "Can't open in-memory file: $!";
+
+ delete $options->{encoding} if $options;
+
+ ($encoding, $bom) = IO::HTML::sniff_encoding($mem, undef, $options);
+
+ is($encoding, $expected);
+
+ seek $mem, 0, 0;
+
+ $options->{encoding} = 1;
+
+ ($encoding, $bom) = IO::HTML::sniff_encoding($mem, undef, $options);
+
+ if (defined $expected) {
+ ok(blessed($encoding), 'encoding is an object');
+
+ is(eval { $encoding->name }, $expected);
+ } else {
+ is($encoding, undef);
+ }
+} # end test
+
+#---------------------------------------------------------------------
+test 'utf-8-strict' => '' => <<'';
+<html><meta charset="UTF-8">
+
+test 'utf-8-strict' => ':utf8' => <<"";
+<html><head><title>Foo\xA0Bar</title>
+
+test undef, latin1 => <<"";
+<html><head><title>Foo\xA0Bar</title>
+
+test 'UTF-16BE' => 'UTF-16BE' => <<"";
+\x{FeFF}<html><head><title>Foo\xA0Bar</title>
+
+test 'utf-8-strict' => ':utf8' => <<"";
+\x{FeFF}<html><meta charset="UTF-16">
+
+test 'utf-8-strict' => ':utf8' => <<"";
+<html><meta charset="UTF-16BE">
+
+test 'UTF-16LE' => 'UTF-16LE' => <<"";
+\x{FeFF}<html><meta charset="UTF-16">
+
+test 'UTF-16LE' => 'UTF-16LE' => <<"", { encoding => 1 };
+\x{FeFF}<html><meta charset="UTF-16">
+
+test 'utf-8-strict' => ':utf8' => <<"", { encoding => 1, need_pragma => 0 };
+<html><meta charset="UTF-16BE">
+
+test 'utf-8-strict' => ':utf8' =>
+ "<html><title>Foo\xA0Bar" . ("\x{2014}" x 512) . "</title>\n",
+ 'UTF-8 character crosses boundary';
+
+test 'utf-8-strict' => ':utf8' =>
+ "<html><title>Foo Bar" . ("\x{2014}" x 512) . "</title>\n",
+ 'UTF-8 character crosses boundary 2';
+
+test undef, '', <<'', 'wrong pragma';
+<html>
+<head>
+<meta http-equiv="X-Content-Type" content="text/html; charset=UTF-8" />
+<title>Title</title>
+
+test 'utf-8-strict', '', <<'', {need_pragma => 0}, 'need_pragma 0';
+<html>
+<head>
+<meta http-equiv="X-Content-Type" content="text/html; charset=UTF-8" />
+<title>Title</title>
+
+test 'iso-8859-15', '', <<"", { encoding => 1, need_pragma => 0 };
+<html>
+<meta content="text/html; charset=ISO-8859-15">
+<meta charset="UTF-16BE">
+
+done_testing;
diff --git a/t/30-outfile.t b/t/30-outfile.t
new file mode 100644
index 0000000..64e744b
--- /dev/null
+++ b/t/30-outfile.t
@@ -0,0 +1,61 @@
+#! /usr/bin/perl
+#---------------------------------------------------------------------
+# 20-open.t
+# Copyright 2012 Christopher J. Madsen
+#
+# Test the html_outfile function
+#---------------------------------------------------------------------
+
+use strict;
+use warnings;
+
+use Test::More 0.88;
+
+plan tests => 6;
+
+use IO::HTML ':rw';
+use Encode 'find_encoding';
+use File::Temp;
+
+#---------------------------------------------------------------------
+sub test
+{
+ my ($encoding, $bom, $expected) = @_;
+
+ my $name = ref $encoding ? $encoding->name . " object" : $encoding;
+ $name .= ($bom ? ' with BOM' : ' without BOM') if defined $bom;
+
+ local $Test::Builder::Level = $Test::Builder::Level + 1;
+
+ my $tmp = File::Temp->new(UNLINK => 1);
+ $tmp->close;
+
+ my $fh = html_outfile("$tmp", $encoding, $bom);
+
+ print $fh "\xA0\x{2014}";
+
+ close $fh;
+
+ open(my $in, '<:raw', "$tmp") or die $!;
+
+ my $got = do { local $/; <$in> };
+
+ close $in;
+
+ is(unpack('H*', $got), $expected, $name);
+} # end test
+
+#---------------------------------------------------------------------
+test 'utf-8-strict', 0, 'c2a0e28094';
+
+test 'utf-8-strict', 1, 'efbbbfc2a0e28094';
+
+test cp1252 => undef, 'a097';
+
+test 'UTF-16BE', 1, 'feff00a02014';
+
+test 'UTF-16LE', 1, 'fffea0001420';
+
+test find_encoding('UTF-8'), 0, 'c2a0e28094';
+
+done_testing;