5 files changed, 449 insertions, 0 deletions
diff --git a/t/00-all_prereqs.t b/t/00-all_prereqs.t
new file mode 100644
index 0000000..668f447
--- /dev/null
+++ b/t/00-all_prereqs.t
@@ -0,0 +1,95 @@
+#!perl
+
+use strict;
+use warnings;
+
+# This doesn't use Test::More because I don't want to clutter %INC
+# with modules that aren't prerequisites.
+
+my $test = 0;
+
+sub ok ($$)
+{
+  my ($ok, $name) = @_;
+
+  printf "%sok %d - %s\n", ($ok ? '' : 'not '), ++$test, $name;
+
+  return $ok;
+} # end ok
+
+END {
+  ok(0, 'unknown failure') unless $test;
+  print "1..$test\n";
+}
+
+sub get_version
+{
+  my ($package) = @_;
+
+  local $@;
+  my $version = eval { $package->VERSION };
+
+  defined $version ? $version : 'undef';
+} # end get_version
+
+TEST: {
+  ok(open(META, '<META.json'), 'opened META.json') or last TEST;
+
+  while (<META>) {
+     last if /^\s*"prereqs" : \{\s*\z/;
+  } # end while <META>
+
+  ok(defined $_, 'found prereqs') or last TEST;
+
+  while (<META>) {
+    last if /^\s*\},?\s*\z/;
+    ok(/^\s*"(.+)" : \{\s*\z/, "found phase $1") or last TEST;
+    my $phase = $1;
+
+    while (<META>) {
+      last if /^\s*\},?\s*\z/;
+      next if /^\s*"[^"]+"\s*:\s*\{\s*\},?\s*\z/;
+      ok(/^\s*"(.+)" : \{\s*\z/, "found relationship $phase $1") or last TEST;
+      my $rel = $1;
+
+      while (<META>) {
+        last if /^\s*\},?\s*\z/;
+        ok(/^\s*"([^"]+)"\s*:\s*(\S+?),?\s*\z/, "found prereq $1")
+            or last TEST;
+        my ($prereq, $version) = ($1, $2);
+
+        next if $phase ne 'runtime' or $prereq eq 'perl';
+
+        # Need a special case for if.pm, because "require if;" is a syntax error.
+        my $loaded = ($prereq eq 'if')
+            ? eval "require '$prereq.pm'; '$prereq'->VERSION($version); 1"
+            : eval "require $prereq; $prereq->VERSION($version); 1";
+        if ($rel eq 'requires') {
+          ok($loaded, "loaded $prereq $version")
+              or printf STDERR "\n#    Got: %s %s\n# Wanted: %s %s\n",
+                  $prereq, get_version($prereq), $prereq, $version;
+        } else {
+          ok(1, ($loaded ? 'loaded' : 'failed to load') . " $prereq $version");
+        }
+      } # end while <META> in prerequisites
+    } # end while <META> in relationship
+  } # end while <META> in phase
+
+  close META;
+
+  # Print version of all loaded modules:
+  if ($ENV{AUTOMATED_TESTING}) {
+    print STDERR "# Listing %INC\n";
+
+    my @packages = grep { s/\.pm\Z// and do { s![\\/]!::!g; 1 } } sort keys %INC;
+
+    my $len = 0;
+    for (@packages) { $len = length if length > $len }
+    $len = 68 if $len > 68;
+
+    for my $package (@packages) {
+      printf STDERR "# %${len}s %s\n", $package, get_version($package);
+    }
+  } # end if AUTOMATED_TESTING
+} # end TEST
+
diff --git a/t/00-load.t b/t/00-load.t
new file mode 100644
index 0000000..d72f2a0
--- /dev/null
+++ b/t/00-load.t
@@ -0,0 +1,10 @@
+#! /usr/bin/perl
+#---------------------------------------------------------------------
+
+use Test::More tests => 1;
+
+BEGIN {
+    use_ok('IO::HTML');
+}
+
+diag("Testing IO::HTML $IO::HTML::VERSION");
diff --git a/t/10-find.t b/t/10-find.t
new file mode 100644
index 0000000..ef32b1f
--- /dev/null
+++ b/t/10-find.t
@@ -0,0 +1,133 @@
+#! /usr/bin/perl
+#---------------------------------------------------------------------
+# 10-find.t
+# Copyright 2012 Christopher J. Madsen
+#
+# Test the find_charset_in function
+#---------------------------------------------------------------------
+
+use strict;
+use warnings;
+
+use Test::More 0.88;            # done_testing
+use Scalar::Util 'blessed';
+
+use IO::HTML 'find_charset_in';
+
+plan tests => 23;
+
+sub test
+{
+  my $charset = shift;
+  my @data = shift;
+  push @data, shift if ref $_[0]; # options for find_charset_in
+  my $name = shift;
+
+  local $Test::Builder::Level = $Test::Builder::Level + 1;
+
+  is(scalar find_charset_in(@data), $charset, $name);
+} # end test
+
+#---------------------------------------------------------------------
+test 'utf-8-strict' => <<'';
+<meta charset="UTF-8">
+
+test 'utf-8-strict' => <<'';
+<!-- UTF-16 is recognized only with a BOM -->
+<meta charset="UTF-16BE">
+
+test 'iso-8859-15' => <<'';
+<meta charset ="ISO-8859-15">
+
+test 'iso-8859-15' => <<'';
+<meta charset= "ISO-8859-15">
+
+test 'iso-8859-15' => <<'';
+<meta charset =
+ "ISO-8859-15">
+
+test 'utf-8-strict' => <<'';
+<meta foo=bar some=" charset =
+ "ISO-8859-15">
+<meta charset="UTF-8">
+
+test 'cp1252' => <<'';
+<meta charset="Windows-1252">
+
+test undef, <<'', 'misspelled charset';
+<meta charseat="Windows-1252">
+
+test 'utf-8-strict' => <<'';
+<meta charset="UTF-8">
+<meta charset="Windows-1252">
+<meta charseat="Windows-1252">
+
+test 'cp1252' => <<'';
+<html>
+<head>
+<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1" />
+<title>Title</title>
+
+test 'iso-8859-15' => <<'';
+<html>
+<head><!-- somebody forgot the quotes -->
+<meta http-equiv=Content-Type content=text/html; charset=ISO-8859-15 />
+<title>Title</title>
+
+test 'iso-8859-15' => <<'';
+<html>
+<head><!-- somebody forgot the quotes -->
+<meta http-equiv
+=Content-Type content=text/html; charset=ISO-8859-15 />
+<title>Title</title>
+
+test 'iso-8859-15' => <<'';
+<html>
+<head><!-- different order -->
+<meta content=text/html; charset=ISO-8859-15 http-equiv=Content-Type>
+<title>Title</title>
+
+test 'cp1252' => <<'';
+<html>
+<head>
+<meta content="text/html;charset=ISO-8859-1" http-equiv=Content-Type>
+<title>Title</title>
+
+test undef, <<'', 'incomplete attribute';
+<html>
+<foo href="c06.
+
+test 'iso-8859-15' => <<'', 'short comment';
+<!--><meta charset="ISO-8859-15">-->
+
+test 'iso-8859-15' => <<'', 'strange comment';
+<!---><meta charset="ISO-8859-15">-->
+
+test undef, <<'', 'inside comment';
+<!-- ><meta charset="ISO-8859-15">-->
+
+test undef, <<'', 'wrong pragma';
+<html>
+<head>
+<meta http-equiv="X-Content-Type" content="text/html; charset=UTF-8" />
+<title>Title</title>
+
+test 'utf-8-strict', <<'', {need_pragma => 0}, 'need_pragma 0';
+<html>
+<head>
+<meta http-equiv="X-Content-Type" content="text/html; charset=UTF-8" />
+<title>Title</title>
+
+test 'iso-8859-15' => <<'', 'bogus encoding';
+<meta charset="Totally-Bogus-Encoding-That-Doesnt-Exist">
+<meta charset=ISO-8859-15>
+
+{
+  my $encoding = find_charset_in('<meta charset="UTF-8">', { encoding => 1 });
+
+  ok(blessed($encoding), 'encoding is an object');
+
+  is(eval { $encoding->name }, 'utf-8-strict', 'encoding is UTF-8');
+}
+
+done_testing;
diff --git a/t/20-open.t b/t/20-open.t
new file mode 100644
index 0000000..64d6900
--- /dev/null
+++ b/t/20-open.t
@@ -0,0 +1,150 @@
+#! /usr/bin/perl
+#---------------------------------------------------------------------
+# 20-open.t
+# Copyright 2012 Christopher J. Madsen
+#
+# Actually open files and check the encoding
+#---------------------------------------------------------------------
+
+use strict;
+use warnings;
+
+use Test::More 0.88;
+
+plan tests => 85;
+
+use IO::HTML;
+use File::Temp;
+use Scalar::Util 'blessed';
+
+#---------------------------------------------------------------------
+sub test
+{
+  my ($expected, $out, $data, $name, $nextArg) = @_;
+
+  local $Test::Builder::Level = $Test::Builder::Level + 1;
+
+  my $options;
+  if (ref $name) {
+    $options = $name;
+    $name    = $nextArg;
+  }
+
+  unless ($name) {
+    $name = 'test ' . ($expected || 'cp1252');
+  }
+
+  my $tmp = File::Temp->new(UNLINK => 1);
+  open(my $mem, '>', \(my $buf)) or die;
+
+  if ($out) {
+    $out = ":encoding($out)" unless $out =~ /^:/;
+    binmode $tmp, $out;
+    binmode $mem, $out;
+  }
+
+  print $mem $data;
+  print $tmp $data;
+  close $mem;
+  $tmp->close;
+
+  my ($fh, $encoding, $bom) = IO::HTML::file_and_encoding("$tmp", $options);
+
+  if ($options and $options->{encoding}) {
+    ok(blessed($encoding), 'returned an object');
+
+    $encoding = eval { $encoding->name };
+  }
+
+  is($encoding, $expected || 'cp1252', $name);
+
+  my $firstLine = <$fh>;
+  like($firstLine, qr/^<html/i);
+
+  close $fh;
+
+  $fh = html_file("$tmp", $options);
+
+  is(<$fh>, $firstLine);
+
+  close $fh;
+
+  # Test sniff_encoding:
+  undef $mem;
+  open($mem, '<', \$buf) or die "Can't open in-memory file: $!";
+
+  delete $options->{encoding} if $options;
+
+  ($encoding, $bom) = IO::HTML::sniff_encoding($mem, undef, $options);
+
+  is($encoding, $expected);
+
+  seek $mem, 0, 0;
+
+  $options->{encoding} = 1;
+
+  ($encoding, $bom) = IO::HTML::sniff_encoding($mem, undef, $options);
+
+  if (defined $expected) {
+    ok(blessed($encoding), 'encoding is an object');
+
+    is(eval { $encoding->name }, $expected);
+  } else {
+    is($encoding, undef);
+  }
+} # end test
+
+#---------------------------------------------------------------------
+test 'utf-8-strict' => '' => <<'';
+<html><meta charset="UTF-8">
+
+test 'utf-8-strict' => ':utf8' => <<"";
+<html><head><title>Foo\xA0Bar</title>
+
+test undef, latin1 => <<"";
+<html><head><title>Foo\xA0Bar</title>
+
+test 'UTF-16BE' => 'UTF-16BE' => <<"";
+\x{FeFF}<html><head><title>Foo\xA0Bar</title>
+
+test 'utf-8-strict' => ':utf8' => <<"";
+\x{FeFF}<html><meta charset="UTF-16">
+
+test 'utf-8-strict' => ':utf8' => <<"";
+<html><meta charset="UTF-16BE">
+
+test 'UTF-16LE' => 'UTF-16LE' => <<"";
+\x{FeFF}<html><meta charset="UTF-16">
+
+test 'UTF-16LE' => 'UTF-16LE' => <<"", { encoding => 1 };
+\x{FeFF}<html><meta charset="UTF-16">
+
+test 'utf-8-strict' => ':utf8' => <<"", { encoding => 1, need_pragma => 0 };
+<html><meta charset="UTF-16BE">
+
+test 'utf-8-strict' => ':utf8' =>
+  "<html><title>Foo\xA0Bar" . ("\x{2014}" x 512) . "</title>\n",
+  'UTF-8 character crosses boundary';
+
+test 'utf-8-strict' => ':utf8' =>
+  "<html><title>Foo Bar" . ("\x{2014}" x 512) . "</title>\n",
+  'UTF-8 character crosses boundary 2';
+
+test undef, '', <<'', 'wrong pragma';
+<html>
+<head>
+<meta http-equiv="X-Content-Type" content="text/html; charset=UTF-8" />
+<title>Title</title>
+
+test 'utf-8-strict', '', <<'', {need_pragma => 0}, 'need_pragma 0';
+<html>
+<head>
+<meta http-equiv="X-Content-Type" content="text/html; charset=UTF-8" />
+<title>Title</title>
+
+test 'iso-8859-15', '', <<"", { encoding => 1, need_pragma => 0 };
+<html>
+<meta content="text/html; charset=ISO-8859-15">
+<meta charset="UTF-16BE">
+
+done_testing;
diff --git a/t/30-outfile.t b/t/30-outfile.t
new file mode 100644
index 0000000..64e744b
--- /dev/null
+++ b/t/30-outfile.t
@@ -0,0 +1,61 @@
+#! /usr/bin/perl
+#---------------------------------------------------------------------
+# 20-open.t
+# Copyright 2012 Christopher J. Madsen
+#
+# Test the html_outfile function
+#---------------------------------------------------------------------
+
+use strict;
+use warnings;
+
+use Test::More 0.88;
+
+plan tests => 6;
+
+use IO::HTML ':rw';
+use Encode 'find_encoding';
+use File::Temp;
+
+#---------------------------------------------------------------------
+sub test
+{
+  my ($encoding, $bom, $expected) = @_;
+
+  my $name = ref $encoding ? $encoding->name . " object" : $encoding;
+  $name .= ($bom ? ' with BOM' : ' without BOM') if defined $bom;
+
+  local $Test::Builder::Level = $Test::Builder::Level + 1;
+
+  my $tmp = File::Temp->new(UNLINK => 1);
+  $tmp->close;
+
+  my $fh = html_outfile("$tmp", $encoding, $bom);
+
+  print $fh "\xA0\x{2014}";
+
+  close $fh;
+
+  open(my $in, '<:raw', "$tmp") or die $!;
+
+  my $got = do { local $/; <$in> };
+
+  close $in;
+
+  is(unpack('H*', $got), $expected, $name);
+} # end test
+
+#---------------------------------------------------------------------
+test 'utf-8-strict', 0, 'c2a0e28094';
+
+test 'utf-8-strict', 1, 'efbbbfc2a0e28094';
+
+test cp1252 => undef, 'a097';
+
+test 'UTF-16BE', 1, 'feff00a02014';
+
+test 'UTF-16LE', 1, 'fffea0001420';
+
+test find_encoding('UTF-8'), 0, 'c2a0e28094';
+
+done_testing;