summaryrefslogtreecommitdiff
path: root/t/20-open.t
diff options
context:
space:
mode:
Diffstat (limited to 't/20-open.t')
-rw-r--r--t/20-open.t150
1 files changed, 150 insertions, 0 deletions
diff --git a/t/20-open.t b/t/20-open.t
new file mode 100644
index 0000000..64d6900
--- /dev/null
+++ b/t/20-open.t
@@ -0,0 +1,150 @@
+#! /usr/bin/perl
+#---------------------------------------------------------------------
+# 20-open.t
+# Copyright 2012 Christopher J. Madsen
+#
+# Actually open files and check the encoding
+#---------------------------------------------------------------------
+
+use strict;
+use warnings;
+
+use Test::More 0.88;
+
+plan tests => 85;
+
+use IO::HTML;
+use File::Temp;
+use Scalar::Util 'blessed';
+
+#---------------------------------------------------------------------
+sub test
+{
+ my ($expected, $out, $data, $name, $nextArg) = @_;
+
+ local $Test::Builder::Level = $Test::Builder::Level + 1;
+
+ my $options;
+ if (ref $name) {
+ $options = $name;
+ $name = $nextArg;
+ }
+
+ unless ($name) {
+ $name = 'test ' . ($expected || 'cp1252');
+ }
+
+ my $tmp = File::Temp->new(UNLINK => 1);
+ open(my $mem, '>', \(my $buf)) or die;
+
+ if ($out) {
+ $out = ":encoding($out)" unless $out =~ /^:/;
+ binmode $tmp, $out;
+ binmode $mem, $out;
+ }
+
+ print $mem $data;
+ print $tmp $data;
+ close $mem;
+ $tmp->close;
+
+ my ($fh, $encoding, $bom) = IO::HTML::file_and_encoding("$tmp", $options);
+
+ if ($options and $options->{encoding}) {
+ ok(blessed($encoding), 'returned an object');
+
+ $encoding = eval { $encoding->name };
+ }
+
+ is($encoding, $expected || 'cp1252', $name);
+
+ my $firstLine = <$fh>;
+ like($firstLine, qr/^<html/i);
+
+ close $fh;
+
+ $fh = html_file("$tmp", $options);
+
+ is(<$fh>, $firstLine);
+
+ close $fh;
+
+ # Test sniff_encoding:
+ undef $mem;
+ open($mem, '<', \$buf) or die "Can't open in-memory file: $!";
+
+ delete $options->{encoding} if $options;
+
+ ($encoding, $bom) = IO::HTML::sniff_encoding($mem, undef, $options);
+
+ is($encoding, $expected);
+
+ seek $mem, 0, 0;
+
+ $options->{encoding} = 1;
+
+ ($encoding, $bom) = IO::HTML::sniff_encoding($mem, undef, $options);
+
+ if (defined $expected) {
+ ok(blessed($encoding), 'encoding is an object');
+
+ is(eval { $encoding->name }, $expected);
+ } else {
+ is($encoding, undef);
+ }
+} # end test
+
+#---------------------------------------------------------------------
+test 'utf-8-strict' => '' => <<'';
+<html><meta charset="UTF-8">
+
+test 'utf-8-strict' => ':utf8' => <<"";
+<html><head><title>Foo\xA0Bar</title>
+
+test undef, latin1 => <<"";
+<html><head><title>Foo\xA0Bar</title>
+
+test 'UTF-16BE' => 'UTF-16BE' => <<"";
+\x{FeFF}<html><head><title>Foo\xA0Bar</title>
+
+test 'utf-8-strict' => ':utf8' => <<"";
+\x{FeFF}<html><meta charset="UTF-16">
+
+test 'utf-8-strict' => ':utf8' => <<"";
+<html><meta charset="UTF-16BE">
+
+test 'UTF-16LE' => 'UTF-16LE' => <<"";
+\x{FeFF}<html><meta charset="UTF-16">
+
+test 'UTF-16LE' => 'UTF-16LE' => <<"", { encoding => 1 };
+\x{FeFF}<html><meta charset="UTF-16">
+
+test 'utf-8-strict' => ':utf8' => <<"", { encoding => 1, need_pragma => 0 };
+<html><meta charset="UTF-16BE">
+
+test 'utf-8-strict' => ':utf8' =>
+ "<html><title>Foo\xA0Bar" . ("\x{2014}" x 512) . "</title>\n",
+ 'UTF-8 character crosses boundary';
+
+test 'utf-8-strict' => ':utf8' =>
+ "<html><title>Foo Bar" . ("\x{2014}" x 512) . "</title>\n",
+ 'UTF-8 character crosses boundary 2';
+
+test undef, '', <<'', 'wrong pragma';
+<html>
+<head>
+<meta http-equiv="X-Content-Type" content="text/html; charset=UTF-8" />
+<title>Title</title>
+
+test 'utf-8-strict', '', <<'', {need_pragma => 0}, 'need_pragma 0';
+<html>
+<head>
+<meta http-equiv="X-Content-Type" content="text/html; charset=UTF-8" />
+<title>Title</title>
+
+test 'iso-8859-15', '', <<"", { encoding => 1, need_pragma => 0 };
+<html>
+<meta content="text/html; charset=ISO-8859-15">
+<meta charset="UTF-16BE">
+
+done_testing;