diff options
Diffstat (limited to 't/20-open.t')
-rw-r--r-- | t/20-open.t | 150 |
1 files changed, 150 insertions, 0 deletions
diff --git a/t/20-open.t b/t/20-open.t new file mode 100644 index 0000000..64d6900 --- /dev/null +++ b/t/20-open.t @@ -0,0 +1,150 @@ +#! /usr/bin/perl +#--------------------------------------------------------------------- +# 20-open.t +# Copyright 2012 Christopher J. Madsen +# +# Actually open files and check the encoding +#--------------------------------------------------------------------- + +use strict; +use warnings; + +use Test::More 0.88; + +plan tests => 85; + +use IO::HTML; +use File::Temp; +use Scalar::Util 'blessed'; + +#--------------------------------------------------------------------- +sub test +{ + my ($expected, $out, $data, $name, $nextArg) = @_; + + local $Test::Builder::Level = $Test::Builder::Level + 1; + + my $options; + if (ref $name) { + $options = $name; + $name = $nextArg; + } + + unless ($name) { + $name = 'test ' . ($expected || 'cp1252'); + } + + my $tmp = File::Temp->new(UNLINK => 1); + open(my $mem, '>', \(my $buf)) or die; + + if ($out) { + $out = ":encoding($out)" unless $out =~ /^:/; + binmode $tmp, $out; + binmode $mem, $out; + } + + print $mem $data; + print $tmp $data; + close $mem; + $tmp->close; + + my ($fh, $encoding, $bom) = IO::HTML::file_and_encoding("$tmp", $options); + + if ($options and $options->{encoding}) { + ok(blessed($encoding), 'returned an object'); + + $encoding = eval { $encoding->name }; + } + + is($encoding, $expected || 'cp1252', $name); + + my $firstLine = <$fh>; + like($firstLine, qr/^<html/i); + + close $fh; + + $fh = html_file("$tmp", $options); + + is(<$fh>, $firstLine); + + close $fh; + + # Test sniff_encoding: + undef $mem; + open($mem, '<', \$buf) or die "Can't open in-memory file: $!"; + + delete $options->{encoding} if $options; + + ($encoding, $bom) = IO::HTML::sniff_encoding($mem, undef, $options); + + is($encoding, $expected); + + seek $mem, 0, 0; + + $options->{encoding} = 1; + + ($encoding, $bom) = IO::HTML::sniff_encoding($mem, undef, $options); + + if (defined $expected) { + ok(blessed($encoding), 'encoding is an object'); + + is(eval { $encoding->name }, $expected); + } else { + is($encoding, undef); + } +} # end test + +#--------------------------------------------------------------------- +test 'utf-8-strict' => '' => <<''; +<html><meta charset="UTF-8"> + +test 'utf-8-strict' => ':utf8' => <<""; +<html><head><title>Foo\xA0Bar</title> + +test undef, latin1 => <<""; +<html><head><title>Foo\xA0Bar</title> + +test 'UTF-16BE' => 'UTF-16BE' => <<""; +\x{FeFF}<html><head><title>Foo\xA0Bar</title> + +test 'utf-8-strict' => ':utf8' => <<""; +\x{FeFF}<html><meta charset="UTF-16"> + +test 'utf-8-strict' => ':utf8' => <<""; +<html><meta charset="UTF-16BE"> + +test 'UTF-16LE' => 'UTF-16LE' => <<""; +\x{FeFF}<html><meta charset="UTF-16"> + +test 'UTF-16LE' => 'UTF-16LE' => <<"", { encoding => 1 }; +\x{FeFF}<html><meta charset="UTF-16"> + +test 'utf-8-strict' => ':utf8' => <<"", { encoding => 1, need_pragma => 0 }; +<html><meta charset="UTF-16BE"> + +test 'utf-8-strict' => ':utf8' => + "<html><title>Foo\xA0Bar" . ("\x{2014}" x 512) . "</title>\n", + 'UTF-8 character crosses boundary'; + +test 'utf-8-strict' => ':utf8' => + "<html><title>Foo Bar" . ("\x{2014}" x 512) . "</title>\n", + 'UTF-8 character crosses boundary 2'; + +test undef, '', <<'', 'wrong pragma'; +<html> +<head> +<meta http-equiv="X-Content-Type" content="text/html; charset=UTF-8" /> +<title>Title</title> + +test 'utf-8-strict', '', <<'', {need_pragma => 0}, 'need_pragma 0'; +<html> +<head> +<meta http-equiv="X-Content-Type" content="text/html; charset=UTF-8" /> +<title>Title</title> + +test 'iso-8859-15', '', <<"", { encoding => 1, need_pragma => 0 }; +<html> +<meta content="text/html; charset=ISO-8859-15"> +<meta charset="UTF-16BE"> + +done_testing; |