Leser: 20
1 2 3 4 5 6 7 8 9 10 11 12 13
#!/usr/bin/perl -w
use strict;
use warnings;
use utf8;    # source literals are UTF-8 (replaces the `use encoding "utf8"` pragma)
use Encode;
use XML::LibXML;

# The `encoding` pragma is deprecated since Perl 5.18 and its default mode is
# no longer supported as of Perl 5.26. Besides decoding source literals it put
# a UTF-8 layer on STDIN and STDOUT; that part is kept here explicitly.
binmode STDIN,  ':encoding(utf8)';
binmode STDOUT, ':encoding(utf8)';

# Parse every entry of @files (filled outside this snippet) as HTML.
# $_ stays the loop variable because the elided loop body may rely on it.
foreach (@files) {
    # Lenient parser: recover from malformed markup and keep libxml2's
    # error/warning output quiet (see the XML::LibXML::Parser options).
    my $parser = XML::LibXML->new({
        recover           => 2,
        suppress_errors   => 1,
        suppress_warnings => 1,
        encoding          => "utf8",
    });

    # load_html still dies on hard failures; wrap the call in an eval block
    # if one bad file must not abort the whole run.
    my $tree = $parser->load_html(location => $_);
    ...    # rest of the loop body was elided in the original snippet
}
Quote: (...) These errors can be caught by using eval blocks. (...)
1 2 3 4 5 6 7 8 9 10 11 12 13
# Tests whether a string of octets is well-formed UTF-8.
#
# Parameters:
#   $text - the raw bytes to examine (e.g. read through a ':raw' handle).
#           Text that was already decoded into characters cannot be judged
#           reliably, because decoding discards the original byte sequence.
#
# Returns 1 if the octets form valid UTF-8 (pure ASCII, the empty string and
# undef count as valid), 0 otherwise.
sub is_utf8 {
    my $text = shift;

    # Nothing to validate: treat undef and the empty string as trivially valid.
    return 1 if !defined $text || !length $text;

    # utf8::decode works in place, so validate a copy and leave the caller's
    # data untouched. It returns false for malformed UTF-8 and for strings
    # containing code points above 0xFF (which are not octet strings at all).
    # The former pack/unpack 'U0' round trip depended on pre-5.10 semantics and
    # reported 1 for Latin-1 and UTF-8 input alike on current Perl versions.
    my $octets = $text;
    return utf8::decode($octets) ? 1 : 0;
}
Quote: test with latin1: 1
test with utf-8: 1
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
#!perl
use strict;
use warnings;
use FileHandle;
# Run the detector on both sample files and print one result line per file.
print "test with latin1: ", test('test-iso.txt', 'latin1'), "\n";
print "test with utf-8: ", test('test-utf8.txt', 'utf-8'), "\n";
# Reads a file and reports whether its bytes are valid UTF-8.
#
# Parameters:
#   $file     - path of the file to examine.
#   $encoding - name of the encoding the file is supposed to be in. Kept for
#               interface compatibility and used as a label only: it is
#               deliberately NOT applied as an I/O layer, because decoding
#               while reading would discard the byte sequence that the check
#               has to look at.
#
# Returns 1 if the file content is well-formed UTF-8, 0 otherwise.
# Dies if the file cannot be opened.
sub test {
    my $file     = shift;
    my $encoding = shift;

    # ':raw' hands the octets through exactly as stored on disk.
    my $fh = FileHandle->new($file, '<:raw') or die('Cannot open file: ' . $!);
    my $content = join '', $fh->getlines();
    $fh->close();

    return is_utf8($content);
} # /test

# Tests whether a string of octets is well-formed UTF-8.
#
# Parameters:
#   $text - the raw bytes to examine. Text that was already decoded into
#           characters cannot be judged reliably, because decoding discards
#           the original byte sequence.
#
# Returns 1 if the octets form valid UTF-8 (pure ASCII, the empty string and
# undef count as valid), 0 otherwise.
sub is_utf8 {
    my $text = shift;

    # Nothing to validate: treat undef and the empty string as trivially valid.
    return 1 if !defined $text || !length $text;

    # utf8::decode works in place, so validate a copy and leave the caller's
    # data untouched. It returns false for malformed UTF-8 and for strings
    # containing code points above 0xFF (which are not octet strings at all).
    # The former pack/unpack 'U0' round trip depended on pre-5.10 semantics and
    # reported 1 for Latin-1 and UTF-8 input alike on current Perl versions.
    my $octets = $text;
    return utf8::decode($octets) ? 1 : 0;
} # /is_utf8
Quote: test-iso.txt
Diese Datei ist latin1-Kodiert.