# Which are the expected Unicode codepoints? my @expected_codepoints = (9, 10, 13, # newlines+tab 32..127, (map { ord } split //, "äöüÄÖÜß"), 0x20ac, # euro sign ); # Which are the expected encodings? May be empty, in this case # all to perl available encodings will be checked. my @expected_encodings = qw(cp1252 iso-8859-1 utf-8); # Text to analyze, specify in raw octets: my $octets_to_analyze = "Bla äöü \x80"; ###################################################################### # No user-servicable parts below this point. use strict; use Encode; if (!@expected_encodings) { @expected_encodings = Encode->encodings(":all"); } my %expected_codepoints = map {($_,1)} @expected_codepoints; my @encoding_result; for my $encoding (@expected_encodings) { my $characters = eval { decode($encoding, $octets_to_analyze, Encode::FB_CROAK|Encode::LEAVE_SRC); }; if (!$@) { my %got_codepoints; for my $codepoint (map { ord } split //, $characters) { $got_codepoints{$codepoint}++; } my $fitting_codepoints = 0; while(my($k,$v) = each %got_codepoints) { if (exists $expected_codepoints{$k}) { $fitting_codepoints++; } } push @encoding_result, [$encoding, $fitting_codepoints*100/keys %got_codepoints]; } } @encoding_result = sort { $b->[1] <=> $a->[1] } @encoding_result; for my $result (@encoding_result) { printf "%-30s: %4.1f%%\n", $result->[0], $result->[1]; }