|
|
What this is
This file is included in the DevDaily.com
"Perl Source Code
Warehouse" project. The intent of this project is to help you "Learn
Perl by Example" TM.
Other links
The source code
# $Id: encoding.pm,v 2.0 2004/05/16 20:55:16 dankogai Exp $
package encoding;
our $VERSION = do { my @r = (q$Revision: 2.0 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
use Encode;
use strict;
sub DEBUG () { 0 }
BEGIN {
if (ord("A") == 193) {
require Carp;
Carp::croak("encoding pragma does not support EBCDIC platforms");
}
}
our $HAS_PERLIO = 0;
eval { require PerlIO::encoding };
unless ($@){
$HAS_PERLIO = (PerlIO::encoding->VERSION >= 0.02);
}
sub _exception{
my $name = shift;
$] > 5.008 and return 0; # 5.8.1 or higher then no
my %utfs = map {$_=>1}
qw(utf8 UCS-2BE UCS-2LE UTF-16 UTF-16BE UTF-16LE
UTF-32 UTF-32BE UTF-32LE);
$utfs{$name} or return 0; # UTFs or no
require Config; Config->import(); our %Config;
return $Config{perl_patchlevel} ? 0 : 1 # maintperl then no
}
sub import {
my $class = shift;
my $name = shift;
my %arg = @_;
$name ||= $ENV{PERL_ENCODING};
my $enc = find_encoding($name);
unless (defined $enc) {
require Carp;
Carp::croak("Unknown encoding '$name'");
}
$name = $enc->name; # canonize
unless ($arg{Filter}) {
DEBUG and warn "_exception($name) = ", _exception($name);
_exception($name) or ${^ENCODING} = $enc;
$HAS_PERLIO or return 1;
}else{
defined(${^ENCODING}) and undef ${^ENCODING};
# implicitly 'use utf8'
require utf8; # to fetch $utf8::hint_bits;
$^H |= $utf8::hint_bits;
eval {
require Filter::Util::Call ;
Filter::Util::Call->import ;
filter_add(sub{
my $status = filter_read();
if ($status > 0){
$_ = $enc->decode($_, 1);
DEBUG and warn $_;
}
$status ;
});
};
} DEBUG and warn "Filter installed";
defined ${^UNICODE} and ${^UNICODE} != 0 and return 1;
for my $h (qw(STDIN STDOUT)){
if ($arg{$h}){
unless (defined find_encoding($arg{$h})) {
require Carp;
Carp::croak("Unknown encoding for $h, '$arg{$h}'");
}
eval { binmode($h, ":raw :encoding($arg{$h})") };
}else{
unless (exists $arg{$h}){
eval {
no warnings 'uninitialized';
binmode($h, ":raw :encoding($name)");
};
}
}
if ($@){
require Carp;
Carp::croak($@);
}
}
return 1; # I doubt if we need it, though
}
sub unimport{
no warnings;
undef ${^ENCODING};
if ($HAS_PERLIO){
binmode(STDIN, ":raw");
binmode(STDOUT, ":raw");
}else{
binmode(STDIN);
binmode(STDOUT);
}
if ($INC{"Filter/Util/Call.pm"}){
eval { filter_del() };
}
}
1;
__END__
=pod
=head1 NAME
encoding - allows you to write your script in non-ascii or non-utf8
=head1 SYNOPSIS
use encoding "greek"; # Perl like Greek to you?
use encoding "euc-jp"; # Jperl!
# or you can even do this if your shell supports your native encoding
perl -Mencoding=latin2 -e '...' # Feeling centrally European?
perl -Mencoding=euc-kr -e '...' # Or Korean?
# more control
# A simple euc-cn => utf-8 converter
use encoding "euc-cn", STDOUT => "utf8"; while(<>){print};
# "no encoding;" supported (but not scoped!)
no encoding;
# an alternate way, Filter
use encoding "euc-jp", Filter=>1;
# now you can use kanji identifiers -- in euc-jp!
=head1 ABSTRACT
Let's start with a bit of history: Perl 5.6.0 introduced Unicode
support. You could apply C and regexes even to complex CJK
characters -- so long as the script was written in UTF-8. But back
then, text editors that supported UTF-8 were still rare and many users
instead chose to write scripts in legacy encodings, giving up a whole
new feature of Perl 5.6.
Rewind to the future: starting from perl 5.8.0 with the B
pragma, you can write your script in any encoding you like (so long
as the C module supports it) and still enjoy Unicode support.
This pragma achieves that by doing the following:
=over
=item *
Internally converts all literals (C) from
the encoding specified to utf8. In Perl 5.8.1 and later, literals in
C and C pseudo-filehandle are also converted.
=item *
Changing PerlIO layers of C and C to the encoding
specified.
=back
=head2 Literal Conversions
You can write code in EUC-JP as follows:
my $Rakuda = "\xF1\xD1\xF1\xCC"; # Camel in Kanji
#<-char-><-char-> # 4 octets
s/\bCamel\b/$Rakuda/;
And with C
|