NAME

 UTF8::R2 - makes UTF-8 scripting easy for enterprise use or LTS

SYNOPSIS

 use UTF8::R2;
 use UTF8::R2 ver.sion;      # match or die
 use UTF8::R2 qw( RFC3629 ); # m/./ matches RFC3629 codepoint (default)
 use UTF8::R2 qw( RFC2279 ); # m/./ matches RFC2279 codepoint
 use UTF8::R2 qw( %mb );     # multibyte regex by %mb

DESCRIPTION

 The UTF8::R2 module provides a minimal set of UTF-8 subroutines for a stable
 scripting environment, using neither the utf8 pragma nor the UTF-8 flag.

   # with use UTF8::R2;
   # or   use UTF8::R2 qw( RFC3629 );
   # m/./ is equivalent to
   qr{(?>
       [\x00-\x7F\xC0-\xC1\xF5-\xFF]                |
       [\xC2-\xDF][\x80-\xBF]                       |
       [\xE0-\xE0][\xA0-\xBF][\x80-\xBF]            |
       [\xE1-\xEC][\x80-\xBF][\x80-\xBF]            |
       [\xED-\xED][\x80-\x9F][\x80-\xBF]            |
       [\xEE-\xEF][\x80-\xBF][\x80-\xBF]            |
       [\xF0-\xF0][\x90-\xBF][\x80-\xBF][\x80-\xBF] |
       [\xF1-\xF3][\x80-\xBF][\x80-\xBF][\x80-\xBF] |
       [\xF4-\xF4][\x80-\x8F][\x80-\xBF][\x80-\xBF] |
       [\x00-\xFF]
   )}x;

   # with use UTF8::R2 qw( RFC2279 );
   # m/./ is equivalent to
   qr{(?>
       [\x00-\x7F\xC0-\xC1\xF5-\xFF]                |
       [\xC2-\xDF][\x80-\xBF]                       |
       [\xE0-\xEF][\x80-\xBF][\x80-\xBF]            |
       [\xF0-\xF4][\x80-\xBF][\x80-\xBF][\x80-\xBF] |
       [\x00-\xFF]
   )}x;

SUBROUTINES

 VERY USEFUL UTF-8 CODEPOINT FEATURES
   UTF8::R2::qr(qr/ utf8_regex_here . \D \H \N \R \S \V \W \b \d \h \s \v \w \x{Unicode} [ \D \H \S \V \W \b \d \h \s \v \w \x{Unicode} ] ? + * {n} {n,} {n,m} /imsxogc)
   UTF8::R2::split(qr/$utf8regex/imsxo, $_, 3)
   UTF8::R2::tr($_, 'ABC', 'XYZ', 'cdsr')
   use UTF8::R2 qw(%mb);
     $_ =~ $mb{qr/$utf8regex/imsxogc}
     $_ =~ s<$mb{qr/before/imsxo}><after>egr

 OTHER UTF-8 CODEPOINT FEATURES
   UTF8::R2::chop(@_)
   UTF8::R2::chr($_)
   UTF8::R2::getc(FILEHANDLE)
   UTF8::R2::index($_, 'ABC', 5)
   UTF8::R2::lc($_)
   UTF8::R2::lcfirst($_)
   UTF8::R2::length($_)
   UTF8::R2::ord($_)
   UTF8::R2::reverse(@_)
   UTF8::R2::rindex($_, 'ABC', 5)
   UTF8::R2::substr($_, 0, 5)
   UTF8::R2::uc($_)
   UTF8::R2::ucfirst($_)

SUPPORTED PERL VERSIONS

 Perl version 5.005_03 through the newest Perl release

SEE ALSO

 http://search.cpan.org/~ina/
 http://backpan.perl.org/authors/id/I/IN/INA/