NAME

 UTF8::R2 - makes UTF-8 scripting easy for enterprise use or LTS

SYNOPSIS

 use UTF8::R2;
 use UTF8::R2 ver.sion;            # match or die
 use UTF8::R2 qw( RFC3629 );       # m/./ matches RFC3629 codepoint (default)
 use UTF8::R2 qw( RFC2279 );       # m/./ matches RFC2279 codepoint
 use UTF8::R2 qw( WTF8 );          # m/./ matches WTF-8 codepoint
 use UTF8::R2 qw( RFC3629.ja_JP ); # optimized RFC3629 for ja_JP
 use UTF8::R2 qw( WTF8.ja_JP );    # optimized WTF-8 for ja_JP
 use UTF8::R2 qw( %mb );           # multibyte regex by %mb

DESCRIPTION

 The UTF8::R2 module provides a minimal set of UTF-8 subroutines for a stable
 scripting environment, without using the utf8 pragma or the UTF-8 flag.

   # on use UTF8::R2 qw( RFC2279 );
   # m/./ means
   # beautiful concept in young days
   # https://www.ietf.org/rfc/rfc2279.txt
   'RFC2279' => qr{(?>@{[join('', qw(
       [\x00-\x7F\x80-\xBF\xC0-\xC1\xF5-\xFF]       |
       [\xC2-\xDF][\x80-\xBF]                       |
       [\xE0-\xEF][\x80-\xBF][\x80-\xBF]            |
       [\xF0-\xF4][\x80-\xBF][\x80-\xBF][\x80-\xBF] |
       [\x00-\xFF]
   ))]})}x,

   # on use UTF8::R2;
   # or use UTF8::R2 qw( RFC3629 );
   # m/./ means
   # https://www.ietf.org/rfc/rfc3629.txt
   'RFC3629' => qr{(?>@{[join('', qw(
       [\x00-\x7F\x80-\xBF\xC0-\xC1\xF5-\xFF]       |
       [\xC2-\xDF][\x80-\xBF]                       |
       [\xE0-\xE0][\xA0-\xBF][\x80-\xBF]            |
       [\xE1-\xEC][\x80-\xBF][\x80-\xBF]            |
       [\xED-\xED][\x80-\x9F][\x80-\xBF]            |
       [\xEE-\xEF][\x80-\xBF][\x80-\xBF]            |
       [\xF0-\xF0][\x90-\xBF][\x80-\xBF][\x80-\xBF] |
       [\xF1-\xF3][\x80-\xBF][\x80-\xBF][\x80-\xBF] |
       [\xF4-\xF4][\x80-\x8F][\x80-\xBF][\x80-\xBF] |
       [\x00-\xFF]
   ))]})}x,

   # or use UTF8::R2 qw( WTF8 );
   # m/./ means
   # http://simonsapin.github.io/wtf-8/
   'WTF8' => qr{(?>@{[join('', qw(
       [\x00-\x7F\x80-\xBF\xC0-\xC1\xF5-\xFF]       |
       [\xC2-\xDF][\x80-\xBF]                       |
       [\xE0-\xE0][\xA0-\xBF][\x80-\xBF]            |
       [\xE1-\xEF][\x80-\xBF][\x80-\xBF]            |
       [\xF0-\xF0][\x90-\xBF][\x80-\xBF][\x80-\xBF] |
       [\xF1-\xF3][\x80-\xBF][\x80-\xBF][\x80-\xBF] |
       [\xF4-\xF4][\x80-\x8F][\x80-\xBF][\x80-\xBF] |
       [\x00-\xFF]
   ))]})}x,

   # or use UTF8::R2 qw( RFC3629.ja_JP );
   # m/./ means
   # optimized RFC3629 for ja_JP
   'RFC3629.ja_JP' => qr{(?>@{[join('', qw(
       [\x00-\x7F\x80-\xBF\xC0-\xC1\xF5-\xFF]       |
       [\xE1-\xEC][\x80-\xBF][\x80-\xBF]            |
       [\xC2-\xDF][\x80-\xBF]                       |
       [\xEE-\xEF][\x80-\xBF][\x80-\xBF]            |
       [\xF0-\xF0][\x90-\xBF][\x80-\xBF][\x80-\xBF] |
       [\xE0-\xE0][\xA0-\xBF][\x80-\xBF]            |
       [\xED-\xED][\x80-\x9F][\x80-\xBF]            |
       [\xF1-\xF3][\x80-\xBF][\x80-\xBF][\x80-\xBF] |
       [\xF4-\xF4][\x80-\x8F][\x80-\xBF][\x80-\xBF] |
       [\x00-\xFF]
   ))]})}x,

   # or use UTF8::R2 qw( WTF8.ja_JP );
   # m/./ means
   # optimized WTF-8 for ja_JP
   'WTF8.ja_JP' => qr{(?>@{[join('', qw(
       [\x00-\x7F\x80-\xBF\xC0-\xC1\xF5-\xFF]       |
       [\xE1-\xEF][\x80-\xBF][\x80-\xBF]            |
       [\xC2-\xDF][\x80-\xBF]                       |
       [\xE0-\xE0][\xA0-\xBF][\x80-\xBF]            |
       [\xF0-\xF0][\x90-\xBF][\x80-\xBF][\x80-\xBF] |
       [\xF1-\xF3][\x80-\xBF][\x80-\xBF][\x80-\xBF] |
       [\xF4-\xF4][\x80-\x8F][\x80-\xBF][\x80-\xBF] |
       [\x00-\xFF]
   ))]})}x,

SUBROUTINES

 VERY USEFUL UTF-8 CODEPOINT FEATURES
   UTF8::R2::qr(qr/ utf8_regex_here . \D \H \N \R \S \V \W \b \d \h \s \v \w \x{Unicode} [ \D \H \S \V \W \b \d \h \s \v \w \x{Unicode} ] ? + * {n} {n,} {n,m} /imsxogc)
   UTF8::R2::split(qr/$utf8regex/imsxo, $_, 3)
   UTF8::R2::tr($_, 'ABC', 'XYZ', 'cdsr')
   use UTF8::R2 qw(%mb);
     $_ =~ $mb{qr/$utf8regex/imsxogc}
     $_ =~ s<$mb{qr/before/imsxo}><after>egr

 OTHER UTF-8 CODEPOINT FEATURES
   UTF8::R2::chop(@_)
   UTF8::R2::chr($_)
   UTF8::R2::getc(FILEHANDLE)
   UTF8::R2::index($_, 'ABC', 5)
   UTF8::R2::lc($_)
   UTF8::R2::lcfirst($_)
   UTF8::R2::length($_)
   UTF8::R2::ord($_)
   UTF8::R2::reverse(@_)
   UTF8::R2::rindex($_, 'ABC', 5)
   UTF8::R2::substr($_, 0, 5)
   UTF8::R2::uc($_)
   UTF8::R2::ucfirst($_)

SUPPORTED PERL VERSIONS

 Perl 5.005_03 through the newest Perl release

SEE ALSO

 https://metacpan.org/author/INA
 http://backpan.perl.org/authors/id/I/IN/INA/