NAME
UTF8::R2 - makes UTF-8 scripting easy for enterprise use or LTS
SYNOPSIS
use UTF8::R2;
use UTF8::R2 ver.sion; # match or die
use UTF8::R2 qw( RFC3629 ); # m/./ matches RFC3629 codepoint (default)
use UTF8::R2 qw( RFC2279 ); # m/./ matches RFC2279 codepoint
use UTF8::R2 qw( WTF8 ); # m/./ matches WTF-8 codepoint
use UTF8::R2 qw( RFC3629.ja_JP ); # optimized RFC3629 for ja_JP
use UTF8::R2 qw( WTF8.ja_JP ); # optimized WTF-8 for ja_JP
use UTF8::R2 qw( %mb ); # multibyte regex by %mb
DESCRIPTION
The UTF8::R2 module provides minimal UTF-8 subroutines for a stable scripting
environment, without using the utf8 pragma or the UTF-8 flag.
# on use UTF8::R2 qw( RFC2279 );
# m/./ means
# beautiful concept in young days
# https://www.ietf.org/rfc/rfc2279.txt
'RFC2279' => qr{(?>@{[join('', qw(
[\x00-\x7F\x80-\xBF\xC0-\xC1\xF5-\xFF] |
[\xC2-\xDF][\x80-\xBF] |
[\xE0-\xEF][\x80-\xBF][\x80-\xBF] |
[\xF0-\xF4][\x80-\xBF][\x80-\xBF][\x80-\xBF] |
[\x00-\xFF]
))]})}x,
# on use UTF8::R2;
# or use UTF8::R2 qw( RFC3629 );
# m/./ means
# https://tools.ietf.org/rfc/rfc3629.txt
'RFC3629' => qr{(?>@{[join('', qw(
[\x00-\x7F\x80-\xBF\xC0-\xC1\xF5-\xFF] |
[\xC2-\xDF][\x80-\xBF] |
[\xE0-\xE0][\xA0-\xBF][\x80-\xBF] |
[\xE1-\xEC][\x80-\xBF][\x80-\xBF] |
[\xED-\xED][\x80-\x9F][\x80-\xBF] |
[\xEE-\xEF][\x80-\xBF][\x80-\xBF] |
[\xF0-\xF0][\x90-\xBF][\x80-\xBF][\x80-\xBF] |
[\xF1-\xF3][\x80-\xBF][\x80-\xBF][\x80-\xBF] |
[\xF4-\xF4][\x80-\x8F][\x80-\xBF][\x80-\xBF] |
[\x00-\xFF]
))]})}x,
# or use UTF8::R2 qw( WTF8 );
# m/./ means
# http://simonsapin.github.io/wtf-8/
'WTF8' => qr{(?>@{[join('', qw(
[\x00-\x7F\x80-\xBF\xC0-\xC1\xF5-\xFF] |
[\xC2-\xDF][\x80-\xBF] |
[\xE0-\xE0][\xA0-\xBF][\x80-\xBF] |
[\xE1-\xEF][\x80-\xBF][\x80-\xBF] |
[\xF0-\xF0][\x90-\xBF][\x80-\xBF][\x80-\xBF] |
[\xF1-\xF3][\x80-\xBF][\x80-\xBF][\x80-\xBF] |
[\xF4-\xF4][\x80-\x8F][\x80-\xBF][\x80-\xBF] |
[\x00-\xFF]
))]})}x,
# or use UTF8::R2 qw( RFC3629.ja_JP );
# m/./ means
# optimized RFC3629 for ja_JP
'RFC3629.ja_JP' => qr{(?>@{[join('', qw(
[\x00-\x7F\x80-\xBF\xC0-\xC1\xF5-\xFF] |
[\xE1-\xEC][\x80-\xBF][\x80-\xBF] |
[\xC2-\xDF][\x80-\xBF] |
[\xEE-\xEF][\x80-\xBF][\x80-\xBF] |
[\xF0-\xF0][\x90-\xBF][\x80-\xBF][\x80-\xBF] |
[\xE0-\xE0][\xA0-\xBF][\x80-\xBF] |
[\xED-\xED][\x80-\x9F][\x80-\xBF] |
[\xF1-\xF3][\x80-\xBF][\x80-\xBF][\x80-\xBF] |
[\xF4-\xF4][\x80-\x8F][\x80-\xBF][\x80-\xBF] |
[\x00-\xFF]
))]})}x,
# or use UTF8::R2 qw( WTF8.ja_JP );
# m/./ means
# optimized WTF-8 for ja_JP
'WTF8.ja_JP' => qr{(?>@{[join('', qw(
[\x00-\x7F\x80-\xBF\xC0-\xC1\xF5-\xFF] |
[\xE1-\xEF][\x80-\xBF][\x80-\xBF] |
[\xC2-\xDF][\x80-\xBF] |
[\xE0-\xE0][\xA0-\xBF][\x80-\xBF] |
[\xF0-\xF0][\x90-\xBF][\x80-\xBF][\x80-\xBF] |
[\xF1-\xF3][\x80-\xBF][\x80-\xBF][\x80-\xBF] |
[\xF4-\xF4][\x80-\x8F][\x80-\xBF][\x80-\xBF] |
[\x00-\xFF]
))]})}x,
SUBROUTINES
VERY USEFUL UTF-8 CODEPOINT FEATURES
UTF8::R2::qr(qr/ utf8_regex_here . \D \H \N \R \S \V \W \b \d \h \s \v \w \x{Unicode} [ \D \H \S \V \W \b \d \h \s \v \w \x{Unicode} [:POSIX:] [:^POSIX:] ] ? + * {n} {n,} {n,m} /imsxogc)
UTF8::R2::split(qr/$utf8regex/imsxo, $_, 3)
UTF8::R2::tr($_, 'ABC', 'XYZ', 'cdsr')
use UTF8::R2 qw(%mb);
$_ =~ $mb{qr/$utf8regex/imsxogc}
$_ =~ s<$mb{qr/before/imsxo}><after>egr
OTHER UTF-8 CODEPOINT FEATURES
UTF8::R2::chop(@_)
UTF8::R2::chr($_)
UTF8::R2::getc(FILEHANDLE)
UTF8::R2::index($_, 'ABC', 5)
UTF8::R2::lc($_)
UTF8::R2::lcfirst($_)
UTF8::R2::length($_)
UTF8::R2::ord($_)
UTF8::R2::reverse(@_)
UTF8::R2::rindex($_, 'ABC', 5)
UTF8::R2::substr($_, 0, 5)
UTF8::R2::uc($_)
UTF8::R2::ucfirst($_)
SUPPORTED PERL VERSIONS
Perl 5.005_03 through the newest Perl release
SEE ALSO
http://search.cpan.org/~ina/
http://backpan.perl.org/authors/id/I/IN/INA/