NAME
   IRC::Utils - Common utilities for IRC-related tasks

SYNOPSIS
    use strict;
    use warnings;

    use IRC::Utils ':ALL';

    my $nickname = '^Lame|BOT[moo]';
    my $uppercase_nick = uc_irc($nickname);
    my $lowercase_nick = lc_irc($nickname);

    print "They're equivalent\n" if eq_irc($uppercase_nick, $lowercase_nick);

    my $mode_line = 'ov+b-i Bob sue stalin*!*@*';
    my $hashref = parse_mode_line(split ' ', $mode_line);

    my $banmask = 'stalin*';
    my $full_banmask = normalize_mask($banmask);

    if (matches_mask($full_banmask, 'stalin!joe@kremlin.ru')) {
        print "EEK!";
    }

    my $decoded = decode_irc($raw_irc_message);
    print $decoded, "\n";

    if (has_color($message)) {
       print "COLOR CODE ALERT!\n";
    }

    my $results_hashref = matches_mask_array(\@masks, \@items_to_match_against);

    my $nick = parse_user('stalin!joe@kremlin.ru');
    my ($nick, $user, $host) = parse_user('stalin!joe@kremlin.ru');

DESCRIPTION
   The functions in this module take care of many of the tasks you are
   faced with when working with IRC. Mode lines, ban masks, message
   encoding and formatting, etc.

FUNCTIONS
 "uc_irc"
   Takes one mandatory parameter, a string to convert to IRC uppercase, and
   one optional parameter, the casemapping of the ircd (which can be
   'rfc1459', 'strict-rfc1459' or 'ascii'. Default is 'rfc1459'). Returns
   the IRC uppercase equivalent of the passed string.

 "lc_irc"
   Takes one mandatory parameter, a string to convert to IRC lowercase, and
   one optional parameter, the casemapping of the ircd (which can be
   'rfc1459', 'strict-rfc1459' or 'ascii'. Default is 'rfc1459'). Returns
   the IRC lowercase equivalent of the passed string.

 "eq_irc"
   Takes two mandatory parameters, IRC strings (channels or nicknames) to
   compare. A third, optional parameter specifies the casemapping. Returns
   true if the two strings are equivalent, false otherwise.

    # long version
    lc_irc($one, $map) eq lc_irc($two, $map)

    # short version
    eq_irc($one, $two, $map)

 "parse_mode_line"
   Takes a list representing an IRC mode line. Returns a hashref.
   Optionally you can also supply an arrayref and a hashref to specify
   valid channel modes (default: "[qw(beI k l imnpstaqr)]") and status
   modes (default: "{o => '@', h => '%', v => '+'}"), respectively.

   If the modeline couldn't be parsed the hashref will be empty. On success
   the following keys will be available in the hashref:

   'modes', an arrayref of normalised modes;

   'args', an arrayref of applicable arguments to the modes;

   Example:

    my $hashref = parse_mode_line( 'ov+b-i', 'Bob', 'sue', 'stalin*!*@*' );

    # $hashref will be:
    {
       modes => [ '+o', '+v', '+b', '-i' ],
       args  => [ 'Bob', 'sue', 'stalin*!*@*' ],
    }

 "normalize_mask"
   Takes one parameter, a string representing an IRC mask. Returns a
   normalised full mask.

   Example:

    $fullbanmask = normalize_mask( 'stalin*' );

    # $fullbanmask will be: 'stalin*!*@*';

 "matches_mask"
   Takes two parameters, a string representing an IRC mask and something to
   match against the IRC mask, such as a nick!user@hostname string. Returns
   a true value if they match, a false value otherwise. Optionally, one may
   pass the casemapping (see "uc_irc"), as this function uses "uc_irc"
   internally.

 "matches_mask_array"
   Takes two array references, the first being a list of strings
   representing IRC masks, the second a list of somethings to test against
   the masks. Returns an empty hashref if there are no matches. Otherwise,
   the keys will be the masks matched, each value being an arrayref of the
   strings that matched it. Optionally, one may pass the casemapping (see
   "uc_irc"), as this function uses "uc_irc" internally.

 "unparse_mode_line"
   Takes one argument, a string representing a number of mode changes.
   Returns a condensed version of the changes.

     my $mode_line = unparse_mode_line('+o+o+o-v+v');
     # $mode_line is now '+ooo-v+v'

 "gen_mode_change"
   Takes two arguments, strings representing a set of IRC user modes before
   and after a change. Returns a string representing what changed.

     my $mode_change = gen_mode_change('abcde', 'befmZ');
     # $mode_change is now '-acd+fmZ'

 "parse_user"
   Takes one parameter, a string representing a user in the form
   nick!user@hostname. In a scalar context it returns just the nickname. In
   a list context it returns a list consisting of the nick, user and
   hostname, respectively.

 "is_valid_chan_name"
   Takes one argument, a channel name to validate. Returns true if the
   channel name is valid, false otherwise. You can supply a second
   argument, an arrayref of allowed channel prefix characters. Defaults
   to "['#', '&']".

 "is_valid_nick_name"
   Takes one argument, a nickname to validate. Returns true if the
   nickname is valid, false otherwise.

 "numeric_to_name"
   Takes an IRC server numerical reply code (e.g. '001') as an argument,
   and returns the corresponding name (e.g. 'RPL_WELCOME').

 "name_to_numeric"
   Takes an IRC server reply name (e.g. 'RPL_WELCOME') as an argument, and
   returns the corresponding numerical code (e.g. '001').

 "has_color"
   Takes one parameter, a string of IRC text. Returns true if it contains
   any IRC color codes, false otherwise. Useful if you want your bot to
   kick users for (ab)using colors. :)

 "has_formatting"
   Takes one parameter, a string of IRC text. Returns true if it contains
   any IRC formatting codes, false otherwise.

 "strip_color"
   Takes one parameter, a string of IRC text. Returns the string stripped
   of all IRC color codes.

 "strip_formatting"
   Takes one parameter, a string of IRC text. Returns the string stripped
   of all IRC formatting codes.

 "decode_irc"
   This function takes a byte string (i.e. an unmodified IRC message) and
   returns a text string. Since the source encoding might have been UTF-8,
   you should store it with UTF-8 or some other Unicode encoding in your
   file/database/whatever to be safe. For a more detailed discussion, see
   "ENCODING".

    use IRC::Utils qw(decode_irc);

    sub message_handler {
        my ($nick, $channel, $message) = @_;

        # not wise, $message is a byte string of unknown encoding
        print $message, "\n";

        $message = decode_irc($message);

        # good, $message is a text string
        print $message, "\n";
    }

CONSTANTS
   Use the following constants to add formatting and mIRC color codes to
   IRC messages.

   Normal text:

    NORMAL

   Formatting:

    BOLD
    UNDERLINE
    REVERSE
    ITALIC
    FIXED

   Colors:

    WHITE
    BLACK
    BLUE
    GREEN
    RED
    BROWN
    PURPLE
    ORANGE
    YELLOW
    LIGHT_GREEN
    TEAL
    LIGHT_CYAN
    LIGHT_BLUE
    PINK
    GREY
    LIGHT_GREY

   Individual non-color formatting codes can be cancelled with their
   corresponding constant, but you can also cancel all of them at once with
   "NORMAL". To cancel the effect of color codes, you must use "NORMAL",
   which of course has the side effect of cancelling all other formatting
   codes as well.

    $msg = 'This word is '.YELLOW.'yellow'.NORMAL.' while this word is '.BOLD.'bold'.BOLD;
    $msg = UNDERLINE.BOLD.'This sentence is both underlined and bold.'.NORMAL;

ENCODING
 Messages
   The only encoding requirement the IRC protocol places on its messages is
   that they be 8-bit and ASCII-compatible. This has resulted in most of
   the Western world settling on ASCII-compatible Latin-1 (usually
   Microsoft's CP1252, a Latin-1 variant) as a convention. Recently,
   popular IRC clients (mIRC, xchat, certain irssi configurations) have
   begun sending a mixture of CP1252 and UTF-8 over the wire to allow more
   characters without breaking backward compatibility (too much). They send
   CP1252 encoded messages if the characters fit within that encoding,
   otherwise falling back to UTF-8, and likewise autodetecting the encoding
   (UTF-8 or CP1252) of incoming messages. Since writing text with mixed
   encoding to a file, terminal, or database is not a good idea, you need a
   way to decode messages from IRC. "decode_irc" will do that.

 Channel names
   The matter is complicated further by the fact that some servers allow
   non-ASCII characters in channel names. IRC modules generally don't
   explicitly encode or decode any IRC traffic, but they do have to
   concatenate parts of a message (e.g. a channel name and a message)
   before sending it over the wire. So when you do something like
   "privmsg($channel, 'æði')", where $channel is the unmodified channel
   name (a byte string) you got from an earlier IRC message, the channel
   name will get double-encoded when concatenated with your message (a
   non-ASCII text string) if the channel name contains non-ASCII bytes.

   To prevent this, you can't simply decode the channel name and then use
   it. '#æði' in CP1252 is not the same channel as '#æði' in UTF-8, since
   they are encoded as different sequences of bytes, and the IRC server
   only cares about the byte representation. Therefore, when using a
   channel name you got from the server (e.g. when replying to a message),
   you should use the original byte string (before it has been decoded with
   "decode_irc"), and encode any other parameters (with "encode_utf8") so
   that your message will be concatenated correctly. At some point, you'll
   probably want to print the channel name, write it to a log file or use
   it in a filename, so you'll eventually have to decode it, at which point
   the UTF-8 "#æði" and CP1252 "#æði" will have to be considered
   equivalent.

    use Encode qw(encode_utf8 encode);

    sub message_handler {
        # these three are all byte strings
        my ($nick, $channel, $message) = @_;

        # bad: if $channel has any non-ASCII bytes, they will get double-encoded
        privmsg($channel, 'æði');

        # bad: if $message has any non-ASCII bytes, they will get double-encoded
        privmsg('#æði', $message);

        # good: both are byte strings already, so they will concatenate correctly
        privmsg($channel, $message);

        # good: both are text strings (Latin1 as per Perl's default), so
        # they'll be concatenated correctly
        privmsg('#æði', 'æði');

        # good: similar to the last one, except now they're using UTF-8, which
        # means that the channel is actually not the same as above
        use utf8;
        privmsg('#æði', 'æði');

        # good: $channel and $msg_bytes are both byte strings
        my $msg_bytes = encode_utf8('æði');
        privmsg($channel, $msg_bytes);

        # good: $utf8_bytes and $message are both byte strings
        # here we're sending a message to the utf8-encoded #æði
        my $utf8_bytes = encode_utf8('#æði');
        privmsg($utf8_bytes, $message);

        # good: $cp1252_bytes and $message are both byte strings
        # here we're sending a message to the cp1252-encoded #æði
        my $cp1252_bytes = encode('cp1252', '#æði');
        privmsg($cp1252_bytes, $message);

        # bad: $channel is in an undetermined encoding
        log_message("Got message from $channel");

        # good: using the decoded version of $channel
        log_message("Got message from ".decode_irc($channel));
    }

   See also Encode, perluniintro, perlunitut, perlunicode, and perlunifaq.

AUTHOR
   Hinrik Örn Sigurðsson <[email protected]> ("Hinrik" irc.perl.org, or
   "literal" @ FreeNode).

   Chris "BinGOs" Williams <[email protected]>

SEE ALSO
   POE::Component::IRC

   POE::Component::Server::IRC