diff -rc perl-5.8.0/patchlevel.h perl-5.8.0.patched/patchlevel.h
*** perl-5.8.0/patchlevel.h     Fri Jul 19 00:08:27 2002
--- perl-5.8.0.patched/patchlevel.h     Thu Nov 15 17:18:17 2007
***************
*** 82 ****
!       ,NULL
--- 82,83 ----
!       ,"REGEXP0 - fix for UTF-8 recoding in regexps - CVE-2007-5116"
!       ,NULL
diff -rc perl-5.8.0/regcomp.c perl-5.8.0.patched/regcomp.c
*** perl-5.8.0/regcomp.c        Mon Jul  8 21:10:49 2002
--- perl-5.8.0.patched/regcomp.c        Thu Nov 15 17:18:17 2007
***************
*** 125,131 ****
     I32               extralen;
     I32               seen_zerolen;
     I32               seen_evals;
!     I32               utf8;
 #if ADD_TO_REGEXEC
     char      *starttry;              /* -Dr: where regtry was called. */
 #define RExC_starttry (pRExC_state->starttry)
--- 125,134 ----
     I32               extralen;
     I32               seen_zerolen;
     I32               seen_evals;
!     I32               utf8;           /* whether the pattern is utf8 or not */
!     I32               orig_utf8;      /* whether the pattern was originally in utf8 */
!                               /* XXX use this for future optimisation of case
!                                * where pattern must be upgraded to utf8. */
 #if ADD_TO_REGEXEC
     char      *starttry;              /* -Dr: where regtry was called. */
 #define RExC_starttry (pRExC_state->starttry)
***************
*** 151,156 ****
--- 154,160 ----
 #define RExC_seen_zerolen     (pRExC_state->seen_zerolen)
 #define RExC_seen_evals       (pRExC_state->seen_evals)
 #define RExC_utf8     (pRExC_state->utf8)
+ #define RExC_orig_utf8        (pRExC_state->orig_utf8)

 #define       ISMULT1(c)      ((c) == '*' || (c) == '+' || (c) == '?')
 #define       ISMULT2(s)      ((*s) == '*' || (*s) == '+' || (*s) == '?' || \
***************
*** 1737,1751 ****
     if (exp == NULL)
       FAIL("NULL regexp argument");

!     RExC_utf8 = pm->op_pmdynflags & PMdf_CMP_UTF8;

-     RExC_precomp = exp;
     DEBUG_r({
        if (!PL_colorset) reginitcolors();
        PerlIO_printf(Perl_debug_log, "%sCompiling REx%s `%s%*s%s'\n",
                      PL_colors[4],PL_colors[5],PL_colors[0],
!                      (int)(xend - exp), RExC_precomp, PL_colors[1]);
     });
     RExC_flags = pm->op_pmflags;
     RExC_sawback = 0;

--- 1741,1757 ----
     if (exp == NULL)
       FAIL("NULL regexp argument");

!     RExC_utf8 = RExC_orig_utf8 = pm->op_pmdynflags & PMdf_CMP_UTF8;

     DEBUG_r({
        if (!PL_colorset) reginitcolors();
        PerlIO_printf(Perl_debug_log, "%sCompiling REx%s `%s%*s%s'\n",
                      PL_colors[4],PL_colors[5],PL_colors[0],
!                      (int)(xend - exp), exp, PL_colors[1]);
     });
+
+ redo_first_pass:
+     RExC_precomp = exp;
     RExC_flags = pm->op_pmflags;
     RExC_sawback = 0;

***************
*** 1770,1775 ****
--- 1776,1800 ----
     if (reg(pRExC_state, 0, &flags) == NULL) {
       RExC_precomp = Nullch;
       return(NULL);
+     }
+     if (RExC_utf8 && !RExC_orig_utf8) {
+         /* It's possible to write a regexp in ascii that represents unicode
+         codepoints outside of the byte range, such as via \x{100}. If we
+         detect such a sequence we have to convert the entire pattern to utf8
+         and then recompile, as our sizing calculation will have been based
+         on 1 byte == 1 character, but we will need to use utf8 to encode
+         at least some part of the pattern, and therefore must convert the whole
+         thing.
+         XXX: somehow figure out how to make this less expensive...
+         -- dmq */
+         STRLEN len = xend-exp;
+         DEBUG_r(PerlIO_printf(Perl_debug_log,
+           "UTF8 mismatch! Converting to utf8 for resizing and compile\n"));
+         exp = (char*)Perl_bytes_to_utf8(aTHX_ (U8*)exp, &len);
+         xend = exp + len;
+         RExC_orig_utf8 = RExC_utf8;
+         SAVEFREEPV(exp);
+         goto redo_first_pass;
     }
     DEBUG_r(PerlIO_printf(Perl_debug_log, "size %"IVdf" ", (IV)RExC_size));

diff -rc perl-5.8.0/t/op/pat.t perl-5.8.0.patched/t/op/pat.t
*** perl-5.8.0/t/op/pat.t       Mon Jul  1 15:42:19 2002
--- perl-5.8.0.patched/t/op/pat.t       Thu Nov 15 17:19:14 2007
***************
*** 6,12 ****

 $| = 1;

! print "1..922\n";

 BEGIN {
     chdir 't' if -d 't';
--- 6,12 ----

 $| = 1;

! print "1..924\n";

 BEGIN {
     chdir 't' if -d 't';
***************
*** 2900,2904 ****
--- 2900,2913 ----
       }
     }
 }
+
+ {
+     use warnings;
+     my @w;
+     local $SIG{__WARN__}=sub{push @w,"@_"};
+     my $c=qq(\x{DF});
+     ok($c=~/${c}|\x{100}/, "ASCII pattern that really is utf8");
+     ok(@w==0, "No warnings");
+ }

 $test = 923;