#! /bin/sh

# deblob-check version 2020-12-05
# Inspired by gNewSense's find-firmware script.
# Written by Alexandre Oliva <[email protected]>

# Check http://www.fsfla.org/svn/fsfla/software/linux-libre for newer
# versions.

# Copyright 2008-2020 Alexandre Oliva <[email protected]>
#
# This program is part of GNU Linux-libre, a GNU project that
# publishes scripts to clean up Linux so as to make it suitable for
# use in the GNU Project and in Free System Distributions.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
# USA


# usage: deblob-check [-S] [-v] [-v] [-s S] [--reverse-patch] \
#        [--use-...|--gen-flex] [-lDdBbCcXxPpFftVh?H] \
#        *.tar* patch-* [-i prefix/] *.patch *.diff...

# Look for and report too-long undocumented sequences of numbers
# (generally blobs in disguise) in source files, as well as requests
# for loading non-Free firmware.

# The order of command line flags is significant.  Flags given out of
# the order above won't be handled correctly, sorry.

# -s --sensitivity: Specifies the number of consecutive integral or
#               character constants that trigger the blob detector.
#               Must be followed by a blank and a number.

#    --reverse-patch: Test the removed parts of a patch, rather than
#               the added ones.

#    --use-python: Choose the internal python script for the bulk of
#               the work.  This is the fastest for cleaning up,
#               because of the fast startup time of the regular
#               expression engine.  This option is the default if
#               python is found.  Set PYTHON to override the python
#               interpreter.  The internal script works in both Python
#               2 and 3.

#    --use-awk: Choose the internal GNU awk script for the bulk of the
#               work.  This is the recommended option to check entire
#               tarballs, because its regular expression engine offers
#               the best speed/memory use.  This is the default option
#               if python is not found.  Set AWK to specify GNU awk's
#               name.

#    --use-perl: Choose the internal perl script.  This is not
#               recommended, because our regular expressions exceed
#               some limits hard-coded into perl.  Set PERL to specify
#               which perl implementation to use.  This is the default
#               option if neither python nor GNU awk are found, AND if
#               PERL is set.

#    --use-sed: Choose the internal GNU sed script for the bulk of the
#               work.  This is the default option, if no other
#               alternative is found.  Use SED to specify which sed
#               program to use.

#    --save-script-input: Save the input that would have been fed to
#               any of the engines above.

#    --gen-flex: Generate a flex input file with all known blob and
#               false positive patterns.  It might have been a fast
#               regular expression processor if only the flex program
#               completed in reasonable time.


# For reference, as of 5.8-rc6-gnu, checking the full tarball (with
# -B, not -C) takes up:

# Engine            top mem   time
# GNU awk 5.0.1     2.2GB       1x
# Python 3.8.3         48MB    14x
# Perl 5.30.3          22MB    45x
# GNU sed 4.5      24GB         4x


# The default sensitivity is 32 constants.

# The sensitivity, if present, must be the first option, preceded only
# by the debugging options (-S and -v).  The action selection, if
# present, must follow every other option that precedes it in the
# usage line above, and come before any file names.

# The default can be overridden with one of:

# -l --list-blobs: list files that contain sequences that match the
#               blob detector test and that are not known to be false
#               positives.  This is the default option.

# -d --deblob --mark-blobs: print the processed input, replacing
#               sequences that match the blob detector test and that
#               are NOT known to be false positives with
#               /*(DEBLOBBED)*/.

# -D --cat: print the processed input, as it would have been fed to
#               the blob detector script.  Use -S to save the sed
#               script used to process it, and search for `sedcat:' in
#               comments to locate the relevant adaptation points.

# -b --print-marked-blobs: like -d, but print only the matching
#               sequences.

# -B --print-blobs: like -b, but do not deblob the sequences.

# -c --print-marked-blobs-with-context: like -b, but try to maximize
#               the context around the blobs.  This maximization will
#               sometimes disregard known false positives, if they
#               happen to be contained within the extended match.
#               This is probably an indication that the false positive
#               matching rule could be improved.

# -C --print-blobs-with-context: like -B, but try to maximize the
#               context around the blobs.

# -X --print-all-matches: print all blobs, be they known false
#               positives or actual blobs.

# -x --list-all-matches: list files that contain sequences that appear
#               to be blobs, be they known false positives or not.

# -p --mark-false-positives: print the processed input, replacing
#               sequences that match the blob detector test, even those
#               known to be false positives, with /*(DEBLOBBED)*/.

# -P --list-false-positives: list files that contain false positives.

# -f --print-marked-false-positives: like -p, but print only the
#               matching sequences.

# -F --print-false-positives: like -f, but do not deblob the sequences.

# -t --test: run (very minimal) self-test.

# -V --version: print a version number

# -h -? -H --help: print short or long help message


# debugging options:

# -S --save-scripts: save scripts and temporary files.

# -v --verbose: increase verbosity level, for internal debugging.  May
#               be given at most twice.


# file options:

# --: Don't process command-line options any further.  All following
#               arguments are taken as filenames.

# -i --implied-prefix --prefix: prepend the given prefix to each filename
#               listed after this option, when configuring false positives
#               and negatives.

# *.tar*: iterate over all files in the named tar file.

# *.patch, patch-*, *.diff: Look for blobs in the [ +] parts of the
#               *patch, unless --reverse-patch is given, in which case
#               the [ -] parts will be used.

# Anything else is assumed to be a source file.

# *.gz | *.bz2 | *.xz | *.lz: Decompress automatically.


# The exit status is only significant for the --list options: it will
# be true if nothing was found, and false otherwise.

: # Mark the end of the help message.

# TODO:

# - Improve handling of command-line arguments, so as to not make the
# order relevant.

# - Add an option for the user to feed their own false positive
# patterns.

# - Add support to recognize known blobs (or other non-Free
# signatures, really), to speed up the scanning of files containing
# blobs, and to avoid attempts to disguise blobs.

# - Factor out the code in the various print_* and list_* parts of the
# sed script, at least in the shell sources.  Make sure they're all
# included and expanded in a saved --cat script though.

# - Add support for file name tagging in patterns, such that blobs or
# false positives are recognized only when handling the specific
# filename, be it stand-alone, as part of a patch or a tarball.  This
# should help avoid recognition of actual blobs as false positives
# just because there's a symbol with a different name elsewhere.

#   It is convenient that the patterns provided by the user to
# recognize file names can be empty (for backward compatibility), but
# this should ideally be phased out in favor of more precise matches.
# It's important that files can be recognized with leading tarball or
# patch names, that the filename used within the tarball contain
# leading garbage, and even that a partial pathname be recognizable
# (say recognize drivers/net/whatever.c when the input file is named
# ../net/whatever.c).

#   It's convenient (but not quite essential) that filename patterns
# be specifiable as regular expressions, rather than simple filenames,
# so that a single pattern can recognize multiple files, but there are
# other ways around this.

#   Maintaining begin/end markers in a stack-like fashion as part of
# the processed stream, and using the names in them as (optional) part
# of the recognition patterns, would enable us to do it.

#   Introducing annotations next to the false positives (and recognized
# blobs) as an early part of the process may speed things up and
# enable fast processing, but how to introduce the annotations quickly
# in the first place?  Given patterns such as

#   \(\(file1\)\(.*\)\(pat1\)\|\(file2\)\(.*\)\(pat2\)\|...\)

# how do we get sed to introduce a marker that contains file2 right
# before or right after pat2, without turning a big efficient regexp
# into a slowish sequence of s/// commands?

# - Re-check and narrow false-positive patterns to make sure they
# apply only to the relevant content.

# - Scripting abilities, so as to be able to automate the removal of
# source files or of blobs from source files in a tarball without
# having to extract the entire tarball (as in tar --update/--delete)
# would be nice.  Carrying over removed files automatically into
# patches would also be great, and this sort of script would be
# perfect to document what has been done to a tarball plus a set of
# patches.  Something like deblob.script:
#
#   tarball linux-2.6.24.tar.bz2
#   delete net/wireloss/freedom.c drivers/me/crazy.c
#   deblob include/linux/slab-blob-kfree.h
#   deconfig drivers/char/drm DRM_IS_BAD
#
#   patch patch-2.6.25-rc7.bz2
#   delete arch/power/over/you.c

# such that the deletes from an earlier file would carry over into the
# subsequent ones, and new tarballs and patch files would be generated
# with the libre- prefix in their basename, and the xdeltas between
# the original files and the modified files would be minimal, and
# redundant with this script and the input script while at that.

# - Improve documentation of the code.

# - Write a decent testsuite.

# - Insert your idea here. :-)

# Yeah, lots of stuff to do.  Want to help?

# This makes it much faster, and mostly immune to non-ASCII stuff, as
# long as an 8-bit-safe sed is used.  Probably a safe assumption these
# days.
# Run every tool started from here on in the plain C locale.
LC_ALL=C
LANGUAGE=C
export LC_ALL LANGUAGE

# Command used to dispose of temporary files; --save-scripts replaces
# it with a command that merely reports the names.
rm='rm -f'

# Find a command that prints its argument verbatim, i.e. without
# interpreting backslash escapes, followed by a newline.  The winner is
# left in $echo; give up if none of the candidates behaves.
for echo in 'echo' 'printf %s\n'; do
  if test "`$echo '\nx'`" = '\nx'; then
    break
  fi
done
if test "`$echo '\nx'`" != '\nx'; then
  echo Cannot find out what echo to use >&2
  exit 1
fi

# Same as above, for a command that prints its argument verbatim
# WITHOUT a trailing newline.  The winner is left in $echo_n.
for echo_n in "echo -n" "printf %s"; do
  if test "`$echo_n '\na'; $echo_n '\nb'`" = '\na\nb'; then
    break
  fi
done
# Final sanity check: two consecutive outputs must be glued together.
if test "`$echo_n a; $echo_n b`" != ab; then
  echo Cannot find out an echo -n equivalent to use >&2
  exit 1
fi

# -S / --save-scripts: instead of removing scripts and temporary files,
# have the $rm command just report which ones are being preserved.
if test "x$1" = x--save-scripts || test "x$1" = x-S; then
  rm="echo preserving"
  shift
fi

# Choose verbosity level for sed script debugging and performance
# analysis.  Up to two leading -v / --verbose flags are consumed; the
# count ends up in $vp.
vp=0
while :; do
  case $vp:$1 in
  0:--verbose | 0:-v) vp=1 ;;
  1:--verbose | 1:-v) vp=2 ;;
  *) break ;;
  esac
  shift
done

# $v is the sed snippet that goes with the chosen level: a comment
# leader when quiet, tracing commands otherwise.  (Presumably spliced
# into the generated sed script; the consumer is not in this part of
# the file.)
case $vp in
0)
  v="# "
  ;;
1)
  v="P;i\\
"
  ;;
2)
  v="i\\
:
p
i\\
"
  ;;
esac

# Blob detector sensitivity.  $sens holds the requested number of
# consecutive constants MINUS ONE; the default corresponds to 32.
# -s / --sensitivity must be followed by a positive integer.
sens=31 # 32 - 1
case $1 in
--sensitivity | -s)
  # Complain explicitly rather than relying on a failing shift, which
  # some shells perform silently.
  if test $# -lt 2; then
    echo "$1 requires a number as its argument" >&2
    exit 1
  fi
  sens=$2
  shift 2

  # test fails (noisily, hence the redirection) on non-numeric input.
  if test "$sens" -gt 0 2>/dev/null; then
    :
  else
    # Quoted, so that the offending value is reported verbatim instead
    # of being split into words and expanded as a file name pattern.
    echo "invalid sensitivity: $sens" >&2
    exit 1
  fi

  sens=`expr $sens - 1`
  ;;
esac

# --reverse-patch: test the removed parts of a patch rather than the
# added ones.  $reverse_patch is a command name, ":" (true) or "false".
if test "x$1" = x--reverse-patch; then
  reverse_patch=:
  shift
else
  reverse_patch=false
fi

# Prefix prepended to file names when selecting false positive
# patterns.  Whatever the user passes is normalized so that $prefix
# both starts and ends with a slash; the default is a lone slash.
prefix=/
case $1 in
--implied-prefix | --prefix | -i)
  # Complain explicitly rather than relying on a failing shift, which
  # some shells perform silently.
  if test $# -lt 2; then
    echo "$1 requires a prefix as its argument" >&2
    exit 1
  fi
  prefix=$2
  shift 2
  case $prefix in
  # An empty prefix or a lone slash means the default; without this
  # arm they would be turned into a double slash.
  '' | /) prefix=/ ;;
  /*/) ;;
  */) prefix=/$prefix ;;
  /*) prefix=$prefix/ ;;
  *) prefix=/$prefix/ ;;
  esac
  ;;
esac

# Program name, used in diagnostics and to locate the version banner
# in this very file.
name=deblob-check

# Command name ("false" or ":") telling whether --test was requested.
test_mode=false

# Forward all arguments, untouched, to the engine-specific setup
# command named by $set_main_cmd.
set_eqscript_main () {
  ${set_main_cmd} ${1+"$@"}
}

# Default action for the script-based engines: list files with blobs.
# The action selection options redefine this function.
set_eqscript_cmd () { set_eqscript_main list_blob; }

# Default action for the sed engine.  The single argument handed to
# set_sed_main is a sed fragment that inserts the current value of
# $file followed by a /*(DEBLOB- ERROR)*/ marker split over two lines,
# then quits with exit status 1.  The action selection options
# redefine this function.
set_sed_cmd () {
  set_sed_main '
i\
'"$file"'\
/*(DEBLOB-\
ERROR)*/
q 1'
}

# Action used with --gen-flex: defer to set_flex_main, no arguments.
set_flex_cmd () { set_flex_main; }

# Action used with --save-script-input: defer to
# set_save_script_input_main, no arguments.
set_save_script_input_cmd () { set_save_script_input_main; }

# Pick the default engine: the first one of python, GNU awk and perl
# that answers --version, or else sed.
#
# - Python will exhibit exponential behavior processing some regular
#   expressions, but we may have already fixed them all.  (see
#   http://swtch.com/~rsc/regexp/regexp1.html for details)
# - GNU awk works fine, but it requires --re-interval to accept regexp
#   ranges, which we rely on to match blobs.  We could expand the blob
#   on our own, but, yuck.
# - Perl is not chosen by default (PERL defaults to false).  Besides
#   the potential for exponential behavior, we exceed some internal
#   recursion limits.
# - Sed takes GBs of RAM to compile all the huge regexps in the sed
#   script we generate with all known false positives and blobs in
#   Linux.  However, it is somewhat faster than GNU awk and even
#   python for long runs.
#   Try it: deblob-check --use-sed linux-2.6.32.tar.bz2
set_cmd=set_sed_cmd
for engine_probe in python awk perl; do
  case $engine_probe in
  python) (${PYTHON-python} --version) ;;
  awk) (${AWK-gawk} --re-interval --version) ;;
  perl) (${PERL-false} --version) ;;
  esac > /dev/null 2>&1 || continue
  set_cmd=set_eqscript_cmd
  set_main_cmd=set_${engine_probe}_main
  break
done
unset engine_probe

# Explicit engine selection overrides the default chosen above.
# First the engine-specific setup command, for the script engines...
case $1 in
--use-python) set_main_cmd=set_python_main ;;
--use-perl) set_main_cmd=set_perl_main ;;
--use-awk) set_main_cmd=set_awk_main ;;
esac
# ... then the action dispatcher, consuming the option.
case $1 in
--use-python | --use-perl | --use-awk)
  set_cmd=set_eqscript_cmd
  shift
  ;;
--use-sed)
  set_cmd=set_sed_cmd
  shift
  ;;
--gen-flex)
  set_cmd=set_flex_cmd
  shift
  ;;
--save-script-input)
  set_cmd=set_save_script_input_cmd
  shift
  ;;
esac

# Action selection.  The informational options print text extracted
# from the comment header of this very file and exit.  --test just
# records the request.  Every other action consumes its option and
# redefines set_sed_cmd and set_eqscript_cmd, so that whichever engine
# was selected is later configured for that action.  Anything else
# that looks like an option, without being an existing file, is
# rejected as out of sequence.
case $1 in
--version | -V)
 # "$0" and "$name" are quoted so that this works even if the script
 # is run from a path containing blanks.
 ${SED-sed} -e '/^# '"$name"' version /,/^# Written by/ { s/^# //; p; }; d' < "$0"
 exit 0
 ;;

-\? | -h)
 # Short help: the usage lines, with each option paragraph reduced to
 # the option names.
 ${SED-sed} -n -e '/^# usage:/,/# -h/ { /^# -/,/^$/{s/^# \(-.*\):.*/\1/p; d; }; s/^\(# \?\)\?//p; }' < "$0" &&
 echo
 echo "run \`$name --help | more' for full usage"
 exit 0
 ;;

--help | -H)
 # Long help: the whole comment header, up to the first line that is
 # not a comment.
 ${SED-sed} -n -e '/^# '"$name"' version /,/^[^#]/ s/^\(# \?\)\?//p' < "$0"
 exit 0
 ;;

--test | -t)
 test_mode=:
 ;;

--mark-false-positives | -p)
 shift;
 set_sed_cmd () {
   set_sed_main "b list_both" "p" "b list_matches"
 }
 set_eqscript_cmd () {
   set_eqscript_main "replace_blob = print_blob = without_falsepos"
 }
 ;;

--print-marked-false-positives | -f)
 shift;
 set_sed_cmd () {
   set_sed_main "b print_marked_matches" "" "b print_marked_matches"
 }
 set_eqscript_cmd () {
   set_eqscript_main "replace_falsepos = print_falsepos"
 }
 ;;

--print-false-positives | -F)
 shift;
 set_sed_cmd () {
   set_sed_main "b print_matches" "" "b print_matches"
 }
 set_eqscript_cmd () {
   set_eqscript_main "print_falsepos"
 }
 ;;

--deblob | --mark-blobs | -d)
 shift;
 set_sed_cmd () {
   set_sed_main "b list_blobs" "p" "p"
 }
 set_eqscript_cmd () {
   set_eqscript_main "replace_blob = print_blob = print_falsepos = print_nomatch"
 }
 ;;

--cat | -D)
 shift;
 set_sed_cmd () {
   set_sed_main \
     "# sedcat: Actual blob detected, but there may be false positives." \
     "# sedcat: No blob whatsoever found." \
     "# sedcat: False positives found." \
     "p
d
# sedcat: Just print stuff, remove this line to run the actual script."
 }
 set_eqscript_cmd () {
   set_eqscript_main "print_blob = print_falsepos = print_nomatch"
 }
 ;;

--print-marked-blobs | -b)
 shift;
 set_sed_cmd () {
   set_sed_main "b print_marked_blobs"
 }
 set_eqscript_cmd () {
   set_eqscript_main "replace_blob = print_blob"
 }
 ;;

--print-blobs | -B)
 shift;
 set_sed_cmd () {
   set_sed_main "b print_blobs"
 }
 set_eqscript_cmd () {
   set_eqscript_main "print_blob"
 }
 ;;

--print-marked-blobs-with-context | -c)
 shift;
 set_sed_cmd () {
   set_sed_main "b print_marked_cblobs"
 }
 set_eqscript_cmd () {
   set_eqscript_main "with_context = replace_blob = print_blob"
 }
 ;;

--print-blobs-with-context | -C)
 shift;
 set_sed_cmd () {
   set_sed_main "b print_cblobs"
 }
 set_eqscript_cmd () {
   set_eqscript_main "with_context = print_blob"
 }
 ;;

--list-false-positives | -P)
 shift;
 set_sed_cmd () {
   set_sed_main "" "" "
i\\
$file\\
/*(DEBLOB-\\
ERROR)*/
q 1"
 }
 set_eqscript_cmd () {
   set_eqscript_main "list_falsepos"
 }
 ;;

--list-all-matches | -x)
 shift;
 set_sed_cmd () {
   set_sed_main "
i\\
$file\\
/*(DEBLOB-\\
ERROR)*/
q 1" "" "
i\\
$file\\
/*(DEBLOB-\\
ERROR)*/
q 1"
 }
 set_eqscript_cmd () {
   set_eqscript_main "list_blob = list_falsepos"
 }
 ;;

--print-all-matches | -X)
 shift;
 set_sed_cmd () {
   set_sed_main "b print_both" "" "b print_matches"
 }
 set_eqscript_cmd () {
   set_eqscript_main "print_blob = print_falsepos"
 }
 ;;

*)
 # --list-blobs is the default action: nothing to redefine.
 case $1 in
 --list-blobs | -l) shift;;
 esac
 # The file options are dealt with elsewhere.  Any other argument
 # starting with a dash must name an existing file, otherwise it is an
 # option given out of the documented sequence.
 case $1 in
 -- | --implied-prefix | --prefix | -i) ;;
 -*)
   if test ! -f "$1"; then
     echo "$name: \`$1' given too late or out of the proper sequence." >&2
     echo "$name: The order of arguments is significant, see the usage." >&2
     exit 1
   fi
   ;;
 esac
 ;;

esac

# "--" ends option processing: consume it and remember that it was
# seen.
if test "x$1" = x--; then
  shift
  sawdashdash=t
fi

# Self-test, run when --test was requested.  For each engine, this very
# script is re-invoked (as "$0", quoted so that a path containing
# blanks works) with a sensitivity of 4 and the testsuite prefix,
# feeding it small inputs on stdin and comparing its output with the
# expected text.  Every expected output starts with a "::: - :::"
# line.  Failures are reported on stderr, and the exit status tells
# whether all engines passed.
# NOTE(review): the blob(), accept() and blobeol fixtures presumably
# rely on test-only patterns enabled by the /deblob-check-testsuite/
# prefix; they are defined outside this part of the file.
if $test_mode; then
allpass=:
for tool in awk perl python sed; do
 echo testing $tool...

 # Deliberately left unquoted where used: it holds several arguments.
 targs="-s 4 -i /deblob-check-testsuite/ --use-$tool"

 pass=:


 # Exercise some nasty inputs to see that we
 # recognize them as blobs with full context.
 test="positive context"
 for string in \
   "1,2,3,4" \
   "= {
1, 0x2, 03, L'\x4'
}" \
   "=
{
 '\\x1', '\\002'
 ,
 {
   { \"\\x3\", },
   \"\\004\"
 },
};" \
   ".long 1,2
    .long \$3,\$4" \
   "#define X { 1, 2, \\
                3, 4, /* comment */ \\
              }" \
   "= {
/*
* multi-line
* comment
*/
{
  0x4c00c000, 0x00000000, 0x00060000, 0x00000000,
},
}" \
   "= {
blob(
)
accept(
)
1, 2, 3, 4
}" \
 ; do
   case `$echo "$string" | "$0" $targs -C` in
   "::: - :::
$string") ;;
   *) $echo "failed $test test for:
$string" >&2
      pass=false;;
   esac
 done

 # Make sure we do not recognize these as blobs.
 test=negative
 for string in \
   "#define X { 1, 2 }
#define Y { 3, 4 }" \
   " 0x00, 0x00, 0x00 " \
   "accept(1, 2, 3,
4, 5, 6)" \
 ; do
   case `$echo "$string" | "$0" $targs` in
   "") ;;
   # Report with $echo, like the other tests, so that the input is
   # printed verbatim whatever the plain echo does with backslashes.
   *) $echo "failed $test test for:
$string" >&2
      pass=false;;
   esac
 done

 # Make sure we print only the lines with blobs.
 # The strings below come in pairs: an input, then the expected output.
 test="only blob"
 odd=:
 for string in \
   "= {
1, 0x2, 03, L'\x4'
}" \
       "1, 0x2, 03, L'\x4'" \
\
   "=
{
 '\\x1', '\\002'
 ,
 {
   { \"\\x3\", },
   \"\\004\"
 },
};" \
       "  '\\x1', '\\002'
 ,
 {
   { \"\\x3\", },
   \"\\004\"" \
\
   ".long 1,2
    .long \$3,\$4" \
       ".long 1,2
    .long \$3,\$4" \
\
   "#define X { 1, 2, \\
                3, 4, /* comment */ \\
              }" \
       "#define X { 1, 2, \\
                3, 4, /* comment */ \\" \
\
   "= {
/*
* multi-line
* comment
*/
{
  0x4c00c000, 0x00000000, 0x00060000, 0x00000000,
},
}" \
       "   0x4c00c000, 0x00000000, 0x00060000, 0x00000000," \
\
   "MODULE_FIRMWARE(x);
MODULE_FIRMWARE(y);
1, 2, 3, 4; 5, 6, 7, 8;
9, 10, 11" \
     "MODULE_FIRMWARE(x);
MODULE_FIRMWARE(y);
::: - :::
1, 2, 3, 4; 5, 6, 7, 8;" \
\
   "= {
blob()
accept()
1, 2, 3, 4
}" \
       "blob()
::: - :::
1, 2, 3, 4" \
\
   "a blobeol y
x" \
       "a blobeol y
x" \
\
 ; do
   # First of a pair: remember the input and move on to the output.
   if $odd; then
     input=$string odd=false
     continue
   fi
   case `$echo "$input" | "$0" $targs -B` in
   "::: - :::
$string") ;;
   *)
     $echo "failed $test test for:
$input" >&2
     pass=false
     ;;
   esac
   odd=:
 done
 # An input left without its expected output is a bug in the list
 # above; make it count as a failure rather than just a message.
 $odd || { echo "internal testsuite failure in $test" >&2; pass=false; }

 # Make sure we deblob only the blobs.
 # Again, pairs of input and expected output.
 test="deblobs"
 odd=:
 for string in \
   "= { 1, 0x2, 03, L'\x4' }" \
       "= { /*(DEBLOBBED)*/' }" \
\
   "=
{
 '\\x1', '\\002'
 ,
 {
   { \"\\x3\", },
   \"\\004\"
 },
};" \
       "  '\\x/*(DEBLOBBED)*/\"" \
\
   ".long 1,2
    .long \$3,\$4" \
       ".long /*(DEBLOBBED)*/" \
\
   "#define X { 1, 2, \\
                3, 4, /* comment */ \\
              }" \
       "#define X { /*(DEBLOBBED)*/, /* comment */ \\" \
\
   "= {
/*
* multi-line
* comment
*/
{
  0x4c00c000, 0x00000000, 0x00060000, 0x00000000,
},
}" \
       "   /*(DEBLOBBED)*/," \
\
   "MODULE_FIRMWARE(x);
MODULE_FIRMWARE(y);
1, 2, 3, 4; 5, 6; 7, 8, 9, 10;
9, 10, 11" \
     "/*(DEBLOBBED)*/
::: - :::
/*(DEBLOBBED)*/; 5, 6; /*(DEBLOBBED)*/;" \
\
   "= {
accept() blob() x blob(
) y
}" \
       "accept() /*(DEBLOBBED)*/ x /*(DEBLOBBED)*/ y" \
\
   "= {
accept() blob() x blob(
w) y
}" \
       "accept() /*(DEBLOBBED)*/ x /*(DEBLOBBED)*/ y" \
\
   "a blobeol y
x" \
       "a /*(DEBLOBBED)*/x" \
\
 ; do
   if $odd; then
     input=$string odd=false
     continue
   fi
   case `$echo "$input" | "$0" $targs -b` in
   "::: - :::
$string") ;;
   *)
     $echo "failed $test test for:
$input" >&2
     pass=false
     ;;
   esac
   odd=:
 done
 $odd || { echo "internal testsuite failure in $test" >&2; pass=false; }

 # How did we do?
 if $pass; then
   echo success for $tool
 else
   allpass=$pass
 fi
done
# The status of this command becomes the exit status of the run.
$allpass
exit
fi

# Call addx as needed to set up more patterns to be recognized as
# false positives.  Takes the input filename in $1.

set_except () {
 case $prefix$1 in
 */*linux*.tar* | */*kernel*.tar* | */*linux-*.*/*)
   # false alarms, contain source
   # drivers/net/wan/wanxlfw.inc_shipped -> wanxlfw.S
   accept 'static[ ]u8[ ]firmware\[\]=[{][\n]0x60,\(0x00,\)*0x16,\(0x00,\)*\([\n]\(0x[0-9A-F][0-9A-F],\)*\)*[\n]0x23,0xFC,0x00,0x00,0x00,0x01,0xFF,0xF9,0x00,0xD4,0x61,0x00,0x06,0x74,0x33,0xFC,\([\n]\(0x[0-9A-F][0-9A-F],\)*\)*0x00[\n][}][;]'
   # drivers/usb/serial/xircom_pgs_fw.h -> xircom_pgs.S
   initnc 'static[ ]const[ ]struct[ ]ezusb_hex_record[ ]xircom_pgs_firmware\[\][ ]='
   # drivers/usb/serial/keyspan_pda_fw_h -> keyspan_pda.S
   initnc 'static[ ]const[ ]struct[ ]ezusb_hex_record[ ]keyspan_pda_firmware\[\][ ]='
   # arch/m68k/ifpsp060/*.sa -> src/*.s
   accept '[   ]\.long[        ]0x60ff0000,0x02360000,0x60ff0000,0x16260000[\n]'"$sepx$blobpat*"
   accept '[   ]\.long[        ]0x60ff0000,0x17400000,0x60ff0000,0x15f40000[\n]'"$sepx$blobpat*"
   # arch/powerpc/platforms/cell/spufs/spu_save_dump.h_shipped -> spu_save.c
   initnc 'static[ ]unsigned[ ]int[ ]spu_save_code\[\][ ][ ]__attribute__[(][(]__aligned__[(]128[)][)][)][ ]='
   # arch/powerpc/platforms/cell/spufs/spu_restore_dump.h_shipped -> spu_restore.c
   initnc 'static[ ]unsigned[ ]int[ ]spu_restore_code\[\][ ][ ]__attribute__[(][(]__aligned__[(]128[)][)][)][ ]='
   # drivers/net/ixp2000/ixp2400_tx.ucode -> ixp2400_tx.uc
   initnc '[   ]\.initial_reg_values[  ]=[ ][(]struct[ ]ixp2000_reg_value[ ]\[\][)][ ][{]' drivers/net/ixp2000/ixp2400_tx.ucode
   # drivers/net/ixp2000/ixp2400_rx.ucode -> ixp2400_rx.uc
   initnc '[   ]\.initial_reg_values[  ]=[ ][(]struct[ ]ixp2000_reg_value[ ]\[\][)][ ][{]' drivers/net/ixp2000/ixp2400_rx.ucode


   # checked:

   accept '[   ][$]3[ ]=[ ][{][{]pge[ ]=[ ][{][{]ste[ ]=[ ][{]\(\([0-9][0-9a-fx{},\n   ]*\|\(pge\|ste\)[ ]=\|<repeats[ ][0-9]\+[ ]times>\)[{},\n       ]*\)*<repeats[ ]11[ ]times>[}]$'
   accept '__clz_tab:[\n][     ]\.byte[        ]0\(,[0-5]\)\+'"$sepx$blobpat*" arch/sparc/lib/divdi3.S
   accept 'PITBL:[\n][ ][ ]\.long[ ][ ]0xC0040000,0xC90FDAA2,'"$blobpat*" arch/sparc/lib/divdi3.S
   accept '\(0x[0F][0F],\)\+\\[\n]\(\(0x[0F][0F],\)\+\\[\n]\)*\(0x[0F][0F],\)\+0x00' arch/m68k/mac/mac_penguin.S
   accept '\.lowcase:[\n][     ]\.byte[ ]0x00\(,0x0[1-7]\)\+'"$sepx$blobpat*"'$' arch/s390/kernel/head.S
   accept '_zb_findmap:[\n][ ][ ][ ][ ][ ][ ][ ][ ][ ]\.byte[ ][ ]0\(,[123],0\)\+,4'"$sepx$blobpat*"'$' arch/s390/kernel/bitmap.S
   accept '_sb_findmap:[\n][ ][ ][ ][ ][ ][ ][ ][ ][ ]\.byte[ ][ ]8\(,0,[123]\)\+,0'"$sepx$blobpat*"'$' arch/s390/kernel/bitmap.S
   accept '[   ]\.section[ ]__ex_table,["]a["]'"$sepx$blobpat*" arch/powerpc/lib/copyuser_64.S
   accept '[   ]memcpy[(]src,[ ]["]\\x01\\x00\\x00\\x01\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00["].*PROGxxxx' arch/powerpc/platforms/iseries/mf.c
   initnc 'static[ ]const[ ]unsigned[ ]int[ ]cpu_745x\[2\]\[16\][ ]=' arch/ppc/platforms/ev64260.c
   initnc 'const[ ]unsigned[ ]char[ ]__flsm1_tab\[256\][ ]=' arch/alpha/lib/fls.c
   accept '#define[ ]_MAP_0_32_ASCII_SEG7_NON_PRINTABLE[       ]\\[\n][        ]\(0,\)\+$' 'drivers/input/misc/map_to_7segment\.h\|include/linux/map_to_7segment\.h'
   initc '[    ]static[ ]int[ ][ ][ ][ ][ ][ ]init_values_b\[\][ ]=' sound/oss/ad1848.c
   initnc 'static[ ]unsigned[ ]char[ ]atkbd_set2_keycode\[512\][ ]=' drivers/input/keyboard/atkbd.c
   accept 'desc_config1:[\n][  ]\.byte[ ]0x09,[ ]0x02'"$sepx$blobpat*" 'drivers/usb/serial/\(keyspan_pda\|xircom_pgs\).S'
   accept 'string_mfg:[\n]\?\([;]\?[   ]\.byte[^\n]*[\n]\)\+string_mfg_end:' 'drivers/usb/serial/\(keyspan_pda\|xircom_pgs\).S'
   accept 'string_product:[\n]\?\([;]\?[       ]\.byte[^\n]*[\n]\)\+string_product_end:' 'drivers/usb/serial/\(keyspan_pda\|xircom_pgs\).S'
   accept '[ ][ ][ ][/][*][ ]\(SQCIF\|QSIF\|QCIF\|SIF\|CIF\|VGA\)[ ][*][/][\n][ ][ ][ ][{][\n][ ][ ][ ][ ][ ][ ][{]'"$blobpat*" drivers/media/video/pwc/pwc-nala.h
   accept 'P[13]\([\n]#[^\n]*\)*[\n]*\([\n][0-9 ]*\)\+' drivers/video/logo/*.ppm
   accept 'for[ ]i[ ]in[ ][    0-9\\\n]*[\n]do' 'Documentation/specialix\.txt|Documentation/serial/specialix\.txt'
   accept '[ ][ ][ ][ ][ ][ ][ ][ ][ ]:[ ][ ][ ]3600000[ ][ ][ ]3400000[ ][ ][ ]3200000[ ][ ][ ]3000000[ ][ ][ ]2800000[ ]' Documentation/cpu-freq/cpufreq-stats.txt
   accept '00[ ]00[\n]64[ ]01[\n]8e[ ]0b[\n][\n][0-9a-f \n]*fe[ ]fe' 'Documentation/scsi/\(sym\|ncr\)53c8xx_2.txt'
   accept '0f[ ]00[ ]08[ ]08[ ]64[ ]00[ ]0a[ ]00[ ]-[ ]id[ ]0[\n]'"$blobpat*" 'Documentation/scsi/\(sym\|ncr\)53c8xx_2.txt'
   accept 'default[ ]nvram[ ]data:'"$sepx$blobpat*" 'Documentation/scsi/\(sym\|ncr\)53c8xx_2.txt'
   accept '0x0458[ ][ ][ ][ ][ ]0x7025[\n]'"$blobpat*" Documentation/video4linux/sn9c102.txt
   accept '0x102c[ ][ ][ ][ ][ ]0x6151[\n]'"$blobpat*" Documentation/video4linux/et61x251.txt
   accept '0x041e[ ][ ][ ][ ][ ]0x4017[\n]'"$blobpat*" Documentation/video4linux/zc0301.txt
   accept '[ ][ ][(]gdb[)][ ]x[/]100x[ ][$]25[\n][ ][ ]0x507d2434:[ ][ ][ ][ ][ ]0x507d2434[ ][ ][ ][ ][ ][ ]0x00000000[ ][ ][ ][ ][ ][ ]0x08048000[ ][ ][ ][ ][ ][ ]0x080a4f8c'"$sepx$blobpat*" Documentation/uml/UserModeLinux-HOWTO.txt
   accept '[ ][ ][ ][ ][ ][ ]1[ ][ ]0[ ][ ]0[ ][ ]0[ ][ ]0x308'"$sepx$blobpat*" Documentation/isdn/README.inc
   accept 'domain<N>[ ]<cpumask>[ ]1[ ]2[ ]3[ ]4[ ]5[ ]6[ ]7[ ]8[ ]9[ ]10[ ]11[ ]12[ ]13[ ]14[ ]15[ ]16[ ]17[ ]18[ ]19[ ]20[ ]21[ ]22[ ]23[ ]24[ ]25[ ]26[ ]27[ ]28[ ]29[ ]30[ ]31[ ]32[ ]33[ ]34[ ]35[ ]36$' Documentation/sched-stats.txt
   accept '[ * ]*0[ ][ ][ ][ ][ ][ ][ ][ ][ ][ ][ ][ ][ ][ ][ ][ ][ ][ ][ ]1[ ][ ][ ][ ][ ][ ][ ][ ][ ][ ][ ][ ][ ][ ][ ][ ][ ][ ][ ]2[ ][ ][ ][ ][ ][ ][ ][ ][ ][ ][ ][ ][ ][ ][ ][ ][ ][ ][ ]3[\n][ *        ]*0[ ]1[ ]2[ ]3[ ]4[ ]5[ ]6[ ]7[ ]8[ ]9[ ]0[ ]1[ ]2[ ]3[ ]4[ ]5[ ]6[ ]7[ ]8[ ]9[ ]0[ ]1[ ]2[ ]3[ ]4[ ]5[ ]6[ ]7[ ]8[ ]9[ ]0[ ]1' 'net/\(netfilter\|ipv4\)/ipvs/ip_vs_sync.c|net/sctp/sm_make_chunk.c|include/linux/scpt.h'
   accept '[ ][*][ ][ ]1[ ]1[ ]0[ ]0[ ]0[ ]0[ ]0[ ]0[ ]0[ ]0[ ]0[ ]1[ ]0[ ]0[ ]0[ ]0[ ]0[ ]0[ ]0[ ]0[ ]0[ ]0[ ]0[ ]0[ ]0[ ]0[ ]0[ ]0[ ]0[ ]0[ ]0[ ]0' arch/x86/lguest/boot.c
   ocomment '[ ][/][*][ ]Configure[ ]the[ ]PCI[ ]bus[ ]bursts[ ]and[ ]FIFO[ ]thresholds.' drivers/net/fealnx.c
   ocomment '[/][*][ ]the[ ]original[ ]LUT[ ]values[ ]from[ ]Alex[ ]van[ ]Kaam[ ]<darkside@chello\.nl>' drivers/hwmon/via686a.c
   initc 'static[ ]const[ ]unsigned[ ]char[ ]init\[\][ ]=[ ][{][^;]*MODE=0[ ][;].*SAA_7114_NTSC_HSYNC_START' drivers/media/video/saa7114.c

   defsnc 'static[ ]struct[ ]cipher_testvec[ ]\(aes\|anubis\|bf\|camellia\|cts_mode\|des3_ede\|cast6\|salsa20_stream\|serpent\|tf\|tnepres\|xeta\|x\?tea\)\(_\(cbc\|ctr\(_rfc3686\)\?\|xts\)\)\?_\(enc\|dec\)_tv_template\[\][ ]=' 'crypto/\(tcrypt\|testmgr\).h'
   defsnc 'static[ ]struct[ ]comp_testvec[ ]\(deflate\|lzo\)_\(de\)\?comp_tv_template\[\][ ]=' 'crypto/\(tcrypt\|testmgr\).h'
   defsnc 'static[ ]struct[ ]hash_testvec[ ]\(aes_xcbc128\|crc32c\|hmac_sha2\(24\|56\)\|\(sha\|wp\)\(256\|384\|512\)\)_tv_template\[\][ ]=' 'crypto/\(tcrypt\|testmgr\).h'
   # initnc '[         ]*\.\(digest\|entries\|input\|key\|output\|plaintext\|result\)[ \n      ]*=[ ][{"]' 'crypto/\(tcrypt\|testmgr\).h'

   defsnc 'static[ ]\(const[ ]\)\?RegInitializer[ ]initData\[\][ ]__initdata[ ]=' 'drivers/ide/ali14xx\.c\|drivers/ide/legacy/ali14xx\.c'
   defsnc 'static[ ]const[ ]u8[ ]setup\[\][ ]=' 'drivers/ide/delkin_cb\.c\|drivers/ide/pci/delkin_cb\.c'
   defsnc 'static[ ]u8[ ]cvs_time_value\[\]\[XFER_UDMA_6[ ]-[ ]XFER_UDMA_0[ ][+][ ]1\][ ]=' 'drivers/ide/sis5513\.c\|drivers/ide/pci/sis5513\.c'
   defsnc 'static[ ]u8[ ]\(act\|ini\|rco\)_time_value\[\]\[8\][ ]=' 'drivers/ide/sis5513\.c\|drivers/ide/pci/sis5513\.c'
   defsnc 'static[ ]const[ ]u8[ ]speedtab[ ]\[3\]\[12\][ ]=' 'drivers/ide/umc8672\.c\|drivers/ide/legacy/umc8672\.c'
   defsnc 'static[ ]const[ ]s8[ ]\(b43\(legacy\)\?\|bcm43xx\)_tssi2dbm_[bg]_table\[\][ ]=' net/wireless/b43/phy.c
   defsnc 'static[ ]const[ ]char[ ]zr360[56]0_dht\[0x1a4\][ ]=' 'drivers/media/video/zr36060\.c\|drivers/media/video/zoran/zr36060\.c'
   defsnc 'static[ ]const[ ]char[ ]zr360[56]0_dqt\[0x86\][ ]=' 'drivers/media/video/zr36060\.c\|drivers/media/video/zoran/zr36060\.c'
   defsnc 'static[ ]u8[ ]tas3004_treble_table\[\][ ]=' sound/aoa/codecs/tas-basstreble.h

   # This file contains firmwares that we deblob with high
   # sensitivity, so make sure the sequences of numbers that are not
   # blobs are not deblobbed.  FIXME: we should have patterns to
   # recognize the blobs instead.
   defsnc '[   ]static[ ]const[ ]u32[ ]test_pat\[4\]\[6\][ ]=' drivers/net/tg3.c
   accept "[   ][}]\\(,\\?[ ]mem_tbl_5\\(70x\\|705\\|755\\|906\\)\\[\\][ ]=[ ][{]$sepx$blobpat*$sepx[}]\\)*[;]" drivers/net/tg3.c

   # end of generic checked expressions.
   # version-specific checked bits start here

   # removed in 2.6.28
   defsnc 'static[ ]unsigned[ ]char[ ]irq_xlate\[32\][ ]=' arch/sparc/kernel/sun4m_irq.c
   defsnc 'static[ ]int[ ]logitech_expanded_keymap\[LOGITECH_EXPANDED_KEYMAP_SIZE\][ ]=' drivers/hid/hid-input.c
   defsnc '[   ]static[ ]const[ ]\(__\)\?u8[ ]\(read_indexs\|n\(set\)\?[0-9]*\(_other\)\?\|missing\)\[[0-9x]*\][ ]=' drivers/media/video/gspca/t613.c
   defsnc 'static[ ]const[ ]u_char[ ]nand_ecc_precalc_table\[\][ ]=' drivers/mtd/nand/nand_ecc.c
   oprepline '#define[ ]AR5K_RATES_\(11[ABG]\|TURBO\|XR\)[ ]' drivers/net/wireless/ath5k/ath5k.h
   defsnc 'static[ ]const[ ]struct[ ]ath_hal[ ]ar5416hal[ ]=' drivers/net/wireless/ath9k/hw.c
   defsnc 'const[ ]unsigned[ ]char[ ]INIT_2\[127\][ ]=' drivers/video/omap/lcd_sx1.c

   # removed in 2.6.24
   accept "[ ]Psize[ ][ ][ ][ ]Ipps[ ][ ][ ][ ][ ][ ][ ]Tput[ ][ ][ ][ ][ ]Rxint[ ][ ][ ][ ][ ]Txint[ ][ ][ ][ ]Done[ ][ ][ ][ ][ ]Ndone[\\n][ ]---------------------------------------------------------------\\([\\n][ 0-9]\\+\\)\\+"'$'
   initnc 'static[ ]u_short[ ]ataplain_map\[NR_KEYS\][ ]__initdata[ ]='
   initnc '[   ]static[ ]const[ ]unsigned[ ]char[ ]invert5\[\][ ]='
   initnc 'static[ ]unsigned[ ]char[ ]alpa2target\[\][ ]='
   initnc 'static[ ]unsigned[ ]char[ ]target2alpa\[\][ ]='
   oprepline '#define[ ]INIT_THREAD[ ][{0},]\+[        ]*\\[\n][       ]*[{0},]\+'
   initnc 'static[ ]uint[ ]tas300\(1c\|4\)_\(master\|mixer\|treble\|bass\)_tab\[\]='
   initnc 'static[ ]short[ ]dmasound_[au]law2dma16\[\][ ]='
   initnc 'static[ ]const[ ]unsigned[ ]short[ ]DACVolTable\[101\][ ]='

   # removed in 2.6.23
   initnc 'static[ ]const[ ]UQItype[ ]__clz_tab\[\][ ]=' arch/arm26/lib/udivdi3.c
   initnc '[   ]static[ ]unsigned[ ]char[ ]scale\[101\][ ]=' sound/oss/opl3sa2.c
   initnc '[}][ ]syncs\[\][ ]=' drivers/scsi/53c7xx.c
   initnc 'genoa_md:'"$sepx$blobpat*"'[\n][    ]\.ascii[       ]["]Genoa["]' arch/i386/boot/video.S

   # removed in 2.6.22
   initnc 'Vendor[ ]ID[ ][ ]Product[ ]ID[\n]-\+[ ][ ]-\+[\n]'"$blobpat*" Documentation/video4linux/sn9c102.txt
   defsnc 'static[ ]short[ ][au]law2dma16\[\]' arch/ppc/8xx_io/cs4218_tdm.c
   defsnc '[   ]static[ ]const[ ]char[ ]minimal_ascii_table\[\]' drivers/ieee1394/csr1212.c
   defsnc 'static[ ]u16[ ]key_map[ ]\[256\][ ]=' drivers/media/dvb/ttpci/av7110_ir.c
   defsnc 'static[ ]unsigned[ ]char[ ]gf64_inv\[64\][ ]=' drivers/mtd/nand/cafe_ecc.c
   defsnc 'static[ ]unsigned[ ]short[ ]err_pos_lut\[4096\][ ]=' drivers/mtd/nand/cafe_ecc.c
   defsnc 'static[ ]unsigned[ ]char[ ]testdata\[TESTDATA_LEN\][ ]=' fs/jffs2/comprtest.c

   # added in 2.6.25
   accept "%canned_values[ ]=[ ][(][\\n][      ]\\([0-9]\\+[ ]=>[ ]\\[[        \\n]\\+\\(\\([0-9]\\+\\|\\'0x[0-9a-f]\\+\\'\\),[        \\n]*\\)*\\]\\(,[ ]\\|[\\n]\\)\\)*[)][;]"

   # from 2.6.25-rc* patches
   initnc '[   ]int[ ]bcomm_irq\[3[*]16\][ ]='
   initnc '[   ]stati