#!/usr/bin/env python

"""\
List python source files.

There are three functions to check whether a file is a Python source, listed
here with increasing complexity:

- has_python_ext() checks whether a file name ends in '.py[w]'.
- looks_like_python() checks whether the file is not binary and either has
 the '.py[w]' extension or the first line contains the word 'python'.
- can_be_compiled() checks whether the file can be compiled by compile().

The file also must be of appropriate size - not bigger than a megabyte.

walk_python_files() recursively lists all Python files under the given directories.
"""
# Module authors.
__author__ = "Oleg Broytmann, Georg Brandl"

# Names exported by "from <module> import *".  The helpers _open() and
# print_debug() are intentionally not listed here.
__all__ = ["has_python_ext", "looks_like_python", "can_be_compiled", "walk_python_files"]


import sys, os, re

# Matches any ASCII control character in the ranges 0x00-0x08 and 0x0E-0x1F,
# plus DEL (0x7F).  The 0x09-0x0D range (tab, line feed, vertical tab, form
# feed, carriage return) is deliberately left out.  looks_like_python()
# searches the first line of a file with this pattern and treats a match as
# a sign that the file is binary.
binary_re = re.compile('[\x00-\x08\x0E-\x1F\x7F]')

# Set to True to make print_debug() emit a diagnostic line for every file
# that is tested or rejected.
debug = False


def print_debug(msg):
    """Print *msg* to standard output when the module-level ``debug`` flag is true.

    Nothing is printed while ``debug`` is false.  The single-argument call
    form ``print(msg)`` is used because it produces the same output under
    the Python 2 print statement and the Python 3 print function.
    """
    if debug:
        print(msg)


def _open(fullpath):
   try:
       size = os.stat(fullpath).st_size
   except OSError, err: # Permission denied - ignore the file
       print_debug("%s: permission denied: %s" % (fullpath, err))
       return None

   if size > 1024*1024: # too big
       print_debug("%s: the file is too big: %d bytes" % (fullpath, size))
       return None

   try:
       return open(fullpath, 'rU')
   except IOError, err: # Access denied, or a special file - ignore it
       print_debug("%s: access denied: %s" % (fullpath, err))
       return None

def has_python_ext(fullpath):
    """Return True if the name *fullpath* ends in '.py' or '.pyw'.

    Only the name is examined (case-sensitively); the file is never opened.
    """
    python_suffixes = (".py", ".pyw")
    return fullpath.endswith(python_suffixes)

def looks_like_python(fullpath):
    """Return True if *fullpath* looks like a Python source file.

    The file must be openable by _open() (which also enforces the size
    limit) and its first line must not contain any character matched by
    binary_re.  It is then accepted if its name has a Python extension
    (see has_python_ext()) or if the first line contains the word
    'python', as in the '#!' line of a script without an extension.
    """
    infile = _open(fullpath)
    if infile is None:
        return False

    try:
        line = infile.readline()
    except UnicodeDecodeError:
        # Python 3 text-mode reads decode the data; bytes that cannot be
        # decoded mean this is not a readable text file.  (Python 2 reads
        # return byte strings and never raise this.)
        print_debug("%s: appears to be binary" % fullpath)
        return False
    finally:
        # Close the file even when readline() raises.
        infile.close()

    if binary_re.search(line):
        # file appears to be binary
        print_debug("%s: appears to be binary" % fullpath)
        return False

    # Either a regular source file, or a disguised Python script (e.g. CGI)
    # whose first line mentions python.
    return has_python_ext(fullpath) or "python" in line

def can_be_compiled(fullpath):
    """Return True if the contents of *fullpath* compile as Python source.

    The file must be openable by _open() (which also enforces the size
    limit).  Its whole text is passed to compile() in "exec" mode; any
    exception raised by compile() is reported through print_debug() and
    makes the function return False.  The code is only compiled, never
    executed.
    """
    infile = _open(fullpath)
    if infile is None:
        return False

    try:
        code = infile.read()
    except UnicodeDecodeError as err:
        # Python 3 text-mode reads decode the data; a file that cannot be
        # decoded cannot be compiled either.  (Python 2 reads return byte
        # strings and never raise this.)
        print_debug("%s: cannot compile: %s" % (fullpath, err))
        return False
    finally:
        # Close the file even when read() raises.
        infile.close()

    try:
        compile(code, fullpath, "exec")
    except Exception as err:
        print_debug("%s: cannot compile: %s" % (fullpath, err))
        return False

    return True


def walk_python_files(paths, is_python=looks_like_python, exclude_dirs=None):
    """Generate the Python source files found at or below *paths*.

    paths: an iterable of file and/or directory names.  A file is yielded
           if it passes the is_python test; a directory is walked
           recursively with os.walk() and every file below it is tested.
           Anything that is neither a file nor a directory is skipped.
    is_python: a function that takes a file name and returns true if the
               file is a Python source file
    exclude_dirs: a list of directory base names that are not descended
                  into during the walk (default: nothing is excluded)
    """
    skipped = exclude_dirs
    if skipped is None:
        skipped = []

    for root in paths:
        print_debug("testing: %s" % root)

        if os.path.isfile(root):
            if is_python(root):
                yield root
            continue

        if not os.path.isdir(root):
            print_debug("    unknown type")
            continue

        print_debug("    it is a directory")
        for parent, subdirs, names in os.walk(root):
            # Prune excluded directories in place so that os.walk() does
            # not descend into them.
            for unwanted in skipped:
                try:
                    subdirs.remove(unwanted)
                except ValueError:
                    pass  # not present at this level

            for name in names:
                candidate = os.path.join(parent, name)
                print_debug("testing: %s" % candidate)
                if is_python(candidate):
                    yield candidate


if __name__ == "__main__":
    # Two simple examples/tests, both run on the current directory.
    # print() is called with a single argument so that the output is the
    # same under the Python 2 print statement and the Python 3 function.

    # 1. The default check, looks_like_python().
    for fullpath in walk_python_files(['.']):
        print(fullpath)
    print("----------")
    # 2. The stricter check: the file must actually compile.
    for fullpath in walk_python_files(['.'], is_python=can_be_compiled):
        print(fullpath)