I'm looking for a way to include/exclude files patterns and exclude directories from a os.walk()
call.
Here's what I'm doing by now:
import fnmatch
import os
includes = ['*.doc', '*.odt']
excludes = ['/home/paulo-freitas/Documents']
def _filter(paths):
for path in paths:
if os.path.isdir(path) and not path in excludes:
yield path
for pattern in (includes + excludes):
if not os.path.isdir(path) and fnmatch.fnmatch(path, pattern):
yield path
for root, dirs, files in os.walk('/home/paulo-freitas'):
dirs[:] = _filter(map(lambda d: os.path.join(root, d), dirs))
files[:] = _filter(map(lambda f: os.path.join(root, f), files))
for filename in files:
filename = os.path.join(root, filename)
print(filename)
Is there a better way to do this? How?
This solution uses fnmatch.translate
to convert glob patterns to regular expressions (it assumes the includes only is used for files):
import fnmatch
import os
import os.path
import re
includes = ['*.doc', '*.odt'] # for files only
excludes = ['/home/paulo-freitas/Documents'] # for dirs and files
# transform glob patterns to regular expressions
includes = r'|'.join([fnmatch.translate(x) for x in includes])
excludes = r'|'.join([fnmatch.translate(x) for x in excludes]) or r'$.'
for root, dirs, files in os.walk('/home/paulo-freitas'):
# exclude dirs
dirs[:] = [os.path.join(root, d) for d in dirs]
dirs[:] = [d for d in dirs if not re.match(excludes, d)]
# exclude/include files
files = [os.path.join(root, f) for f in files]
files = [f for f in files if not re.match(excludes, f)]
files = [f for f in files if re.match(includes, f)]
for fname in files:
print fname