#!/usr/bin/env python
# encoding: utf-8
# Baptiste Lepilleur, 2009

from __future__ import print_function

import fnmatch
import os.path
import re

try:
    from dircache import listdir  # Python 2 only; removed in Python 3
except ImportError:
    from os import listdir


# fnmatch patterns (whitespace-separated) naming the directories that glob()
# does not descend into by default while walking the directory tree.
prune_dirs = '.git .bzr .hg .svn _MTN _darcs CVS SCCS '

# Ant-style patterns (whitespace-separated) that glob() rejects by default,
# wherever the matching file or directory sits in the tree.
default_excludes = '''
**/*~
**/#*#
**/.#*
**/%*%
**/._*
**/CVS
**/CVS/**
**/.cvsignore
**/SCCS
**/SCCS/**
**/vssver.scc
**/.svn
**/.svn/**
**/.git
**/.git/**
**/.gitignore
**/.bzr
**/.bzr/**
**/.hg
**/.hg/**
**/_MTN
**/_MTN/**
**/_darcs
**/_darcs/**
**/.DS_Store '''

# Bit flags for the entry_type argument of glob(); combine them with '|'.
DIR = 1
FILE = 2
DIR_LINK = 4
FILE_LINK = 8
LINKS = DIR_LINK | FILE_LINK
ALL_NO_LINK = DIR | FILE
ALL = DIR | FILE | LINKS

# Tokenizer for ant patterns. Alternatives are tried in order, so '/**/' is
# recognised before '**/' and '/**', which are recognised before a lone '*'
# or '/'. The last group takes any run of characters other than '*' and '/'.
_ANT_RE = re.compile(r'(/\*\*/)|(\*\*/)|(/\*\*)|(\*)|(/)|([^\*/]*)')


def ant_pattern_to_re(ant_pattern):
    """Compile an ant-style pattern into a regular expression object.

    Matching convention:
        **/a:   matches 'a', 'dir/a', 'dir1/dir2/a'
        a/**/b: matches 'a/b', 'a/c/b', 'a/d/c/b'
        *.py:   matches 'script.py' but not 'a/script.py'

    A '/' in the pattern matches either '/' or os.path.sep in the tested
    path. The expression is anchored at both ends; use it with ``match``.

    Raises:
        ValueError: if the tokenizer leaves part of the pattern unconsumed.
    """
    escaped_sep = re.escape(os.path.sep)
    sep_rex = r'(?:/|%s)' % escaped_sep
    rex = ['^']
    next_pos = 0
    for match in _ANT_RE.finditer(ant_pattern):
        # Tokens must be contiguous; a gap means part of the pattern was
        # not understood.
        if match.start(0) != next_pos:
            raise ValueError("Invalid ant pattern")
        if match.group(1):      # /**/ : one separator, then any directories
            rex.append(sep_rex + '(?:.*%s)?' % sep_rex)
        elif match.group(2):    # **/  : any leading directories, or none
            rex.append('(?:.*%s)?' % sep_rex)
        elif match.group(3):    # /**  : a separator and everything below it
            rex.append(sep_rex + '.*')
        elif match.group(4):    # *    : anything within one path component
            rex.append('[^/%s]*' % escaped_sep)
        elif match.group(5):    # /    : one separator
            rex.append(sep_rex)
        else:                   # literal text (may be the empty final token)
            rex.append(re.escape(match.group(6)))
        next_pos = match.end()
    # '\Z' rather than '$': '$' would also accept a path that ends with a
    # newline. DOTALL lets '**' span names containing a newline, as '*'
    # already does through its negated character class.
    rex.append(r'\Z')
    return re.compile(''.join(rex), re.DOTALL)

def _as_list(l):
    """Return *l* as a sequence of patterns.

    A string is split on whitespace; any other value is returned unchanged.
    """
    try:
        string_types = basestring  # Python 2: covers str and unicode
    except NameError:
        string_types = str  # Python 3 has no basestring
    if isinstance(l, string_types):
        return l.split()
    return l

def glob(dir_path,
         includes='**/*',
         excludes=default_excludes,
         entry_type=FILE,
         prune_dirs=prune_dirs,
         max_depth=25):
    """Return the entries below dir_path selected by ant-style patterns.

    The tree is walked iteratively. Each entry is tested using its full
    path, i.e. dir_path joined with the directory and entry names, so the
    patterns must account for the dir_path prefix.

    Args:
        dir_path: root directory of the search; '/' is converted to
            os.path.sep.
        includes: ant patterns, as a whitespace-separated string or a list.
            An entry must match at least one of them.
        excludes: ant patterns, same format. An entry matching any of them
            is rejected.
        entry_type: bitwise OR of DIR, FILE, DIR_LINK and FILE_LINK
            selecting which kinds of entries are returned.
        prune_dirs: fnmatch patterns, same format, tested against the bare
            name of each directory. Matching directories are not descended
            into, but may still be returned themselves.
        max_depth: accepted for interface compatibility; the traversal does
            not use it.

    Returns:
        A list of the selected paths.
    """
    include_filter = [ant_pattern_to_re(p) for p in _as_list(includes)]
    exclude_filter = [ant_pattern_to_re(p) for p in _as_list(excludes)]
    prune_patterns = [p.replace('/', os.path.sep)
                      for p in _as_list(prune_dirs)]
    root_dir_path = dir_path.replace('/', os.path.sep)

    def is_pruned_dir(dir_name):
        """Return True if dir_name matches one of the prune patterns."""
        return any(fnmatch.fnmatch(dir_name, pattern)
                   for pattern in prune_patterns)

    def apply_filter(full_path, filter_rexs):
        """Return True if at least one filter expression matches full_path."""
        return any(rex.match(full_path) for rex in filter_rexs)

    def glob_impl(start_dir):
        pending_dirs = [start_dir]
        while pending_dirs:
            current_dir = pending_dirs.pop()
            # os.listdir() returns entries in arbitrary order; sort them so
            # that the result order is stable.
            for entry in sorted(os.listdir(current_dir)):
                full_path = os.path.join(current_dir, entry)
                is_dir = os.path.isdir(full_path)
                # Recursion is decided independently of the filters below: a
                # directory that is itself filtered out is still explored.
                if is_dir and not is_pruned_dir(entry):
                    pending_dirs.append(full_path)
                if not apply_filter(full_path, include_filter):
                    continue
                if apply_filter(full_path, exclude_filter):
                    continue
                is_file = os.path.isfile(full_path)
                if not is_file and not is_dir:
                    continue  # neither a file nor a directory
                if os.path.islink(full_path):
                    found_type = FILE_LINK if is_file else DIR_LINK
                else:
                    found_type = FILE if is_file else DIR
                if found_type & entry_type:
                    yield full_path

    return list(glob_impl(root_dir_path))


if __name__ == "__main__":
    import unittest

    class AntPatternToRETest(unittest.TestCase):
        """Checks ant_pattern_to_re against sample paths."""

        def test_matching(self):
            # Each case: (ant pattern, paths that must match,
            #             paths that must not match).
            test_cases = [
                ('path',
                 ['path'],
                 ['somepath', 'pathsuffix', '/path', 'path/']),
                ('*.py',
                 ['source.py', 'source.ext.py', '.py'],
                 ['path/source.py', '/.py', 'dir.py/z', 'z.pyc', 'z.c']),
                ('**/path',
                 ['path', '/path', '/a/path', 'c:/a/path', '/a/b/path',
                  '//a/path', '/a/path/b/path'],
                 ['path/', 'a/path/b', 'dir.py/z', 'somepath', 'pathsuffix',
                  'a/somepath']),
                ('path/**',
                 ['path/a', 'path/path/a', 'path//'],
                 ['path', 'somepath/a', 'a/path', 'a/path/a',
                  'pathsuffix/a']),
                ('/**/path',
                 ['/path', '/a/path', '/a/b/path/path', '/path/path'],
                 ['path', 'path/', 'a/path', '/pathsuffix', '/somepath']),
                ('a/b',
                 ['a/b'],
                 ['somea/b', 'a/bsuffix', 'a/b/c']),
                ('**/*.py',
                 ['script.py', 'src/script.py', 'a/b/script.py',
                  '/a/b/script.py'],
                 ['script.pyc', 'script.pyo', 'a.py/b']),
                ('src/**/*.py',
                 ['src/a.py', 'src/dir/a.py'],
                 ['a/src/a.py', '/src/a.py']),
            ]

            def local_path(paths):
                """Return paths rewritten with the platform separator."""
                return [p.replace('/', os.path.sep) for p in paths]

            # Run every case a second time with the sample paths rewritten
            # to the native separator; the pattern itself stays in '/' form.
            native_cases = [
                (ant_pattern, local_path(accepted), local_path(rejected))
                for ant_pattern, accepted, rejected in test_cases
            ]

            for ant_pattern, accepted_matches, rejected_matches in (
                    test_cases + native_cases):
                rex = ant_pattern_to_re(ant_pattern)
                print('ant_pattern:', ant_pattern, ' => ', rex.pattern)
                for accepted_match in accepted_matches:
                    print('Accepted?:', accepted_match)
                    self.assertTrue(
                        rex.match(accepted_match) is not None,
                        '%r should match %r' % (ant_pattern, accepted_match))
                for rejected_match in rejected_matches:
                    print('Rejected?:', rejected_match)
                    self.assertTrue(
                        rex.match(rejected_match) is None,
                        '%r should not match %r'
                        % (ant_pattern, rejected_match))

    unittest.main()