blob: bbb6fecaca1b07080bcb11121875011ecf5dd829 [file] [log] [blame]
#!/usr/bin/env python
# encoding: utf-8
# Baptiste Lepilleur, 2009
4
import fnmatch
import os.path
import re

try:
    from dircache import listdir
except ImportError:  # dircache only exists in Python 2
    from os import listdir
9
10
# Default fnmatch patterns, separated by whitespace. glob() does not descend
# into a directory whose name matches one of them.
prune_dirs = '.git .bzr .hg .svn _MTN _darcs CVS SCCS '

# Disabled legacy fnmatch-based exclusion list, kept for reference only:
##exclude_pats = prune_pats + '*~ #*# .#* %*% ._* .gitignore .cvsignore vssver.scc .DS_Store'.split()

# Default ant-style patterns, separated by whitespace. glob() leaves out any
# entry whose path matches one of them.
default_excludes = '\n' + '\n'.join((
    '**/*~',
    '**/#*#',
    '**/.#*',
    '**/%*%',
    '**/._*',
    '**/CVS',
    '**/CVS/**',
    '**/.cvsignore',
    '**/SCCS',
    '**/SCCS/**',
    '**/vssver.scc',
    '**/.svn',
    '**/.svn/**',
    '**/.git',
    '**/.git/**',
    '**/.gitignore',
    '**/.bzr',
    '**/.bzr/**',
    '**/.hg',
    '**/.hg/**',
    '**/_MTN',
    '**/_MTN/**',
    '**/_darcs',
    '**/_darcs/**',
    '**/.DS_Store',
)) + ' '

# Entry-type bit flags; combine them with '|' to build the entry_type
# argument of glob().
DIR = 1 << 0
FILE = 1 << 1
DIR_LINK = 1 << 2
FILE_LINK = 1 << 3
LINKS = DIR_LINK | FILE_LINK
ALL_NO_LINK = DIR | FILE
ALL = ALL_NO_LINK | LINKS
55
# Tokenizer for ant patterns. Alternatives are tried in order, so the
# multi-character '**' forms must come before the single '*' and '/':
#   group 1: '/**/'   group 2: '**/'   group 3: '/**'
#   group 4: '*'      group 5: '/'     group 6: literal text (may be empty)
_ANT_RE = re.compile(r'(/\*\*/)|(\*\*/)|(/\*\*)|(\*)|(/)|([^\*/]*)')


def ant_pattern_to_re(ant_pattern):
    """Generates a regular expression from the ant pattern.

    Matching convention:
        **/a:   match 'a', 'dir/a', 'dir1/dir2/a'
        a/**/b: match 'a/b', 'a/c/b', 'a/d/c/b'
        *.py:   match 'script.py' but not 'a/script.py'
        **:     match any path, whatever its depth

    In the matched path, both '/' and os.path.sep are accepted wherever the
    pattern contains a '/'.

    Returns a compiled regular expression anchored at both ends.
    Raises ValueError if the tokenizer leaves part of the pattern unmatched.
    """
    if ant_pattern == '**':
        # A bare '**' would otherwise be tokenized as two single '*'
        # wildcards, which never cross a path separator.
        return re.compile('^.*$')

    escaped_sep = re.escape(os.path.sep)
    sep_rex = r'(?:/|%s)' % escaped_sep
    rex = ['^']
    next_pos = 0
    for match in _ANT_RE.finditer(ant_pattern):
        # Tokens must be contiguous; a gap means part of the pattern was
        # not understood.
        if match.start(0) != next_pos:
            raise ValueError("Invalid ant pattern")
        if match.group(1):    # /**/ : a separator, then any number of dirs
            rex.append(sep_rex + '(?:.*%s)?' % sep_rex)
        elif match.group(2):  # **/  : any number of leading dirs
            rex.append('(?:.*%s)?' % sep_rex)
        elif match.group(3):  # /**  : a separator, then anything
            rex.append(sep_rex + '.*')
        elif match.group(4):  # *    : anything except a separator
            rex.append('[^/%s]*' % escaped_sep)
        elif match.group(5):  # /
            rex.append(sep_rex)
        else:                 # literal text (empty at the end of the pattern)
            rex.append(re.escape(match.group(6)))
        next_pos = match.end()
    rex.append('$')
    return re.compile(''.join(rex))
89
90def _as_list( l ):
91 if isinstance(l, basestring):
92 return l.split()
93 return l
94
def glob(dir_path,
         includes='**/*',
         excludes=default_excludes,
         entry_type=FILE,
         prune_dirs=prune_dirs,
         max_depth=25):
    """Return the entries found under dir_path that match the given filters.

    dir_path:   directory to scan; '/' is converted to os.path.sep.
    includes:   ant patterns (whitespace-separated string or list). An entry
                is kept only if at least one of them matches.
    excludes:   ant patterns (same form). An entry matching any of them is
                dropped.
    entry_type: bitwise OR of DIR, FILE, DIR_LINK and FILE_LINK selecting
                the kinds of entries to return.
    prune_dirs: fnmatch patterns (same form) tested against directory names;
                matching directories are not descended into.
    max_depth:  maximum number of directory levels to scan. Level 1 is the
                content of dir_path itself. None means no limit. The limit
                also bounds the traversal when symbolic links form a cycle.

    Include and exclude patterns are matched against the full path of each
    entry, i.e. the path including the dir_path prefix.

    Returns the list of matching paths. Each directory listing is processed
    in sorted order.
    """
    include_filter = [ant_pattern_to_re(p) for p in _as_list(includes)]
    exclude_filter = [ant_pattern_to_re(p) for p in _as_list(excludes)]
    prune_patterns = [p.replace('/', os.path.sep)
                      for p in _as_list(prune_dirs)]
    root_dir_path = dir_path.replace('/', os.path.sep)

    def is_pruned_dir(dir_name):
        """Return True if dir_name matches one of the prune patterns."""
        return any(fnmatch.fnmatch(dir_name, pattern)
                   for pattern in prune_patterns)

    def apply_filter(full_path, filter_rexs):
        """Return True if at least one of the filter regular expression match full_path."""
        return any(rex.match(full_path) for rex in filter_rexs)

    def within_depth(level):
        """Return True if directory level `level` may still be scanned."""
        return max_depth is None or level <= max_depth

    def glob_impl():
        # Explicit stack of (directory, level of its entries) instead of
        # recursion; the most recently queued directory is scanned first.
        pending = []
        if within_depth(1):
            pending.append((root_dir_path, 1))
        while pending:
            current_dir, level = pending.pop()
            for entry in sorted(os.listdir(current_dir)):
                full_path = os.path.join(current_dir, entry)
                is_dir = os.path.isdir(full_path)
                # A directory is explored even when the directory itself is
                # filtered out of the result by the include/exclude patterns.
                if (is_dir and not is_pruned_dir(entry)
                        and within_depth(level + 1)):
                    pending.append((full_path, level + 1))
                if not apply_filter(full_path, include_filter):
                    continue
                if apply_filter(full_path, exclude_filter):
                    continue
                is_file = os.path.isfile(full_path)
                if not is_file and not is_dir:
                    continue  # neither a file nor a directory
                if os.path.islink(full_path):
                    found_type = FILE_LINK if is_file else DIR_LINK
                else:
                    found_type = FILE if is_file else DIR
                if found_type & entry_type:
                    yield full_path

    return list(glob_impl())
152
153
if __name__ == "__main__":
    import unittest

    class AntPatternToRETest(unittest.TestCase):
        """Checks which paths the regular expressions built by
        ant_pattern_to_re accept and reject."""

##        def test_conversion( self ):
##            self.assertEqual( '^somepath$', ant_pattern_to_re( 'somepath' ).pattern )

        def test_matching(self):
            # Each case is (ant pattern, paths that must match,
            # paths that must not match).
            test_cases = [
                ('path',
                 ['path'],
                 ['somepath', 'pathsuffix', '/path', '/path']),
                ('*.py',
                 ['source.py', 'source.ext.py', '.py'],
                 ['path/source.py', '/.py', 'dir.py/z', 'z.pyc', 'z.c']),
                ('**/path',
                 ['path', '/path', '/a/path', 'c:/a/path', '/a/b/path',
                  '//a/path', '/a/path/b/path'],
                 ['path/', 'a/path/b', 'dir.py/z', 'somepath', 'pathsuffix',
                  'a/somepath']),
                ('path/**',
                 ['path/a', 'path/path/a', 'path//'],
                 ['path', 'somepath/a', 'a/path', 'a/path/a',
                  'pathsuffix/a']),
                ('/**/path',
                 ['/path', '/a/path', '/a/b/path/path', '/path/path'],
                 ['path', 'path/', 'a/path', '/pathsuffix', '/somepath']),
                ('a/b',
                 ['a/b'],
                 ['somea/b', 'a/bsuffix', 'a/b/c']),
                ('**/*.py',
                 ['script.py', 'src/script.py', 'a/b/script.py',
                  '/a/b/script.py'],
                 ['script.pyc', 'script.pyo', 'a.py/b']),
                ('src/**/*.py',
                 ['src/a.py', 'src/dir/a.py'],
                 ['a/src/a.py', '/src/a.py']),
            ]

            def local_path(paths):
                """Return the paths rewritten with the platform separator."""
                return [p.replace('/', os.path.sep) for p in paths]

            # Run every case a second time with native separators. Iterate
            # over a copy because the list grows inside the loop.
            for ant_pattern, accepted_matches, rejected_matches in list(test_cases):
                test_cases.append((ant_pattern,
                                   local_path(accepted_matches),
                                   local_path(rejected_matches)))

            for ant_pattern, accepted_matches, rejected_matches in test_cases:
                rex = ant_pattern_to_re(ant_pattern)
                # A single pre-formatted string prints identically whether
                # print is a statement (Python 2) or a function (Python 3).
                print('ant_pattern: %s  =>  %s' % (ant_pattern, rex.pattern))
                for accepted_match in accepted_matches:
                    print('Accepted?: %s' % accepted_match)
                    self.assertTrue(
                        rex.match(accepted_match) is not None,
                        '%r should match %r' % (ant_pattern, accepted_match))
                for rejected_match in rejected_matches:
                    print('Rejected?: %s' % rejected_match)
                    self.assertTrue(
                        rex.match(rejected_match) is None,
                        '%r should not match %r' % (ant_pattern, rejected_match))

    unittest.main()