Baptiste Lepilleur | 7c171ee | 2010-02-23 08:44:52 +0000 | [diff] [blame] | 1 | #!/usr/bin/env python |
| 2 | # encoding: utf-8 |
| 3 | # Baptiste Lepilleur, 2009 |
| 4 | |
| 5 | from dircache import listdir |
| 6 | import re |
| 7 | import fnmatch |
| 8 | import os.path |
| 9 | |
| 10 | |
# Whitespace-separated fnmatch patterns used as the default value of the
# prune_dirs argument of glob(). During the recursive traversal done by
# glob_impl, a directory whose base name matches one of these patterns is
# not descended into.
prune_dirs = '.git .bzr .hg .svn _MTN _darcs CVS SCCS '
| 14 | |
# Disabled fnmatch-based exclusion list, kept for reference only.
# NOTE(review): it refers to prune_pats, which is not defined in the visible
# part of this module, so it cannot simply be re-enabled as written.
##exclude_pats = prune_pats + '*~ #*# .#* %*% ._* .gitignore .cvsignore vssver.scc .DS_Store'.split()

# Whitespace-separated ant_glob patterns used as the default value of the
# excludes argument of glob(). Any file or directory whose path matches one of
# them is left out of the result. The 'X' + 'X/**' pairs reject both a version
# control directory itself and everything found below it. Note that these
# patterns only filter the result: skipping the traversal of a directory is
# controlled by prune_dirs.
default_excludes = '''
**/*~
**/#*#
**/.#*
**/%*%
**/._*
**/CVS
**/CVS/**
**/.cvsignore
**/SCCS
**/SCCS/**
**/vssver.scc
**/.svn
**/.svn/**
**/.git
**/.git/**
**/.gitignore
**/.bzr
**/.bzr/**
**/.hg
**/.hg/**
**/_MTN
**/_MTN/**
**/_darcs
**/_darcs/**
**/.DS_Store '''
| 47 | |
# Entry-type bit flags. glob() computes one of the four basic flags for each
# entry and keeps the entry when that flag is set in its entry_type argument,
# so the values can be combined with '|'.
DIR = 1          # directory that is not a symbolic link
FILE = 2         # regular file that is not a symbolic link
DIR_LINK = 4     # symbolic link resolving to a directory
FILE_LINK = 8    # symbolic link resolving to a regular file
LINKS = DIR_LINK | FILE_LINK    # any symbolic link
ALL_NO_LINK = DIR | FILE        # files and directories, symbolic links excluded
ALL = DIR | FILE | LINKS        # every entry type

# Tokenizer for ant patterns. Exactly one group matches per token:
#   1: '/**/'   2: '**/'   3: '/**'   4: '*'   5: '/'
#   6: a run of literal characters (it may be empty, e.g. at the end of the pattern)
# The alternatives are ordered so that the longest wildcard form is tried first.
_ANT_RE = re.compile( r'(/\*\*/)|(\*\*/)|(/\*\*)|(\*)|(/)|([^\*/]*)' )
| 57 | |
def ant_pattern_to_re(ant_pattern):
    """Translate an ant-style glob pattern into a compiled regular expression.

    The resulting expression is anchored at both ends and accepts either '/'
    or the platform separator (os.path.sep) wherever the pattern has a '/'.

    Matching convention:
    **/a: match 'a', 'dir/a', 'dir1/dir2/a'
    a/**/b: match 'a/b', 'a/c/b', 'a/d/c/b'
    *.py: match 'script.py' but not 'a/script.py'

    Raises ValueError if the pattern cannot be split into contiguous tokens.
    """
    any_sep = r'(?:/|%s)' % re.escape(os.path.sep)
    # Regular expression fragment emitted for each wildcard or separator
    # token, keyed by the number of the _ANT_RE group that recognised it.
    fragments = {
        1: any_sep + '(?:.*%s)?' % any_sep,        # /**/
        2: '(?:.*%s)?' % any_sep,                  # **/
        3: any_sep + '.*',                         # /**
        4: '[^/%s]*' % re.escape(os.path.sep),     # *
        5: any_sep,                                # /
    }
    pieces = ['^']
    expected_start = 0
    for token in _ANT_RE.finditer(ant_pattern):
        # Tokens must follow one another without leaving a gap.
        if token.start(0) != expected_start:
            raise ValueError("Invalid ant pattern")
        kind = token.lastindex
        if kind in fragments:
            pieces.append(fragments[kind])
        else:
            # Group 6: literal path text, possibly empty.
            pieces.append(re.escape(token.group(6)))
        expected_start = token.end()
    pieces.append('$')
    return re.compile(''.join(pieces))
Christopher Dunn | dc0f736 | 2011-06-21 21:18:49 +0000 | [diff] [blame] | 89 | |
| 90 | def _as_list( l ): |
| 91 | if isinstance(l, basestring): |
| 92 | return l.split() |
| 93 | return l |
Baptiste Lepilleur | 7c171ee | 2010-02-23 08:44:52 +0000 | [diff] [blame] | 94 | |
def glob(dir_path,
         includes='**/*',
         excludes=default_excludes,
         entry_type=FILE,
         prune_dirs=prune_dirs,
         max_depth=25):
    """Return the entries found below dir_path that match the ant patterns.

    dir_path: root directory of the search; '/' is replaced by os.path.sep.
    includes: ant pattern(s), as a whitespace-separated string or a sequence.
        An entry is a candidate if its path matches at least one of them.
    excludes: ant pattern(s) in the same form. A candidate matching any of
        them is dropped. Excluded directories are still traversed unless
        they are pruned.
    entry_type: bit mask built from DIR, FILE, DIR_LINK and FILE_LINK that
        selects which kinds of entries are returned.
    prune_dirs: fnmatch pattern(s), in the same form, matched against the
        base name of each directory; matching directories are not descended
        into.
    max_depth: accepted for interface compatibility; this implementation
        never reads it, so no depth limit is enforced.

    Patterns are matched against the full path of each entry, that is
    dir_path joined with the names leading to the entry, not against the path
    relative to dir_path.

    Returns a list of paths. Errors raised while listing a directory are not
    caught here.
    """
    include_filter = [ant_pattern_to_re(p) for p in _as_list(includes)]
    exclude_filter = [ant_pattern_to_re(p) for p in _as_list(excludes)]
    prune_dirs = [p.replace('/', os.path.sep) for p in _as_list(prune_dirs)]
    dir_path = dir_path.replace('/', os.path.sep)
    entry_type_filter = entry_type

    def is_pruned_dir(dir_name):
        """Return True if dir_name matches one of the prune patterns."""
        return any(fnmatch.fnmatch(dir_name, pattern) for pattern in prune_dirs)

    def apply_filter(full_path, filter_rexs):
        """Return True if at least one of the filter regular expression match full_path."""
        return any(rex.match(full_path) for rex in filter_rexs)

    def glob_impl(root_dir_path):
        """Yield the matching entries, walking the tree with an explicit stack."""
        pending_dirs = [root_dir_path]
        while pending_dirs:
            current_dir = pending_dirs.pop()
            # sorted() keeps the listing order deterministic, as it was with
            # dircache.listdir, without relying on that removed module or on
            # its modification-time based cache.
            for entry in sorted(os.listdir(current_dir)):
                full_path = os.path.join(current_dir, entry)
                is_dir = os.path.isdir(full_path)
                # Recursion is decided independently of the include/exclude
                # filters: only prune_dirs can stop the descent.
                if is_dir and not is_pruned_dir(entry):
                    pending_dirs.append(full_path)
                if not apply_filter(full_path, include_filter):
                    continue
                if apply_filter(full_path, exclude_filter):
                    continue
                is_link = os.path.islink(full_path)
                is_file = os.path.isfile(full_path)
                if not is_file and not is_dir:
                    # Neither a file nor a directory (e.g. a broken link).
                    continue
                if is_link:
                    found_type = FILE_LINK if is_file else DIR_LINK
                else:
                    found_type = FILE if is_file else DIR
                if found_type & entry_type_filter:
                    yield full_path

    return list(glob_impl(dir_path))
| 152 | |
| 153 | |
if __name__ == "__main__":
    import unittest

    class AntPatternToRETest(unittest.TestCase):
        """Check ant_pattern_to_re against accepted and rejected sample paths."""

##        def test_conversion( self ):
##            self.assertEqual( '^somepath$', ant_pattern_to_re( 'somepath' ).pattern )

        def test_matching(self):
            # Each case is (ant pattern, paths that must match, paths that must not).
            test_cases = [
                ('path',
                 ['path'],
                 ['somepath', 'pathsuffix', '/path', '/path']),
                ('*.py',
                 ['source.py', 'source.ext.py', '.py'],
                 ['path/source.py', '/.py', 'dir.py/z', 'z.pyc', 'z.c']),
                ('**/path',
                 ['path', '/path', '/a/path', 'c:/a/path', '/a/b/path', '//a/path', '/a/path/b/path'],
                 ['path/', 'a/path/b', 'dir.py/z', 'somepath', 'pathsuffix', 'a/somepath']),
                ('path/**',
                 ['path/a', 'path/path/a', 'path//'],
                 ['path', 'somepath/a', 'a/path', 'a/path/a', 'pathsuffix/a']),
                ('/**/path',
                 ['/path', '/a/path', '/a/b/path/path', '/path/path'],
                 ['path', 'path/', 'a/path', '/pathsuffix', '/somepath']),
                ('a/b',
                 ['a/b'],
                 ['somea/b', 'a/bsuffix', 'a/b/c']),
                ('**/*.py',
                 ['script.py', 'src/script.py', 'a/b/script.py', '/a/b/script.py'],
                 ['script.pyc', 'script.pyo', 'a.py/b']),
                ('src/**/*.py',
                 ['src/a.py', 'src/dir/a.py'],
                 ['a/src/a.py', '/src/a.py']),
            ]

            def local_path(paths):
                """Rewrite '/'-separated sample paths with the platform separator."""
                return [p.replace('/', os.path.sep) for p in paths]

            # Run every case a second time with platform-specific separators.
            # Iterate over a copy because the list is extended inside the loop.
            for ant_pattern, accepted_matches, rejected_matches in list(test_cases):
                test_cases.append((ant_pattern,
                                   local_path(accepted_matches),
                                   local_path(rejected_matches)))

            for ant_pattern, accepted_matches, rejected_matches in test_cases:
                rex = ant_pattern_to_re(ant_pattern)
                # Single-argument print calls behave the same on Python 2 and 3.
                print('ant_pattern: %s  =>  %s' % (ant_pattern, rex.pattern))
                for accepted_match in accepted_matches:
                    print('Accepted?: %s' % accepted_match)
                    self.assertTrue(
                        rex.match(accepted_match) is not None,
                        '%r should match %r' % (ant_pattern, accepted_match))
                for rejected_match in rejected_matches:
                    print('Rejected?: %s' % rejected_match)
                    self.assertTrue(
                        rex.match(rejected_match) is None,
                        '%r should not match %r' % (ant_pattern, rejected_match))

    unittest.main()