Baptiste Lepilleur | 7c171ee | 2010-02-23 08:44:52 +0000 | [diff] [blame] | 1 | #!/usr/bin/env python |
| 2 | # encoding: utf-8 |
Devin Jeanpierre | 19fc55f | 2017-04-24 10:49:00 -0700 | [diff] [blame] | 3 | # Copyright 2009 The JsonCpp Authors |
Sam Clegg | 6386061 | 2015-04-09 18:01:33 -0700 | [diff] [blame] | 4 | # Distributed under MIT license, or public domain if desired and |
| 5 | # recognized in your jurisdiction. |
| 6 | # See file LICENSE for detail or copy at http://jsoncpp.sourceforge.net/LICENSE |
Baptiste Lepilleur | 7c171ee | 2010-02-23 08:44:52 +0000 | [diff] [blame] | 7 | |
from __future__ import print_function

import fnmatch
import os
import os.path
import re

try:
    from dircache import listdir  # Python 2 only; the module was removed in Python 3
except ImportError:
    from os import listdir
| 13 | |
| 14 | |
# fnmatch patterns naming the directories that glob() does not descend into
# during its recursive traversal.  The string is split on whitespace into
# individual patterns before use.
prune_dirs = '.git .bzr .hg .svn _MTN _darcs CVS SCCS '

# Disabled legacy fnmatch-based exclude list; kept for reference only (it
# refers to a 'prune_pats' name that is not defined in this module).
##exclude_pats = prune_pats + '*~ #*# .#* %*% ._* .gitignore .cvsignore vssver.scc .DS_Store'.split()

# Ant-style glob patterns used by default as the 'excludes' argument of
# glob(): any entry whose path matches one of them is left out of the result.
# The string is split on whitespace, one pattern per token.
default_excludes = '''
**/*~
**/#*#
**/.#*
**/%*%
**/._*
**/CVS
**/CVS/**
**/.cvsignore
**/SCCS
**/SCCS/**
**/vssver.scc
**/.svn
**/.svn/**
**/.git
**/.git/**
**/.gitignore
**/.bzr
**/.bzr/**
**/.hg
**/.hg/**
**/_MTN
**/_MTN/**
**/_darcs
**/_darcs/**
**/.DS_Store '''

# Entry-type bit flags.  glob() computes one of the four basic flags for each
# entry and keeps the entry when that flag is set in its 'entry_type' mask, so
# flags can be combined with '|'.
DIR = 1
FILE = 2
DIR_LINK = 4
FILE_LINK = 8
# Convenience masks built from the basic flags.
LINKS = DIR_LINK | FILE_LINK
ALL_NO_LINK = DIR | FILE
ALL = DIR | FILE | LINKS
| 59 | |
# Tokenizer for ant patterns.  Capture groups, in order:
#   1: '/**/'   2: '**/'   3: '/**'   4: '*'   5: '/'   6: literal text
_ANT_RE = re.compile(r'(/\*\*/)|(\*\*/)|(/\*\*)|(\*)|(/)|([^\*/]*)')

def ant_pattern_to_re(ant_pattern):
    """Compile an ant-style glob pattern into a regular expression object.

    Matching convention:
    **/a: match 'a', 'dir/a', 'dir1/dir2/a'
    a/**/b: match 'a/b', 'a/c/b', 'a/d/c/b'
    *.py: match 'script.py' but not 'a/script.py'

    Both '/' and the platform path separator are accepted wherever the
    pattern contains a '/'.  The returned expression is anchored at both ends.

    Raises ValueError if the tokenizer leaves a gap in the pattern.
    """
    escaped_sep = re.escape(os.path.sep)
    any_sep = r'(?:/|%s)' % escaped_sep
    # Regex fragment emitted for each wildcard/separator token, keyed by the
    # index of the _ANT_RE group that matched it.
    fragments = {
        1: any_sep + '(?:.*%s)?' % any_sep,  # /**/
        2: '(?:.*%s)?' % any_sep,            # **/
        3: any_sep + '.*',                   # /**
        4: '[^/%s]*' % escaped_sep,          # *
        5: any_sep,                          # /
    }
    pieces = ['^']
    expected_start = 0
    for token in _ANT_RE.finditer(ant_pattern):
        # Consecutive tokens must tile the pattern without leaving a gap.
        if token.start() != expected_start:
            raise ValueError("Invalid ant pattern")
        kind = token.lastindex
        if kind in fragments:
            pieces.append(fragments[kind])
        else:
            # Group 6: literal path text (possibly empty), matched verbatim.
            pieces.append(re.escape(token.group(6)))
        expected_start = token.end()
    pieces.append('$')
    return re.compile(''.join(pieces))
Christopher Dunn | dc0f736 | 2011-06-21 21:18:49 +0000 | [diff] [blame] | 93 | |
Christopher Dunn | 494950a | 2015-01-24 15:29:52 -0600 | [diff] [blame] | 94 | def _as_list(l): |
Christopher Dunn | dc0f736 | 2011-06-21 21:18:49 +0000 | [diff] [blame] | 95 | if isinstance(l, basestring): |
| 96 | return l.split() |
| 97 | return l |
Baptiste Lepilleur | 7c171ee | 2010-02-23 08:44:52 +0000 | [diff] [blame] | 98 | |
def glob(dir_path,
         includes='**/*',
         excludes=default_excludes,
         entry_type=FILE,
         prune_dirs=prune_dirs,
         max_depth=25):
    """Recursively list the entries below dir_path that match ant-style patterns.

    Parameters:
    dir_path: directory to start from; '/' is converted to the platform
        separator.
    includes: ant patterns (whitespace-separated string or a sequence); an
        entry must match at least one of them to be returned.
    excludes: ant patterns (same forms); an entry matching any of them is
        dropped.
    entry_type: bit mask built from DIR, FILE, DIR_LINK and FILE_LINK that
        selects which kinds of entries are returned.
    prune_dirs: fnmatch patterns (same forms) tested against a directory's
        own name; matching directories are not descended into.
    max_depth: accepted for interface compatibility; the traversal does not
        currently enforce it.

    Include and exclude patterns are matched against the path formed by
    joining the visited directory with the entry name, so they see the
    dir_path prefix as well.  A pruned directory is only excluded from
    recursion; it can still be returned itself if it passes the filters.

    Returns a list of the matching paths.
    """
    include_filter = [ant_pattern_to_re(p) for p in _as_list(includes)]
    exclude_filter = [ant_pattern_to_re(p) for p in _as_list(excludes)]
    prune_dirs = [p.replace('/', os.path.sep) for p in _as_list(prune_dirs)]
    dir_path = dir_path.replace('/', os.path.sep)
    entry_type_filter = entry_type

    def is_pruned_dir(dir_name):
        """Return True if dir_name matches one of the prune patterns."""
        return any(fnmatch.fnmatch(dir_name, pattern) for pattern in prune_dirs)

    def apply_filter(full_path, filter_rexs):
        """Return True if at least one of the filter regular expressions matches full_path."""
        return any(rex.match(full_path) for rex in filter_rexs)

    def glob_impl(root_dir_path):
        # Iterative traversal: directories still to visit are kept on a stack.
        pending_dirs = [root_dir_path]
        while pending_dirs:
            current_dir = pending_dirs.pop()
            # sorted() keeps the ordering the former dircache.listdir() gave.
            for entry in sorted(os.listdir(current_dir)):
                full_path = os.path.join(current_dir, entry)
                is_dir = os.path.isdir(full_path)
                if is_dir and not is_pruned_dir(entry):
                    # Recursion is decided independently of the filters below.
                    pending_dirs.append(full_path)
                if not apply_filter(full_path, include_filter):
                    continue
                if apply_filter(full_path, exclude_filter):
                    continue
                is_file = os.path.isfile(full_path)
                if not is_file and not is_dir:
                    # Neither a file nor a directory: unknown entry type.
                    continue
                if os.path.islink(full_path):
                    found_type = FILE_LINK if is_file else DIR_LINK
                else:
                    found_type = FILE if is_file else DIR
                if found_type & entry_type_filter:
                    yield full_path

    return list(glob_impl(dir_path))
Baptiste Lepilleur | 7c171ee | 2010-02-23 08:44:52 +0000 | [diff] [blame] | 156 | |
| 157 | |
if __name__ == "__main__":
    import unittest

    def _to_native(paths):
        """Return paths with '/' replaced by the platform path separator."""
        return [path.replace('/', os.path.sep) for path in paths]

    class AntPatternToRETest(unittest.TestCase):
        """Checks ant_pattern_to_re() against paths it must accept and reject."""

##        def test_conversion(self):
##            self.assertEqual('^somepath$', ant_pattern_to_re('somepath').pattern)

        def test_matching(self):
            # Each case: (ant pattern, paths that must match, paths that must not).
            posix_cases = [
                ('path',
                 ['path'],
                 ['somepath', 'pathsuffix', '/path', '/path']),
                ('*.py',
                 ['source.py', 'source.ext.py', '.py'],
                 ['path/source.py', '/.py', 'dir.py/z', 'z.pyc', 'z.c']),
                ('**/path',
                 ['path', '/path', '/a/path', 'c:/a/path', '/a/b/path', '//a/path', '/a/path/b/path'],
                 ['path/', 'a/path/b', 'dir.py/z', 'somepath', 'pathsuffix', 'a/somepath']),
                ('path/**',
                 ['path/a', 'path/path/a', 'path//'],
                 ['path', 'somepath/a', 'a/path', 'a/path/a', 'pathsuffix/a']),
                ('/**/path',
                 ['/path', '/a/path', '/a/b/path/path', '/path/path'],
                 ['path', 'path/', 'a/path', '/pathsuffix', '/somepath']),
                ('a/b',
                 ['a/b'],
                 ['somea/b', 'a/bsuffix', 'a/b/c']),
                ('**/*.py',
                 ['script.py', 'src/script.py', 'a/b/script.py', '/a/b/script.py'],
                 ['script.pyc', 'script.pyo', 'a.py/b']),
                ('src/**/*.py',
                 ['src/a.py', 'src/dir/a.py'],
                 ['a/src/a.py', '/src/a.py']),
            ]
            # Run every case a second time with platform-native separators.
            native_cases = [(pattern, _to_native(accepted), _to_native(rejected))
                            for pattern, accepted, rejected in posix_cases]
            for ant_pattern, accepted_matches, rejected_matches in posix_cases + native_cases:
                rex = ant_pattern_to_re(ant_pattern)
                print('ant_pattern:', ant_pattern, ' => ', rex.pattern)
                for accepted_match in accepted_matches:
                    print('Accepted?:', accepted_match)
                    self.assertTrue(rex.match(accepted_match) is not None)
                for rejected_match in rejected_matches:
                    print('Rejected?:', rejected_match)
                    self.assertTrue(rex.match(rejected_match) is None)

    unittest.main()