#!/usr/bin/env python
# encoding: utf-8
# Copyright 2009 The JsonCpp Authors
# Distributed under MIT license, or public domain if desired and
# recognized in your jurisdiction.
# See file LICENSE for detail or copy at http://jsoncpp.sourceforge.net/LICENSE

from __future__ import print_function

import fnmatch
import os
import os.path
import re

try:
    from dircache import listdir  # Python 2 only; the module was removed in Python 3
except ImportError:
    from os import listdir


# These fnmatch expressions are used by default to prune the directory tree
# while doing the recursive traversal in the glob_impl helper of the glob
# function. The string is whitespace-separated; glob splits it into a list.
prune_dirs = '.git .bzr .hg .svn _MTN _darcs CVS SCCS '

# These fnmatch expressions are used by default to exclude files and dirs
# while doing the recursive traversal in the glob_impl helper of the glob function.
##exclude_pats = prune_pats + '*~ #*# .#* %*% ._* .gitignore .cvsignore vssver.scc .DS_Store'.split()

# These ant_glob expressions are used by default to exclude files and dirs
# while doing the recursive traversal in the glob_impl helper of the glob
# function. One pattern per line; glob splits the string on whitespace.
default_excludes = '''
**/*~
**/#*#
**/.#*
**/%*%
**/._*
**/CVS
**/CVS/**
**/.cvsignore
**/SCCS
**/SCCS/**
**/vssver.scc
**/.svn
**/.svn/**
**/.git
**/.git/**
**/.gitignore
**/.bzr
**/.bzr/**
**/.hg
**/.hg/**
**/_MTN
**/_MTN/**
**/_darcs
**/_darcs/**
**/.DS_Store '''

# Entry-type bit flags. glob() keeps an entry when its computed type has at
# least one bit in common with the `entry_type` argument, so flags may be
# OR-ed together.
DIR = 1
FILE = 2
DIR_LINK = 4
FILE_LINK = 8
LINKS = DIR_LINK | FILE_LINK
ALL_NO_LINK = DIR | FILE
ALL = DIR | FILE | LINKS

# Tokenizer for ant patterns. Exactly one group matches per token:
#   1: '/**/'   2: '**/'   3: '/**'   4: '*'   5: '/'   6: literal text
_ANT_RE = re.compile(r'(/\*\*/)|(\*\*/)|(/\*\*)|(\*)|(/)|([^\*/]*)')


def ant_pattern_to_re(ant_pattern):
    """Generates a regular expression from the ant pattern.

    Matching convention:
    **/a: match 'a', 'dir/a', 'dir1/dir2/a'
    a/**/b: match 'a/b', 'a/c/b', 'a/d/c/b'
    *.py: match 'script.py' but not 'a/script.py'

    Both '/' and os.path.sep are accepted as directory separators in the
    matched path.

    Args:
        ant_pattern: the ant-style pattern string.

    Returns:
        A compiled regular expression anchored at both ends ('^...$').

    Raises:
        ValueError: if the tokenizer leaves a gap in the pattern.
    """
    escaped_sep = re.escape(os.path.sep)
    sep_rex = r'(?:/|%s)' % escaped_sep
    rex = ['^']
    next_pos = 0
    for match in _ANT_RE.finditer(ant_pattern):
        # Tokens must be contiguous; a gap means part of the pattern was skipped.
        if match.start(0) != next_pos:
            raise ValueError("Invalid ant pattern")
        if match.group(1):    # /**/ : a separator, then any number of directories
            rex.append(sep_rex + '(?:.*%s)?' % sep_rex)
        elif match.group(2):  # **/ : any number of leading directories
            rex.append('(?:.*%s)?' % sep_rex)
        elif match.group(3):  # /** : a separator followed by anything
            rex.append(sep_rex + '.*')
        elif match.group(4):  # * : anything except a separator
            rex.append('[^/%s]*' % escaped_sep)
        elif match.group(5):  # /
            rex.append(sep_rex)
        else:                 # literal text (possibly the empty match at the end)
            rex.append(re.escape(match.group(6)))
        next_pos = match.end()
    rex.append('$')
    return re.compile(''.join(rex))

Christopher Dunn494950a2015-01-24 15:29:52 -060094def _as_list(l):
Christopher Dunndc0f7362011-06-21 21:18:49 +000095 if isinstance(l, basestring):
96 return l.split()
97 return l
Baptiste Lepilleur7c171ee2010-02-23 08:44:52 +000098
def glob(dir_path,
         includes='**/*',
         excludes=default_excludes,
         entry_type=FILE,
         prune_dirs=prune_dirs,
         max_depth=25):
    """Walk `dir_path` and return the entries selected by ant-style patterns.

    Args:
        dir_path: directory to walk; '/' is converted to os.path.sep.
        includes: ant patterns (whitespace-separated string or sequence); an
            entry must match at least one of them.
        excludes: ant patterns (same forms); an entry matching any is dropped.
        entry_type: bitmask of DIR / FILE / DIR_LINK / FILE_LINK selecting
            which kinds of entries are returned.
        prune_dirs: fnmatch patterns (same forms) tested against directory
            names; matching directories are not descended into.
        max_depth: accepted for interface compatibility.
            NOTE(review): not used by this implementation.

    Returns:
        A list of paths, each built as os.path.join(<visited dir>, <entry>).
        The include/exclude patterns are matched against that joined path,
        which starts with `dir_path`.
    """
    include_filter = [ant_pattern_to_re(p) for p in _as_list(includes)]
    exclude_filter = [ant_pattern_to_re(p) for p in _as_list(excludes)]
    prune_dirs = [p.replace('/', os.path.sep) for p in _as_list(prune_dirs)]
    dir_path = dir_path.replace('/', os.path.sep)
    entry_type_filter = entry_type

    def is_pruned_dir(dir_name):
        """Return True if dir_name matches one of the prune patterns."""
        return any(fnmatch.fnmatch(dir_name, pattern) for pattern in prune_dirs)

    def apply_filter(full_path, filter_rexs):
        """Return True if at least one of the filter regular expression match full_path."""
        return any(rex.match(full_path) for rex in filter_rexs)

    def glob_impl(root_dir_path):
        # Iterative traversal: directories still to visit are kept on a stack.
        child_dirs = [root_dir_path]
        while child_dirs:
            current_dir = child_dirs.pop()
            # os.listdir replaces dircache.listdir (removed in Python 3);
            # sorting keeps the sorted order dircache used to return.
            for entry in sorted(os.listdir(current_dir)):
                full_path = os.path.join(current_dir, entry)
                is_dir = os.path.isdir(full_path)
                # Recursion is decided before filtering, so a directory that
                # is not itself included can still be explored.
                if is_dir and not is_pruned_dir(entry):
                    child_dirs.append(full_path)
                included = apply_filter(full_path, include_filter)
                rejected = apply_filter(full_path, exclude_filter)
                if not included or rejected:  # do not include entry
                    continue
                link = os.path.islink(full_path)
                is_file = os.path.isfile(full_path)
                if not is_file and not is_dir:  # unknown entry type
                    continue
                if link:
                    found_type = FILE_LINK if is_file else DIR_LINK
                else:
                    found_type = FILE if is_file else DIR
                if (found_type & entry_type_filter) != 0:
                    yield full_path

    return list(glob_impl(dir_path))


if __name__ == "__main__":
    import unittest

    class AntPatternToRETest(unittest.TestCase):
        """Checks which paths the regexes built by ant_pattern_to_re accept and reject."""

##        def test_conversion(self):
##            self.assertEqual('^somepath$', ant_pattern_to_re('somepath').pattern)

        def test_matching(self):
            # Each case: (ant pattern, paths that must match, paths that must not).
            test_cases = [ ('path',
                            ['path'],
                            ['somepath', 'pathsuffix', '/path', '/path']),
                           ('*.py',
                            ['source.py', 'source.ext.py', '.py'],
                            ['path/source.py', '/.py', 'dir.py/z', 'z.pyc', 'z.c']),
                           ('**/path',
                            ['path', '/path', '/a/path', 'c:/a/path', '/a/b/path', '//a/path', '/a/path/b/path'],
                            ['path/', 'a/path/b', 'dir.py/z', 'somepath', 'pathsuffix', 'a/somepath']),
                           ('path/**',
                            ['path/a', 'path/path/a', 'path//'],
                            ['path', 'somepath/a', 'a/path', 'a/path/a', 'pathsuffix/a']),
                           ('/**/path',
                            ['/path', '/a/path', '/a/b/path/path', '/path/path'],
                            ['path', 'path/', 'a/path', '/pathsuffix', '/somepath']),
                           ('a/b',
                            ['a/b'],
                            ['somea/b', 'a/bsuffix', 'a/b/c']),
                           ('**/*.py',
                            ['script.py', 'src/script.py', 'a/b/script.py', '/a/b/script.py'],
                            ['script.pyc', 'script.pyo', 'a.py/b']),
                           ('src/**/*.py',
                            ['src/a.py', 'src/dir/a.py'],
                            ['a/src/a.py', '/src/a.py']),
                           ]

            def local_path(paths):
                return [ p.replace('/',os.path.sep) for p in paths ]

            # Re-run every case with the platform's native separator as well.
            for ant_pattern, accepted_matches, rejected_matches in list(test_cases):
                test_cases.append((ant_pattern, local_path(accepted_matches), local_path(rejected_matches)))
            for ant_pattern, accepted_matches, rejected_matches in test_cases:
                rex = ant_pattern_to_re(ant_pattern)
                print('ant_pattern:', ant_pattern, ' => ', rex.pattern)
                for accepted_match in accepted_matches:
                    print('Accepted?:', accepted_match)
                    self.assertTrue(rex.match(accepted_match) is not None,
                                    '%r should match %r' % (ant_pattern, accepted_match))
                for rejected_match in rejected_matches:
                    print('Rejected?:', rejected_match)
                    self.assertTrue(rex.match(rejected_match) is None,
                                    '%r should not match %r' % (ant_pattern, rejected_match))

    unittest.main()