#!/usr/bin/env python
##########################################################################
#
# Copyright 2011 Jose Fonseca
# All Rights Reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
#
##########################################################################/
26
27
import json
import optparse
import re
import sys
32
33
def strip_object_hook(obj):
    '''json ``object_hook`` that discards tagged objects and dunder keys.

    Any object carrying a ``__class__`` member is replaced by ``None``
    (presumably these are the embedded image objects that ``load`` strips
    when ``strip_images`` is set -- confirm against the trace format).
    For every other object, members whose name both starts and ends with a
    double underscore are removed in place.

    Returns ``None`` or the (mutated) input dict.
    '''
    if '__class__' in obj:
        return None
    # Iterate over a snapshot of the keys: deleting from a dict while
    # iterating over its live key view raises RuntimeError on Python 3.
    for name in list(obj.keys()):
        if name.startswith('__') and name.endswith('__'):
            del obj[name]
    return obj
41
42
class Visitor:
    '''Base class that dispatches on the shape of a parsed JSON node.

    ``visit`` routes dicts to ``visitObject``, lists to ``visitArray`` and
    everything else to ``visitValue``, forwarding any extra positional and
    keyword arguments unchanged.  The default handlers do nothing and
    return ``None``; subclasses override the ones they need.
    '''

    def visit(self, node, *args, **kwargs):
        '''Dispatch *node* to the handler matching its type and return its result.'''
        if isinstance(node, dict):
            return self.visitObject(node, *args, **kwargs)
        elif isinstance(node, list):
            return self.visitArray(node, *args, **kwargs)
        else:
            return self.visitValue(node, *args, **kwargs)

    def visitObject(self, node, *args, **kwargs):
        '''Handle a dict node (no-op by default).'''
        pass

    def visitArray(self, node, *args, **kwargs):
        '''Handle a list node (no-op by default).'''
        pass

    def visitValue(self, node, *args, **kwargs):
        '''Handle any node that is neither a dict nor a list (no-op by default).'''
        pass
61
62
class Dumper(Visitor):
    '''Write an indented, JSON-like rendering of a parsed tree to a stream.

    Object members are emitted in sorted key order with the member name
    written as-is (unquoted) followed by ``': '``; scalar values are
    rendered with ``json.dumps``.  ``level`` tracks the current nesting
    depth and drives the indentation.
    '''

    def __init__(self, stream=sys.stdout):
        self.stream = stream
        # Current nesting depth; incremented on entering an object/array.
        self.level = 0

    def _write(self, s):
        '''Write the string *s* to the output stream.'''
        self.stream.write(s)

    def _indent(self):
        '''Write the indentation for the current nesting level.'''
        # NOTE(review): the indent unit is a single space in this copy of the
        # file, but whitespace here appears to have been collapsed -- confirm
        # the intended unit against the upstream source.
        self._write(' '*self.level)

    def _newline(self):
        '''Terminate the current output line.'''
        self._write('\n')

    def visitObject(self, node):
        '''Dump the dict *node*, one member per line, in sorted key order.'''
        self.enter_object()

        # sorted() works on both Python 2 lists and Python 3 key views,
        # unlike calling .sort() on the result of keys().
        members = sorted(node.keys())
        last_index = len(members) - 1
        for i, name in enumerate(members):
            self.enter_member(name)
            self.visit(node[name])
            self.leave_member(i == last_index)
        self.leave_object()

    def enter_object(self):
        '''Open an object: write ``{``, end the line and indent one level.'''
        self._write('{')
        self._newline()
        self.level += 1

    def enter_member(self, name):
        '''Start a member line: indentation followed by ``name: ``.'''
        self._indent()
        self._write('%s: ' % name)

    def leave_member(self, last):
        '''Finish a member line; every member but the *last* gets a comma.'''
        if not last:
            self._write(',')
        self._newline()

    def leave_object(self):
        '''Close an object: dedent and write ``}``.'''
        self.level -= 1
        self._indent()
        self._write('}')
        # Only an object closed at the outermost level ends its own line;
        # nested ones leave that to the enclosing member/array element.
        if self.level <= 0:
            self._newline()

    def visitArray(self, node):
        '''Dump the list *node*, one element per line.'''
        self.enter_array()
        last_index = len(node) - 1
        for i, value in enumerate(node):
            self._indent()
            self.visit(value)
            if i != last_index:
                self._write(',')
            self._newline()
        self.leave_array()

    def enter_array(self):
        '''Open an array: write ``[``, end the line and indent one level.'''
        self._write('[')
        self._newline()
        self.level += 1

    def leave_array(self):
        '''Close an array: dedent and write ``]`` (no trailing newline).'''
        self.level -= 1
        self._indent()
        self._write(']')

    def visitValue(self, node):
        '''Write a scalar using its JSON encoding.'''
        self._write(json.dumps(node))
José Fonseca0badbf02011-04-12 09:17:32 +0100135
136
137
class Comparer(Visitor):
    '''Structural equality test between two parsed JSON trees.

    ``visit(a, b)`` returns True when *a* and *b* are considered equal.

    ignore_added -- when true, members that exist only in *b* are ignored;
                    every member of *a* must still be present and equal.
    tolerance    -- bound used when either scalar is a float: relative
                    error ``|b - a| / |a|`` must be below it, or ``|b|``
                    itself when ``a == 0``.
    '''

    def __init__(self, ignore_added=False, tolerance=2.0 ** -24):
        self.ignore_added = ignore_added
        self.tolerance = tolerance

    def visitObject(self, a, b):
        '''Compare dict *a* against *b*.'''
        if not isinstance(b, dict):
            return False
        # Dict keys are unique, so comparing key sets is equivalent to the
        # length check plus sorted-key-list comparison, and it avoids calling
        # .sort() on a Python 3 key view.
        if not self.ignore_added and set(a.keys()) != set(b.keys()):
            return False
        for k, ae in a.items():
            # With ignore_added the key sets were not checked, so a member
            # of a may still be missing from b.
            if k not in b:
                return False
            if not self.visit(ae, b[k]):
                return False
        return True

    def visitArray(self, a, b):
        '''Compare list *a* against *b* element by element.'''
        if not isinstance(b, list):
            return False
        if len(a) != len(b):
            return False
        for ae, be in zip(a, b):
            if not self.visit(ae, be):
                return False
        return True

    def visitValue(self, a, b):
        '''Compare scalar *a* against *b*, with tolerance if either is a float.'''
        if isinstance(a, float) or isinstance(b, float):
            try:
                if a == 0:
                    # Relative error is undefined for a zero reference.
                    return abs(b) < self.tolerance
                else:
                    return abs((b - a)/a) < self.tolerance
            except TypeError:
                # The other side is not numeric (None, string, list, ...),
                # e.g. a member missing on one side; fall back to plain
                # equality instead of crashing.
                return a == b
        else:
            return a == b
José Fonseca0badbf02011-04-12 09:17:32 +0100183
184
class Differ(Visitor):
    '''Print the differences between two parsed JSON trees.

    Uses a ``Comparer`` to decide what differs and a ``Dumper`` to write
    the output.  Replaced values are written as ``old -> new``.  Inside
    objects only differing members are printed; inside arrays every
    element is printed, with differing ones expanded.
    '''

    def __init__(self, stream=sys.stdout, ignore_added=False):
        self.dumper = Dumper(stream)
        self.comparer = Comparer(ignore_added=ignore_added)

    def visit(self, a, b):
        '''Emit the difference between *a* and *b*; nothing if they compare equal.'''
        if self.comparer.visit(a, b):
            return
        Visitor.visit(self, a, b)

    def visitObject(self, a, b):
        '''Diff dict *a* against *b*, printing only the members that differ.'''
        if not isinstance(b, dict):
            self.replace(a, b)
            return

        self.dumper.enter_object()
        names = set(a.keys())
        if not self.comparer.ignore_added:
            names.update(b.keys())

        # Work out which members differ first, so the "last member" flag
        # refers to the last member actually printed.  Indexing into the
        # full name list leaves a stray trailing comma whenever the final
        # differing member is not the final key.
        # A member absent on one side is represented as None.
        changed = []
        for name in sorted(names):
            ae = a.get(name, None)
            be = b.get(name, None)
            if not self.comparer.visit(ae, be):
                changed.append((name, ae, be))

        last_index = len(changed) - 1
        for i, (name, ae, be) in enumerate(changed):
            self.dumper.enter_member(name)
            self.visit(ae, be)
            self.dumper.leave_member(i == last_index)

        self.dumper.leave_object()

    def visitArray(self, a, b):
        '''Diff list *a* against *b* position by position.'''
        if not isinstance(b, list):
            self.replace(a, b)
            return

        self.dumper.enter_array()
        max_len = max(len(a), len(b))
        for i in range(max_len):
            # Positions beyond the end of the shorter list are treated as None.
            ae = a[i] if i < len(a) else None
            be = b[i] if i < len(b) else None
            self.dumper._indent()
            if self.comparer.visit(ae, be):
                # Equal elements are still dumped, keeping positions visible.
                self.dumper.visit(ae)
            else:
                self.visit(ae, be)
            if i != max_len - 1:
                self.dumper._write(',')
            self.dumper._newline()

        self.dumper.leave_array()

    def visitValue(self, a, b):
        '''Report scalar *a* as replaced by *b* when they are not equal.'''
        if a != b:
            self.replace(a, b)

    def replace(self, a, b):
        '''Write ``a -> b`` using the dumper's rendering of each side.'''
        self.dumper.visit(a)
        self.dumper._write(' -> ')
        self.dumper.visit(b)
252
253
#
# Unfortunately the JSON standard does not include comments, but they are
# quite a useful feature to have in regression tests.
#
258
# Token patterns, in priority order.  Each one is wrapped in its own capture
# group below, so group 1 is a comment and group 2 is a string literal.
# Strings are tokenized too so that a '//' inside a string is not mistaken
# for the start of a comment.
_token_res = [
    r'//[^\r\n]*', # comment
    r'"[^"\\]*(\\.[^"\\]*)*"', # string
]

# re.DOTALL lets the '.' of an escape sequence inside a string match a newline.
_tokens_re = re.compile(r'|'.join('(' + token_re + ')' for token_re in _token_res), re.DOTALL)
265
266
def _strip_comment(mo):
    '''``re.sub`` callback: drop the match if group 1 matched, else keep it.

    Group 1 is the capture group wrapped around the comment pattern; any
    other token (i.e. a string literal) is returned unchanged.
    '''
    if mo.group(1):
        return ''
    return mo.group(0)
272
273
def _strip_comments(data):
    '''Strip (non-standard) JSON comments.

    Removes every ``//`` comment up to, but not including, the end of its
    line.  Text inside double-quoted strings is left untouched.
    '''
    return _tokens_re.sub(_strip_comment, data)
277
278
# Import-time self-test: a comment is removed (its line terminator is kept),
# while a '//' inside a multi-line string literal is preserved.
assert _strip_comments('''// a comment
"// a comment in a string
"''') == '''
"// a comment in a string
"'''
284
285
def load(stream, strip_images=True, strip_comments=True):
    '''Parse JSON from the file-like object *stream*.

    strip_images   -- when true, decode objects through ``strip_object_hook``.
    strip_comments -- when true, read the whole stream and remove ``//``
                      comments before parsing.

    Parsing uses ``strict=False``, so control characters are accepted
    inside strings.  Returns the decoded Python value.
    '''
    if strip_images:
        object_hook = strip_object_hook
    else:
        object_hook = None
    if strip_comments:
        data = stream.read()
        data = _strip_comments(data)
        return json.loads(data, strict=False, object_hook=object_hook)
    else:
        return json.load(stream, strict=False, object_hook=object_hook)
José Fonseca0badbf02011-04-12 09:17:32 +0100297
298
def main():
    '''Command-line entry point: print the diff between two JSON files.

    Expects exactly two positional arguments, the reference file and the
    file to compare against it.  ``--keep-images`` disables the stripping
    done by ``strip_object_hook`` when loading.
    '''
    optparser = optparse.OptionParser(
        usage="\n\t%prog [options] <ref_json> <src_json>")
    optparser.add_option(
        '--keep-images',
        action="store_false", dest="strip_images", default=True,
        help="compare images")

    (options, args) = optparser.parse_args(sys.argv[1:])

    if len(args) != 2:
        optparser.error('incorrect number of arguments')

    # Use the parsed positional arguments rather than sys.argv[1:3]: with an
    # option such as --keep-images on the command line the raw argv indices
    # no longer point at the two file names.
    ref_path, src_path = args
    with open(ref_path, 'rt') as ref_file:
        a = load(ref_file, options.strip_images)
    with open(src_path, 'rt') as src_file:
        b = load(src_file, options.strip_images)

    differ = Differ()
    differ.visit(a, b)
321
322
# Run the command-line interface only when executed as a script.
if __name__ == '__main__':
    main()