blob: bd45fdba4b1b2807cfb32b223ec613ef05de6ee7 [file] [log] [blame]
José Fonseca0badbf02011-04-12 09:17:32 +01001#!/usr/bin/env python
2##########################################################################
3#
4# Copyright 2011 Jose Fonseca
5# All Rights Reserved.
6#
7# Permission is hereby granted, free of charge, to any person obtaining a copy
8# of this software and associated documentation files (the "Software"), to deal
9# in the Software without restriction, including without limitation the rights
10# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11# copies of the Software, and to permit persons to whom the Software is
12# furnished to do so, subject to the following conditions:
13#
14# The above copyright notice and this permission notice shall be included in
15# all copies or substantial portions of the Software.
16#
17# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23# THE SOFTWARE.
24#
25##########################################################################/
26
27
28import json
José Fonsecae26d30e2011-12-11 13:37:51 +000029import optparse
30import re
José Fonseca0badbf02011-04-12 09:17:32 +010031import sys
32
33
def strip_object_hook(obj):
    '''JSON object hook that discards tagged objects and dunder members.

    Returns None in place of any object that has a '__class__' member
    (presumably serialized images -- load() installs this hook when its
    strip_images flag is set; confirm against the producer of the JSON).
    Otherwise every member whose name both starts and ends with a double
    underscore is deleted and the same, mutated, dict is returned.
    '''
    if '__class__' in obj:
        return None
    # Iterate over a snapshot of the keys: deleting from a dict while
    # iterating its live key view raises RuntimeError on Python 3.
    for name in list(obj.keys()):
        if name.startswith('__') and name.endswith('__'):
            del obj[name]
    return obj
41
42
class Visitor:
    '''Base class that dispatches on the type of a JSON node.

    visit() forwards dicts to visitObject, lists to visitArray and anything
    else to visitValue, passing along any extra arguments.  The default
    handlers do nothing and return None.
    '''

    def visit(self, node, *args, **kwargs):
        '''Route node to the handler for its type and return its result.'''
        if isinstance(node, dict):
            handler = self.visitObject
        elif isinstance(node, list):
            handler = self.visitArray
        else:
            handler = self.visitValue
        return handler(node, *args, **kwargs)

    def visitObject(self, node, *args, **kwargs):
        '''Handle a dict node; does nothing by default.'''
        return None

    def visitArray(self, node, *args, **kwargs):
        '''Handle a list node; does nothing by default.'''
        return None

    def visitValue(self, node, *args, **kwargs):
        '''Handle any other node; does nothing by default.'''
        return None
61
62
class Dumper(Visitor):
    '''Visitor that pretty-prints a JSON tree to a stream.

    Objects are written with their members in sorted name order as unquoted
    `name: value` pairs; arrays are written one element per line; scalar
    values are written with json.dumps().
    '''

    def __init__(self, stream=sys.stdout):
        '''Create a dumper writing to stream (any object with write()).'''
        self.stream = stream
        # Current nesting depth; controls how far _indent() indents.
        self.level = 0

    def _write(self, s):
        '''Write the string s to the output stream.'''
        self.stream.write(s)

    def _indent(self):
        '''Write the indentation for the current nesting level.'''
        # NOTE(review): indent unit reproduced as it appears in this copy of
        # the file, where runs of whitespace look collapsed -- confirm width.
        self._write(' '*self.level)

    def _newline(self):
        '''Terminate the current output line.'''
        self._write('\n')

    def visitObject(self, node):
        '''Dump a dict, one member per line, sorted by member name.'''
        self.enter_object()

        # sorted() works for both Python 2 key lists and Python 3 key views;
        # calling .sort() on the result of keys() fails on Python 3.
        members = sorted(node)
        last_index = len(members) - 1
        for i, name in enumerate(members):
            self.enter_member(name)
            self.visit(node[name])
            self.leave_member(i == last_index)
        self.leave_object()

    def enter_object(self):
        '''Open an object and increase the nesting level.'''
        self._write('{')
        self._newline()
        self.level += 1

    def enter_member(self, name):
        '''Write the indented `name: ` prefix of an object member.'''
        self._indent()
        self._write('%s: ' % name)

    def leave_member(self, last):
        '''Finish a member line; a comma is added unless last is true.'''
        if not last:
            self._write(',')
        self._newline()

    def leave_object(self):
        '''Close an object and decrease the nesting level.'''
        self.level -= 1
        self._indent()
        self._write('}')
        # Only an object closed at the outermost level ends its own line;
        # nested ones leave that to the enclosing member or element.
        if self.level <= 0:
            self._newline()

    def visitArray(self, node):
        '''Dump a list, one indented element per line.'''
        self.enter_array()
        last_index = len(node) - 1
        for i, value in enumerate(node):
            self._indent()
            self.visit(value)
            if i != last_index:
                self._write(',')
            self._newline()
        self.leave_array()

    def enter_array(self):
        '''Open an array and increase the nesting level.'''
        self._write('[')
        self._newline()
        self.level += 1

    def leave_array(self):
        '''Close an array and decrease the nesting level (no newline).'''
        self.level -= 1
        self._indent()
        self._write(']')

    def visitValue(self, node):
        '''Write a scalar value in its JSON representation.'''
        self._write(json.dumps(node))
José Fonseca0badbf02011-04-12 09:17:32 +0100135
136
137
class Comparer(Visitor):
    '''Visitor that decides whether two JSON trees are equivalent.

    visit(a, b) returns True when b matches a.  Numbers are compared with a
    tolerance as soon as either side is a float; everything else must
    compare equal with ==.
    '''

    # Python 2 has a separate `long` integer type; the name does not exist on
    # Python 3, where int covers it.
    try:
        _number_types = (int, long, float)
    except NameError:
        _number_types = (int, float)

    def __init__(self, ignore_added=False, tolerance=2.0 ** -24):
        '''Configure the comparison.

        ignore_added -- when true, object members present only in b are not
                        treated as a difference.
        tolerance    -- bound for float comparison: relative to a, or
                        absolute when a is zero.
        '''
        self.ignore_added = ignore_added
        self.tolerance = tolerance

    def visitObject(self, a, b):
        '''Return True if dict a matches b member by member.'''
        if not isinstance(b, dict):
            return False
        # sorted() instead of keys() + .sort(): key views cannot be sorted in
        # place on Python 3.
        ak = sorted(a)
        if not self.ignore_added:
            if len(a) != len(b) or ak != sorted(b):
                return False
        for k in ak:
            try:
                be = b[k]
            except KeyError:
                # Only reachable with ignore_added: a member of a that is
                # missing from b is still a difference.
                return False
            if not self.visit(a[k], be):
                return False
        return True

    def visitArray(self, a, b):
        '''Return True if b is a list of equal length with matching items.'''
        if not isinstance(b, list):
            return False
        if len(a) != len(b):
            return False
        for ae, be in zip(a, b):
            if not self.visit(ae, be):
                return False
        return True

    def visitValue(self, a, b):
        '''Return True if scalars a and b match.

        When both are numbers and at least one is a float, the relative
        error against a must be below the tolerance (the absolute value of b
        when a is zero).  Otherwise a and b must compare equal.
        '''
        numbers = self._number_types
        if (isinstance(a, float) and isinstance(b, numbers)) or \
           (isinstance(b, float) and isinstance(a, numbers)):
            if a == 0:
                return abs(b) < self.tolerance
            return abs((b - a)/a) < self.tolerance
        return a == b
José Fonseca0badbf02011-04-12 09:17:32 +0100184
185
class Differ(Visitor):
    '''Visitor that writes the differences between two JSON trees.

    Equivalence is decided by a Comparer; output goes through a Dumper.
    A changed value is written as `old -> new`.
    '''

    def __init__(self, stream=sys.stdout, ignore_added=False):
        '''Create a differ writing to stream.

        ignore_added is forwarded to the Comparer and also keeps members
        present only in the second object out of the output.
        '''
        self.dumper = Dumper(stream)
        self.comparer = Comparer(ignore_added=ignore_added)

    def visit(self, a, b):
        '''Write the differences between a and b; nothing if they match.'''
        if self.comparer.visit(a, b):
            return
        Visitor.visit(self, a, b)

    def visitObject(self, a, b):
        '''Write the differing members of dict a versus b.'''
        if not isinstance(b, dict):
            self.replace(a, b)
            return

        self.dumper.enter_object()
        names = set(a)
        if not self.comparer.ignore_added:
            names.update(b)

        # Select the members to print up front, so that "last" refers to the
        # last printed member.  Testing against the last of all names left a
        # stray trailing comma whenever the final name was unchanged.
        differing = [
            name for name in sorted(names)
            if not self.comparer.visit(a.get(name, None), b.get(name, None))
        ]
        last_index = len(differing) - 1
        for i, name in enumerate(differing):
            self.dumper.enter_member(name)
            self.visit(a.get(name, None), b.get(name, None))
            self.dumper.leave_member(i == last_index)

        self.dumper.leave_object()

    def visitArray(self, a, b):
        '''Write list a versus b element by element.

        Every position is written: matching elements are dumped as they are
        in a, differing ones as their difference.
        '''
        if not isinstance(b, list):
            self.replace(a, b)
            return

        self.dumper.enter_array()
        max_len = max(len(a), len(b))
        for i in range(max_len):
            # Positions past the end of the shorter list are treated as None.
            ae = a[i] if i < len(a) else None
            be = b[i] if i < len(b) else None
            self.dumper._indent()
            if self.comparer.visit(ae, be):
                self.dumper.visit(ae)
            else:
                self.visit(ae, be)
            if i != max_len - 1:
                self.dumper._write(',')
            self.dumper._newline()

        self.dumper.leave_array()

    def visitValue(self, a, b):
        '''Write a replacement if scalar a differs from b.'''
        if a != b:
            self.replace(a, b)

    def replace(self, a, b):
        '''Write `a -> b`, each side dumped in full.'''
        self.dumper.visit(a)
        self.dumper._write(' -> ')
        self.dumper.visit(b)
253
254
#
# Unfortunately the JSON standard does not include comments, but they are
# quite a useful feature to have in regression tests.
#
259
# Token alternatives scanned by the comment stripper.  Each entry is wrapped
# in its own capturing group, in this order, so group 1 is set only when a
# comment matched.  Strings are tokenized too, so that `//` inside a string
# is consumed as part of the string rather than seen as a comment.
_token_res = [
    r'//[^\r\n]*',  # comment
    r'"[^"\\]*(\\.[^"\\]*)*"',  # string
]

_tokens_re = re.compile(
    '|'.join(['(%s)' % token_re for token_re in _token_res]),
    re.DOTALL,
)
266
267
def _strip_comment(mo):
    '''Substitution callback: drop a comment match, keep any other match.

    mo is a match object whose group 1 is set only for comments.
    '''
    return '' if mo.group(1) else mo.group(0)
273
274
def _strip_comments(data):
    '''Return data with (non-standard) `//` JSON comments removed.

    Text inside double-quoted strings is left untouched.
    '''
    stripped = _tokens_re.sub(_strip_comment, data)
    return stripped
278
279
# Import-time self-check: a `//` comment is removed up to (not including) the
# end of its line, while the same text inside a multi-line string is kept.
assert _strip_comments(
    '// a comment\n'
    '"// a comment in a string\n'
    '"'
) == (
    '\n'
    '"// a comment in a string\n'
    '"'
)
285
286
def load(stream, strip_images = True, strip_comments = True):
    '''Parse JSON from a readable stream and return the decoded value.

    strip_images   -- when true, decode with strip_object_hook as the
                      object hook.
    strip_comments -- when true, read the whole stream and remove `//`
                      comments before parsing.

    Parsing is non-strict, so control characters are accepted in strings.
    '''
    object_hook = strip_object_hook if strip_images else None
    if not strip_comments:
        return json.load(stream, strict=False, object_hook=object_hook)
    data = _strip_comments(stream.read())
    return json.loads(data, strict=False, object_hook=object_hook)
José Fonseca0badbf02011-04-12 09:17:32 +0100298
299
def main():
    '''Command-line entry point: print how <src_json> differs from <ref_json>.

    Exits through optparse's error handling unless exactly two positional
    arguments are given.
    '''
    optparser = optparse.OptionParser(
        usage="\n\t%prog [options] <ref_json> <src_json>")
    optparser.add_option(
        '--keep-images',
        action="store_false", dest="strip_images", default=True,
        help="compare images")

    (options, args) = optparser.parse_args(sys.argv[1:])

    if len(args) != 2:
        optparser.error('incorrect number of arguments')

    # Take the file names from the parsed positional arguments rather than
    # sys.argv: when an option such as --keep-images is given, sys.argv[1]
    # is the option itself, not a file name.
    ref_path, src_path = args

    # Context managers make sure both files are closed after loading.
    with open(ref_path, 'rt') as ref_file:
        a = load(ref_file, options.strip_images)
    with open(src_path, 'rt') as src_file:
        b = load(src_file, options.strip_images)

    differ = Differ()
    differ.visit(a, b)
322
323
# Run the comparison only when executed as a script, not when imported.
if __name__ == '__main__':
    main()