blob: a76d0c9ddb7a08029890fea1f8ebe5bb770c1053 [file] [log] [blame]
José Fonseca0badbf02011-04-12 09:17:32 +01001#!/usr/bin/env python
2##########################################################################
3#
4# Copyright 2011 Jose Fonseca
5# All Rights Reserved.
6#
7# Permission is hereby granted, free of charge, to any person obtaining a copy
8# of this software and associated documentation files (the "Software"), to deal
9# in the Software without restriction, including without limitation the rights
10# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11# copies of the Software, and to permit persons to whom the Software is
12# furnished to do so, subject to the following conditions:
13#
14# The above copyright notice and this permission notice shall be included in
15# all copies or substantial portions of the Software.
16#
17# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23# THE SOFTWARE.
24#
25##########################################################################/
26
27
28import json
José Fonsecae26d30e2011-12-11 13:37:51 +000029import optparse
30import re
José Fonseca0badbf02011-04-12 09:17:32 +010031import sys
32
33
def strip_object_hook(obj):
    '''JSON ``object_hook`` that drops tagged objects and dunder members.

    An object that has a ``__class__`` member is replaced by ``None``.  For
    any other object, every member whose name both starts and ends with a
    double underscore is deleted in place.

    Returns ``None`` or the same (mutated) ``obj`` that was passed in.
    '''
    if '__class__' in obj:
        return None
    # Iterate over a snapshot of the keys: deleting from a dict while walking
    # its live key view raises RuntimeError on Python 3.
    for name in list(obj.keys()):
        if name.startswith('__') and name.endswith('__'):
            del obj[name]
    return obj
41
42
class Visitor:
    '''Base class that dispatches on the kind of a decoded JSON node.

    ``visit`` forwards dicts to ``visitObject``, lists to ``visitArray`` and
    anything else to ``visitValue``, passing along any extra positional and
    keyword arguments and returning whatever the handler returns.  The
    handlers defined here do nothing and return ``None``; subclasses override
    them.
    '''

    def visit(self, node, *args, **kwargs):
        # Pick the handler first, then make a single forwarding call.
        if isinstance(node, dict):
            handler = self.visitObject
        elif isinstance(node, list):
            handler = self.visitArray
        else:
            handler = self.visitValue
        return handler(node, *args, **kwargs)

    def visitObject(self, node, *args, **kwargs):
        '''Handle a dict node; no-op by default.'''
        return None

    def visitArray(self, node, *args, **kwargs):
        '''Handle a list node; no-op by default.'''
        return None

    def visitValue(self, node, *args, **kwargs):
        '''Handle any node that is neither a dict nor a list; no-op by default.'''
        return None
61
62
class Dumper(Visitor):
    '''Visitor that writes an indented, JSON-like rendering of a tree.

    Objects are written with their members sorted by name; member names are
    written as-is (``name: value``), while leaf values go through
    ``json.dumps``.  The ``enter_*``/``leave_*`` and ``_write``/``_indent``/
    ``_newline`` helpers are also driven directly by ``Differ``.
    '''

    def __init__(self, stream = sys.stdout):
        '''Create a dumper writing to ``stream``, starting at nesting level 0.'''
        self.stream = stream
        self.level = 0

    def _write(self, s):
        self.stream.write(s)

    def _indent(self):
        # One indentation unit per nesting level.
        self._write(' '*self.level)

    def _newline(self):
        self._write('\n')

    def visitObject(self, node):
        '''Write every member of the dict ``node``, sorted by member name.'''
        self.enter_object()

        # sorted() rather than keys() followed by .sort(): on Python 3 keys()
        # returns a view, which has no sort() method.
        members = sorted(node.keys())
        last_index = len(members) - 1
        for i, name in enumerate(members):
            self.enter_member(name)
            self.visit(node[name])
            self.leave_member(i == last_index)
        self.leave_object()

    def enter_object(self):
        '''Open an object: write ``{`` and a newline, then nest one level.'''
        self._write('{')
        self._newline()
        self.level += 1

    def enter_member(self, name):
        '''Write the indentation and the ``name: `` prefix of a member.'''
        self._indent()
        self._write('%s: ' % name)

    def leave_member(self, last):
        '''Finish a member line; a comma separator is written unless ``last``.'''
        if not last:
            self._write(',')
        self._newline()

    def leave_object(self):
        '''Close an object: un-nest one level and write the indented ``}``.'''
        self.level -= 1
        self._indent()
        self._write('}')
        # Only an object closed back at the outermost level ends its own line;
        # nested ones are terminated by whoever wrote the enclosing entry.
        if self.level <= 0:
            self._newline()

    def visitArray(self, node):
        '''Write every element of the list ``node``, one per line.'''
        self.enter_array()
        last_index = len(node) - 1
        for i, value in enumerate(node):
            self._indent()
            self.visit(value)
            if i != last_index:
                self._write(',')
            self._newline()
        self.leave_array()

    def enter_array(self):
        '''Open an array: write ``[`` and a newline, then nest one level.'''
        self._write('[')
        self._newline()
        self.level += 1

    def leave_array(self):
        '''Close an array: un-nest one level and write the indented ``]``.'''
        self.level -= 1
        self._indent()
        self._write(']')

    def visitValue(self, node):
        '''Write a leaf value in its JSON encoding.'''
        self._write(json.dumps(node))
José Fonseca0badbf02011-04-12 09:17:32 +0100135
136
137
class Comparer(Visitor):
    '''Visitor that tells whether two decoded JSON trees are equivalent.

    ``visit(a, b)`` returns ``True`` or ``False``.  Dispatch is on the type
    of ``a``; each handler checks that ``b`` is compatible.

    With ``ignore_added`` set, members present only in ``b`` are not treated
    as a difference; every member of ``a`` must still exist in ``b`` and
    match.  When at least one side of a numeric pair is a float, the values
    match if they are equal or differ by less than ``tolerance`` (relative to
    ``a``, or absolute when ``a`` is zero).
    '''

    # Python 2 has a separate ``long`` integer type; Python 3 does not, and
    # referencing the name there raises NameError.
    try:
        _number_types = (int, long, float)  # noqa: F821
    except NameError:
        _number_types = (int, float)

    def __init__(self, ignore_added = False, tolerance = 2.0 ** -24):
        self.ignore_added = ignore_added
        self.tolerance = tolerance

    def visitObject(self, a, b):
        '''Compare the dict ``a`` against ``b``.'''
        if not isinstance(b, dict):
            return False
        # Unless additions are ignored, both sides must have exactly the same
        # member names.  Comparing key sets avoids keys().sort(), which does
        # not exist on Python 3 dict views.
        if not self.ignore_added and set(a.keys()) != set(b.keys()):
            return False
        for k in a:
            # Reachable only with ignore_added: a member missing from b.
            if k not in b:
                return False
            if not self.visit(a[k], b[k]):
                return False
        return True

    def visitArray(self, a, b):
        '''Compare the list ``a`` against ``b`` element by element.'''
        if not isinstance(b, list):
            return False
        if len(a) != len(b):
            return False
        for ae, be in zip(a, b):
            if not self.visit(ae, be):
                return False
        return True

    def visitValue(self, a, b):
        '''Compare two leaf values, applying the tolerance to float pairs.'''
        numbers = self._number_types
        if isinstance(a, float) and isinstance(b, numbers) or \
           isinstance(b, float) and isinstance(a, numbers):
            if a is b:
                # Identity check so that a NaN matches itself (NaN != NaN).
                # NOTE(review): relies on both sides being the same NaN
                # object -- confirm the decoder guarantees that.
                return True
            elif a == b:
                return True
            elif a == 0:
                # No relative error is defined against zero; use absolute.
                return abs(b) < self.tolerance
            else:
                return abs((b - a)/a) < self.tolerance
        else:
            return a == b
José Fonseca0badbf02011-04-12 09:17:32 +0100189
190
class Differ(Visitor):
    '''Visitor that writes the differences between two decoded JSON trees.

    Equivalence is decided by a ``Comparer``; output goes through a
    ``Dumper`` on ``stream``.  Nothing is written for a pair that the
    comparer considers equal.  A changed leaf, or a pair of different kinds,
    is written as ``old -> new``.
    '''

    def __init__(self, stream = sys.stdout, ignore_added = False):
        self.dumper = Dumper(stream)
        self.comparer = Comparer(ignore_added = ignore_added)

    def visit(self, a, b):
        '''Write the difference between ``a`` and ``b``, if there is one.'''
        if self.comparer.visit(a, b):
            return
        Visitor.visit(self, a, b)

    def visitObject(self, a, b):
        '''Write only the members that differ between dict ``a`` and ``b``.'''
        if not isinstance(b, dict):
            self.replace(a, b)
            return

        self.dumper.enter_object()
        names = set(a.keys())
        if not self.comparer.ignore_added:
            names.update(b.keys())

        # Collect the differing members first, so that "last" refers to the
        # last member actually written; otherwise a trailing comma is emitted
        # whenever the final differing member is followed by equal ones.
        changed = []
        for name in sorted(names):
            ae = a.get(name, None)
            be = b.get(name, None)
            if not self.comparer.visit(ae, be):
                changed.append((name, ae, be))

        last_index = len(changed) - 1
        for i, (name, ae, be) in enumerate(changed):
            self.dumper.enter_member(name)
            self.visit(ae, be)
            self.dumper.leave_member(i == last_index)

        self.dumper.leave_object()

    def visitArray(self, a, b):
        '''Write list ``a`` against ``b`` position by position.

        Equal elements are dumped as-is and differing ones as a nested diff;
        positions past the end of the shorter list are treated as ``None``.
        '''
        if not isinstance(b, list):
            self.replace(a, b)
            return

        self.dumper.enter_array()
        a_len = len(a)
        b_len = len(b)
        max_len = max(a_len, b_len)
        for i in range(max_len):
            ae = a[i] if i < a_len else None
            be = b[i] if i < b_len else None
            self.dumper._indent()
            if self.comparer.visit(ae, be):
                self.dumper.visit(ae)
            else:
                self.visit(ae, be)
            if i != max_len - 1:
                self.dumper._write(',')
            self.dumper._newline()

        self.dumper.leave_array()

    def visitValue(self, a, b):
        '''Write ``a -> b`` for a leaf ``a`` that is not equal to ``b``.'''
        if a != b:
            self.replace(a, b)

    def replace(self, a, b):
        '''Write ``a``, the `` -> `` separator, then ``b``.'''
        self.dumper.visit(a)
        self.dumper._write(' -> ')
        self.dumper.visit(b)
258
259
#
# Unfortunately the JSON standard does not include comments, but they are a
# quite useful feature to have in regression tests.
#
264
265_token_res = [
266 r'//[^\r\n]*', # comment
267 r'"[^"\\]*(\\.[^"\\]*)*"', # string
268]
269
270_tokens_re = re.compile(r'|'.join(['(' + token_re + ')' for token_re in _token_res]), re.DOTALL)
271
272
273def _strip_comment(mo):
274 if mo.group(1):
275 return ''
276 else:
277 return mo.group(0)
278
279
def _strip_comments(data):
    '''Strip (non-standard) ``//`` comments from JSON text.

    Every token found by ``_tokens_re`` is passed through ``_strip_comment``;
    the resulting text is returned.
    '''
    stripped = _tokens_re.sub(_strip_comment, data)
    return stripped
283
284
# Import-time self-check: a '//' comment outside a string is removed (the line
# break after it is kept), while '//' inside a string literal -- even one that
# spans lines -- is left untouched.
assert _strip_comments('''// a comment
"// a comment in a string
"''') == '''
"// a comment in a string
"'''
290
291
def load(stream, strip_images = True, strip_comments = True):
    '''Decode a JSON document read from ``stream``.

    With ``strip_images`` set, ``strip_object_hook`` is installed as the
    decoder's object hook.  With ``strip_comments`` set, the whole stream is
    read and run through ``_strip_comments`` before decoding.  Decoding is
    always done with ``strict=False``.

    Returns the decoded document.
    '''
    object_hook = strip_object_hook if strip_images else None

    if not strip_comments:
        return json.load(stream, strict=False, object_hook = object_hook)

    text = _strip_comments(stream.read())
    return json.loads(text, strict=False, object_hook = object_hook)
José Fonseca0badbf02011-04-12 09:17:32 +0100303
304
def main():
    '''Command-line entry point: write the diff of two JSON files to stdout.

    Expects exactly two positional arguments, the reference file and the file
    to compare against it; any other count is reported through the option
    parser's ``error``.

    Options:
      --ignore-added  do not report members present only in the second file
      --keep-images   do not install the object-stripping hook when loading
    '''
    optparser = optparse.OptionParser(
        usage="\n\t%prog [options] <ref_json> <src_json>")
    optparser.add_option(
        '--ignore-added',
        action="store_true", dest="ignore_added", default=False,
        help="ignore added state")
    optparser.add_option(
        '--keep-images',
        action="store_false", dest="strip_images", default=True,
        help="compare images")

    (options, args) = optparser.parse_args(sys.argv[1:])

    if len(args) != 2:
        optparser.error('incorrect number of arguments')

    ref_path, src_path = args

    # Close each input as soon as it has been parsed instead of leaving the
    # handles open until interpreter exit.
    with open(ref_path, 'rt') as stream:
        a = load(stream, options.strip_images)
    with open(src_path, 'rt') as stream:
        b = load(stream, options.strip_images)

    differ = Differ(ignore_added = options.ignore_added)
    differ.visit(a, b)
331
332
# Run the command-line tool only when this file is executed as a script, not
# when it is imported as a module.
if __name__ == '__main__':
    main()