blob: d7a18e21112abfbdccf90df7d4fa3f0c684bcdff [file] [log] [blame]
Jose Fonseca247e1fa2019-04-28 14:14:44 +01001#!/usr/bin/env python3
José Fonseca0badbf02011-04-12 09:17:32 +01002##########################################################################
3#
Jose Fonsecadcdeba42015-07-10 09:43:53 +01004# Copyright 2015 VMware, Inc.
José Fonseca0badbf02011-04-12 09:17:32 +01005# Copyright 2011 Jose Fonseca
6# All Rights Reserved.
7#
8# Permission is hereby granted, free of charge, to any person obtaining a copy
9# of this software and associated documentation files (the "Software"), to deal
10# in the Software without restriction, including without limitation the rights
11# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12# copies of the Software, and to permit persons to whom the Software is
13# furnished to do so, subject to the following conditions:
14#
15# The above copyright notice and this permission notice shall be included in
16# all copies or substantial portions of the Software.
17#
18# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
24# THE SOFTWARE.
25#
26##########################################################################/
27
28
29import json
José Fonsecae26d30e2011-12-11 13:37:51 +000030import optparse
31import re
Jose Fonsecadcdeba42015-07-10 09:43:53 +010032import difflib
José Fonseca0badbf02011-04-12 09:17:32 +010033import sys
34
35
def strip_object_hook(obj):
    '''JSON ``object_hook`` that discards tagged objects and dunder members.

    An object carrying a ``__class__`` member is replaced by ``None``
    altogether.  Any other object has every member whose name both starts and
    ends with a double underscore removed in place, and is then returned.
    '''
    if '__class__' in obj:
        return None
    # Collect the names first: a dict must not shrink while it is iterated.
    dunder_names = [
        key for key in obj
        if key.startswith('__') and key.endswith('__')
    ]
    for key in dunder_names:
        del obj[key]
    return obj
43
44
class Visitor:
    '''Base class that dispatches on the kind of a decoded JSON node.

    ``visit`` forwards ``dict`` nodes to ``visitObject``, ``list`` nodes to
    ``visitArray`` and everything else to ``visitValue``, passing any extra
    positional and keyword arguments through and returning whatever the
    handler returns.  The handlers defined here do nothing and return
    ``None``; subclasses override the ones they need.
    '''

    def visit(self, node, *args, **kwargs):
        # Pick the handler first, then make a single call.
        if isinstance(node, dict):
            handler = self.visitObject
        elif isinstance(node, list):
            handler = self.visitArray
        else:
            handler = self.visitValue
        return handler(node, *args, **kwargs)

    def visitObject(self, node, *args, **kwargs):
        return None

    def visitArray(self, node, *args, **kwargs):
        return None

    def visitValue(self, node, *args, **kwargs):
        return None
63
64
class Dumper(Visitor):
    '''Pretty-printer that writes a decoded JSON tree to a text stream.

    Objects are written one member per line with the members sorted by name,
    and member names are written bare (not quoted).  Arrays are written one
    element per line.  Scalars are written with ``json.dumps``.

    Attributes:
        stream: object with a ``write(str)`` method receiving the output.
        level: current nesting depth, used to compute the indentation.
    '''

    def __init__(self, stream = None):
        # The default is resolved here rather than in the signature so that
        # the ``sys.stdout`` in effect when the Dumper is created is used; a
        # default of ``sys.stdout`` in the signature would be captured once,
        # when the class body is executed, and ignore later redirection.
        if stream is None:
            stream = sys.stdout
        self.stream = stream
        self.level = 0

    def _write(self, s):
        self.stream.write(s)

    def _indent(self):
        self._write(' '*self.level)

    def _newline(self):
        self._write('\n')

    def visitObject(self, node):
        '''Write a dict as ``{``, its members sorted by name, then ``}``.'''
        self.enter_object()

        members = sorted(node)
        last_index = len(members) - 1
        for index, name in enumerate(members):
            self.enter_member(name)
            self.visit(node[name])
            self.leave_member(index == last_index)
        self.leave_object()

    def enter_object(self):
        self._write('{')
        self._newline()
        self.level += 1

    def enter_member(self, name):
        self._indent()
        self._write('%s: ' % name)

    def leave_member(self, last):
        # Members are comma separated; the last one gets no comma.
        if not last:
            self._write(',')
        self._newline()

    def leave_object(self):
        self.level -= 1
        self._indent()
        self._write('}')
        # Only an outermost object is followed by a line break; a nested one
        # leaves that to whoever is writing the enclosing member or element.
        if self.level <= 0:
            self._newline()

    def visitArray(self, node):
        '''Write a list as ``[``, one indented element per line, then ``]``.'''
        self.enter_array()
        last_index = len(node) - 1
        for index, value in enumerate(node):
            self._indent()
            self.visit(value)
            if index != last_index:
                self._write(',')
            self._newline()
        self.leave_array()

    def enter_array(self):
        self._write('[')
        self._newline()
        self.level += 1

    def leave_array(self):
        self.level -= 1
        self._indent()
        self._write(']')

    def visitValue(self, node):
        '''Write a scalar using its JSON encoding (NaN/Infinity allowed).'''
        self._write(json.dumps(node, allow_nan=True))
José Fonseca0badbf02011-04-12 09:17:32 +0100137
138
139
class Comparer(Visitor):
    '''Decide whether two decoded JSON trees are equivalent.

    ``visit(a, b)`` returns ``True`` when ``b`` matches ``a``:

    * objects match when ``b`` is a dict and every member of ``a`` matches
      the member of the same name in ``b``; unless ``ignore_added`` is set,
      ``b`` must also have exactly the same member names as ``a``;
    * arrays match when ``b`` is a list of the same length and the elements
      match pairwise;
    * two numbers, at least one of which is a float, match when they are
      equal, are both NaN, or differ by less than ``tolerance`` relative to
      ``a`` (absolute when ``a`` is zero);
    * anything else is compared with ``==``.

    Attributes:
        ignore_added: when true, members present only in ``b`` are ignored.
        tolerance: threshold used for the floating point comparison.
    '''

    def __init__(self, ignore_added = False, tolerance = 2.0 ** -24):
        self.ignore_added = ignore_added
        self.tolerance = tolerance

    def visitObject(self, a, b):
        if not isinstance(b, dict):
            return False
        # Key views compare like sets, so this is the same test as comparing
        # the sorted name lists (and it implies equal sizes).
        if not self.ignore_added and a.keys() != b.keys():
            return False
        for name, a_member in a.items():
            # Reachable with a missing name only when ignore_added is set.
            if name not in b:
                return False
            if not self.visit(a_member, b[name]):
                return False
        return True

    def visitArray(self, a, b):
        if not isinstance(b, list):
            return False
        if len(a) != len(b):
            return False
        return all(self.visit(ae, be) for ae, be in zip(a, b))

    def visitValue(self, a, b):
        numbers = (int, float)
        approximate = (
            isinstance(a, numbers) and isinstance(b, numbers)
            and (isinstance(a, float) or isinstance(b, float))
        )
        if not approximate:
            return a == b

        if a is b or a == b:
            return True
        # NaN is the only value unequal to itself.  Two NaNs are considered
        # equal even when they are distinct objects; a NaN never matches a
        # regular number.
        if a != a or b != b:
            return a != a and b != b
        try:
            if a == 0:
                return abs(b) < self.tolerance
            return abs((b - a)/a) < self.tolerance
        except OverflowError:
            # An int too large to be converted to float cannot take part in
            # the arithmetic above; a == b was already ruled out, so report
            # a mismatch instead of aborting the whole comparison.
            return False
José Fonseca0badbf02011-04-12 09:17:32 +0100191
192
class Differ(Visitor):
    '''Write a human-readable description of how ``b`` differs from ``a``.

    ``visit(a, b)`` writes nothing when the two trees compare equal according
    to the internal ``Comparer``.  Otherwise objects are written with only
    their differing members, arrays are written in full with differing
    elements expanded, and differing scalars are written as ``a -> b``
    (or as a line-by-line diff when either side is a multi-line string).

    Attributes:
        dumper: ``Dumper`` used to produce all output.
        comparer: ``Comparer`` used to decide what counts as a difference.
    '''

    def __init__(self, stream = None, ignore_added = False):
        # Resolve the default here rather than in the signature, so that the
        # ``sys.stdout`` in effect when the Differ is created is used.
        if stream is None:
            stream = sys.stdout
        self.dumper = Dumper(stream)
        self.comparer = Comparer(ignore_added = ignore_added)

    def visit(self, a, b):
        # Equivalent subtrees produce no output at all.
        if self.comparer.visit(a, b):
            return
        Visitor.visit(self, a, b)

    def visitObject(self, a, b):
        if not isinstance(b, dict):
            self.replace(a, b)
            return

        self.dumper.enter_object()
        names = set(a)
        if not self.comparer.ignore_added:
            names.update(b)

        # Work out which members differ before writing anything, so that the
        # "last member" flag refers to the last member actually written and
        # no trailing comma is left behind when the final names match.
        # A member missing on one side is compared as None.
        changed = [
            name for name in sorted(names)
            if not self.comparer.visit(a.get(name, None), b.get(name, None))
        ]
        last_index = len(changed) - 1
        for index, name in enumerate(changed):
            self.dumper.enter_member(name)
            self.visit(a.get(name, None), b.get(name, None))
            self.dumper.leave_member(index == last_index)

        self.dumper.leave_object()

    def visitArray(self, a, b):
        if not isinstance(b, list):
            self.replace(a, b)
            return

        self.dumper.enter_array()
        a_len = len(a)
        b_len = len(b)
        max_len = max(a_len, b_len)
        for i in range(max_len):
            # Positions past the end of the shorter list are treated as None.
            ae = a[i] if i < a_len else None
            be = b[i] if i < b_len else None
            self.dumper._indent()
            if self.comparer.visit(ae, be):
                # Unchanged elements are still written, keeping positions.
                self.dumper.visit(ae)
            else:
                self.visit(ae, be)
            if i != max_len - 1:
                self.dumper._write(',')
            self.dumper._newline()

        self.dumper.leave_array()

    def visitValue(self, a, b):
        if a != b:
            self.replace(a, b)

    def replace(self, a, b):
        '''Write the change from ``a`` to ``b``.'''
        if not (self.isMultilineString(a) or self.isMultilineString(b)):
            self.dumper.visit(a)
            self.dumper._write(' -> ')
            self.dumper.visit(b)
            return

        # Multi-line text: show a line-oriented diff, one entry per line,
        # indented one level deeper than the current position.
        a_lines = str(a).splitlines()
        b_lines = str(b).splitlines()
        self.dumper.level += 1
        for entry in difflib.Differ().compare(a_lines, b_lines):
            self.dumper._newline()
            self.dumper._indent()
            tag = entry[:2]
            text = entry[2:]
            if tag == '? ':
                # Hint lines from difflib are written unquoted.
                # NOTE(review): this replacement tag is one character wide
                # while difflib tags are two -- confirm the intended width.
                tag = ' '
                prefix = ' '
                text = text.rstrip()
                suffix = ''
            else:
                prefix = '"'
                suffix = '\\n"'
            self.dumper._write(tag + prefix + text + suffix)
        self.dumper.level -= 1

    def isMultilineString(self, value):
        return isinstance(value, str) and '\n' in value
Jose Fonsecadcdeba42015-07-10 09:43:53 +0100288
José Fonseca0badbf02011-04-12 09:17:32 +0100289
#
# Unfortunately the JSON standard does not include comments, but they are
# quite a useful feature to have in regression tests.
#
294
# Alternatives recognised while scanning a document.  The order matters: the
# comment pattern becomes capture group 1 of the combined expression.
_token_res = [
    r'//[^\r\n]*',              # comment, up to the end of the line
    r'"[^"\\]*(\\.[^"\\]*)*"',  # string, including backslash escapes
]

# One capture group per alternative, joined with '|'.
_tokens_re = re.compile(
    r'|'.join('(%s)' % token_re for token_re in _token_res),
    re.DOTALL,
)
301
302
def _strip_comment(mo):
    '''Substitution callback: drop the match if group 1 matched, else keep it.'''
    return '' if mo.group(1) else mo.group(0)
308
309
def _strip_comments(data):
    '''Return data with (non-standard) JSON comments removed.'''
    stripped = _tokens_re.sub(_strip_comment, data)
    return stripped
313
314
# Import-time self-check: a comment is removed, while the same text inside a
# string (here a string that spans a line break) is left alone.
assert _strip_comments('// a comment\n"// a comment in a string\n"') == \
    '\n"// a comment in a string\n"'
320
321
def load(stream, strip_images = True, strip_comments = True):
    '''Decode a JSON document from a readable text stream.

    With strip_images set, strip_object_hook is installed as the decoder's
    object hook.  With strip_comments set, the whole stream is read and
    passed through _strip_comments before decoding.  Decoding is always done
    with strict=False.
    '''
    object_hook = strip_object_hook if strip_images else None
    if not strip_comments:
        return json.load(stream, strict=False, object_hook = object_hook)
    text = _strip_comments(stream.read())
    return json.loads(text, strict=False, object_hook = object_hook)
José Fonseca0badbf02011-04-12 09:17:32 +0100333
334
def main():
    '''Command line entry point: print the differences between two JSON files.

    Expects exactly two positional arguments, the reference file and the file
    to compare against it; any other count is reported through the option
    parser's error handling.  Differences are written to standard output.
    '''
    optparser = optparse.OptionParser(
        usage="\n\t%prog [options] <ref_json> <src_json>")
    optparser.add_option(
        '--ignore-added',
        action="store_true", dest="ignore_added", default=False,
        help="ignore added state")
    optparser.add_option(
        '--keep-images',
        action="store_false", dest="strip_images", default=True,
        help="compare images")

    (options, args) = optparser.parse_args(sys.argv[1:])

    if len(args) != 2:
        optparser.error('incorrect number of arguments')

    ref_path, src_path = args

    # Close each input as soon as it has been decoded.
    with open(ref_path, 'rt') as ref_stream:
        a = load(ref_stream, options.strip_images)
    with open(src_path, 'rt') as src_stream:
        b = load(src_stream, options.strip_images)

    differ = Differ(ignore_added = options.ignore_added)
    differ.visit(a, b)
361
362
# Run the command line interface only when executed as a script, not when
# the module is imported.
if __name__ == '__main__':
    main()