blob: eff36e0d3ad46922f3159b0edc74b527551c459f [file] [log] [blame]
José Fonseca299a1b32012-01-26 20:32:59 +00001/**************************************************************************
2 *
3 * Copyright 2012 Jose Fonseca
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 *
24 **************************************************************************/
25
26/*
27 * Python pickle writer
28 */
29
Jose Fonseca9653f952015-05-19 16:32:43 +010030#pragma once
José Fonseca299a1b32012-01-26 20:32:59 +000031
32#include <assert.h>
33#include <stddef.h>
José Fonseca447576d2012-01-27 14:27:13 +000034#include <stdint.h>
Harald Fernengel865a9532015-06-25 16:23:17 +010035#include <wchar.h>
José Fonseca299a1b32012-01-26 20:32:59 +000036
37#include <ostream>
38#include <string>
José Fonsecaebb75cb2012-11-17 11:44:43 +000039#include <limits>
José Fonseca299a1b32012-01-26 20:32:59 +000040
41
/*
 * Minimal serializer that emits a Python pickle (protocol 2) byte stream to
 * a std::ostream.
 *
 * The writer is a thin, stateless layer over the pickle opcodes: callers are
 * responsible for issuing balanced begin/end calls in a valid order.  A
 * typical stream is bracketed by begin() and end(), with values, lists,
 * tuples and dictionaries written in between.
 *
 * Every memoized object is stored with BINPUT into memo slot 1; the memo is
 * never read back by this writer.
 */
class PickleWriter
{
private:
    std::ostream &os;

    /*
     * Python pickle opcodes.  See pickle.py and pickletools.py from Python
     * standard library for details.
     */
    enum Opcode {
        MARK            = '(',
        STOP            = '.',
        POP             = '0',
        POP_MARK        = '1',
        DUP             = '2',
        FLOAT           = 'F',
        INT             = 'I',
        BININT          = 'J',
        BININT1         = 'K',
        LONG            = 'L',
        BININT2         = 'M',
        NONE            = 'N',
        PERSID          = 'P',
        BINPERSID       = 'Q',
        REDUCE          = 'R',
        STRING          = 'S',
        BINSTRING       = 'T',
        SHORT_BINSTRING = 'U',
        UNICODE         = 'V',
        BINUNICODE      = 'X',
        APPEND          = 'a',
        BUILD           = 'b',
        GLOBAL          = 'c',
        DICT            = 'd',
        EMPTY_DICT      = '}',
        APPENDS         = 'e',
        GET             = 'g',
        BINGET          = 'h',
        INST            = 'i',
        LONG_BINGET     = 'j',
        LIST            = 'l',
        EMPTY_LIST      = ']',
        OBJ             = 'o',
        PUT             = 'p',
        BINPUT          = 'q',
        LONG_BINPUT     = 'r',
        SETITEM         = 's',
        TUPLE           = 't',
        EMPTY_TUPLE     = ')',
        SETITEMS        = 'u',
        BINFLOAT        = 'G',

        PROTO           = '\x80',
        NEWOBJ          = '\x81',
        EXT1            = '\x82',
        EXT2            = '\x83',
        EXT4            = '\x84',
        TUPLE1          = '\x85',
        TUPLE2          = '\x86',
        TUPLE3          = '\x87',
        NEWTRUE         = '\x88',
        NEWFALSE        = '\x89',
        LONG1           = '\x8a',
        LONG4           = '\x8b',
    };

    /* Emit a single opcode byte. */
    void putOp(Opcode op) {
        os.put(static_cast<char>(op));
    }

    /* Emit the low 8 bits of value as one raw byte. */
    void putByte(unsigned value) {
        os.put(static_cast<char>(static_cast<unsigned char>(value & 0xff)));
    }

    /* Emit "BINPUT 1": store the object on top of the stack in memo slot 1. */
    void putMemo() {
        putOp(BINPUT);
        putByte(1);
    }

public:
    /* Create a writer that appends to _os; the stream must outlive the writer. */
    PickleWriter(std::ostream &_os) :
        os(_os) {
    }

    /* Start the pickle stream: PROTO opcode followed by protocol version 2. */
    void begin() {
        putOp(PROTO);
        putByte(2);
    }

    /* Terminate the pickle stream with STOP. */
    void end() {
        putOp(STOP);
    }

    /* Push an empty dictionary; fill it with beginItem()/endItem() pairs. */
    void beginDict() {
        putOp(EMPTY_DICT);
        putMemo();
    }

    /* Nothing to emit: items are stored one at a time by endItem(). */
    void endDict() {
    }

    /* Start a dictionary item whose key the caller writes itself. */
    void beginItem() {
    }

    /* Start a dictionary item keyed by the NUL-terminated string name. */
    void beginItem(const char * name) {
        writeString(name);
    }

    /*
     * Start a dictionary item keyed by name.  The full length of the string
     * is written, so keys containing embedded NUL bytes are not truncated.
     */
    void beginItem(const std::string &name) {
        writeString(name);
    }

    /* Store the key/value pair on top of the stack into the dictionary. */
    void endItem() {
        putOp(SETITEM);
    }

    /* Push an empty list and a MARK; elements written next are collected. */
    void beginList() {
        putOp(EMPTY_LIST);
        putMemo();
        putOp(MARK);
    }

    /* Append everything written since beginList()'s MARK to the list. */
    void endList() {
        putOp(APPENDS);
    }

    /* Start a tuple of unknown length (MARK ... TUPLE form). */
    void beginTuple() {
        putOp(MARK);
    }

    /* Close a tuple opened with the argument-less beginTuple(). */
    void endTuple() {
        putOp(TUPLE);
    }

    /*
     * Start a tuple of known length.  Tuples of up to three elements use the
     * dedicated opcodes emitted by endTuple(length) and need no MARK.
     */
    void beginTuple(unsigned length) {
        if (length >= 4) {
            putOp(MARK);
        }
    }

    /*
     * Close a tuple opened with beginTuple(length); length must be the same
     * value that was passed to beginTuple().
     */
    void endTuple(unsigned length) {
        switch (length) {
        case 0:
            putOp(EMPTY_TUPLE);
            break;
        case 1:
            putOp(TUPLE1);
            break;
        case 2:
            putOp(TUPLE2);
            break;
        case 3:
            putOp(TUPLE3);
            break;
        default:
            putOp(TUPLE);
            break;
        }
    }

    /*
     * Write length bytes starting at s as a byte string.  A null s is
     * written as None.  Strings shorter than 256 bytes use the one-byte
     * length form, longer ones the four-byte length form.
     */
    void writeString(const char *s, size_t length) {
        if (!s) {
            writeNone();
            return;
        }

        if (length < 256) {
            putOp(SHORT_BINSTRING);
            putByte(static_cast<unsigned>(length));
        } else {
            // BINSTRING carries a signed 32-bit length.
            assert(static_cast<unsigned long long>(length) <= 0x7fffffffULL);
            putOp(BINSTRING);
            putInt32(static_cast<uint32_t>(length));
        }
        os.write(s, static_cast<std::streamsize>(length));

        putMemo();
    }

    /* Write a NUL-terminated string; a null pointer is written as None. */
    void writeString(const char *s) {
        if (!s) {
            writeNone();
            return;
        }

        writeString(s, std::char_traits<char>::length(s));
    }

    /* Write the full contents of s, including any embedded NUL bytes. */
    void writeString(const std::string &s) {
        writeString(s.c_str(), s.size());
    }

    /*
     * Write length wide characters starting at s as a unicode string.  A
     * null s is written as None.  Only 7-bit ASCII is preserved; every other
     * character is replaced by '?'.
     */
    void writeWString(const wchar_t *s, size_t length) {
        if (!s) {
            writeNone();
            return;
        }

        /* FIXME: emit UTF-8 */
        // One output byte per input character, so the byte count written
        // below matches the BINUNICODE length field.
        assert(static_cast<unsigned long long>(length) <= 0xffffffffULL);
        putOp(BINUNICODE);
        putInt32(static_cast<uint32_t>(length));
        for (size_t i = 0; i < length; ++i) {
            const wchar_t wc = s[i];
            const bool ascii = wc >= 0 && wc < 0x80;
            os.put(ascii ? static_cast<char>(wc) : '?');
        }

        putMemo();
    }

    /* Write a NUL-terminated wide string; a null pointer is written as None. */
    void writeWString(const wchar_t *s) {
        if (!s) {
            writeNone();
            return;
        }

        writeWString(s, wcslen(s));
    }

    /* Write Python's None. */
    void writeNone() {
        putOp(NONE);
    }

    /* Write Python's True or False. */
    void writeBool(bool b) {
        putOp(b ? NEWTRUE : NEWFALSE);
    }

    /* Write an integer in 0..255 (one-byte form). */
    void writeInt(uint8_t i) {
        putOp(BININT1);
        putByte(i);
    }

    /* Write an integer in 0..65535, using the shortest available form. */
    void writeInt(uint16_t i) {
        if (i < 0x100) {
            writeInt(static_cast<uint8_t>(i));
        } else {
            putOp(BININT2);
            putInt16(i);
        }
    }

    /* Write a signed 32-bit integer, using the shortest available form. */
    void writeInt(int32_t i) {
        if (0 <= i && i < 0x10000) {
            writeInt(static_cast<uint16_t>(i));
        } else {
            putOp(BININT);
            putInt32(static_cast<uint32_t>(i));
        }
    }

    /*
     * Write an unsigned 32-bit integer.  Values that fit a signed 32-bit
     * integer use the fixed-width forms; larger ones are written as a long.
     */
    void writeInt(uint32_t i) {
        if (i <= static_cast<uint32_t>(std::numeric_limits<int32_t>::max())) {
            writeInt(static_cast<int32_t>(i));
        } else {
            writeLong(i);
        }
    }

    /*
     * Write a signed 64-bit integer.  Values within the signed 32-bit range
     * use the fixed-width forms; everything else is written as a long.
     */
    void writeInt(long long i) {
        if (std::numeric_limits<int32_t>::min() <= i &&
            i <= std::numeric_limits<int32_t>::max()) {
            writeInt(static_cast<int32_t>(i));
        } else {
            writeLong(i);
        }
    }

    /*
     * Write an unsigned 64-bit integer.  Values that fit a signed 32-bit
     * integer use the fixed-width forms; larger ones are written as a long.
     */
    void writeInt(unsigned long long i) {
        if (i <= static_cast<unsigned long long>(std::numeric_limits<int32_t>::max())) {
            writeInt(static_cast<int32_t>(i));
        } else {
            writeLong(i);
        }
    }

    /*
     * Write a double as BINFLOAT: the 8 bytes of its representation, most
     * significant byte first.
     */
    void writeFloat(double f) {
        // Same union-based reinterpretation the file has always relied on,
        // but through a 64-bit integer so that the byte order written does
        // not depend on the byte order of the host.
        union {
            double f;
            uint64_t bits;
        } u;

        static_assert(sizeof u.f == sizeof u.bits, "double is not 8 bytes");
        u.f = f;

        putOp(BINFLOAT);
        for (int shift = 56; shift >= 0; shift -= 8) {
            putByte(static_cast<unsigned>((u.bits >> shift) & 0xff));
        }
    }

    /*
     * Write length bytes at buf as __builtin__.bytearray(<string>).  buf is
     * forwarded to writeString(), so a null buf becomes a None argument.
     */
    void writeByteArray(const void *buf, size_t length) {
        putOp(GLOBAL);
        os << "__builtin__\nbytearray\n";
        putMemo();
        writeString(static_cast<const char *>(buf), length);
        putOp(TUPLE1);
        putOp(REDUCE);
    }

    /* Write addr as unpickle.Pointer(<integer>). */
    void writePointer(unsigned long long addr) {
        putOp(GLOBAL);
        os << "unpickle\nPointer\n";
        putMemo();
        writeInt(addr);
        putOp(TUPLE1);
        putOp(REDUCE);
    }

protected:
    /* Emit i as two bytes, least significant first. */
    void putInt16(uint16_t i) {
        putByte(i & 0xffu);
        putByte(static_cast<unsigned>(i) >> 8);
    }

    /* Emit i as four bytes, least significant first. */
    void putInt32(uint32_t i) {
        putByte( i        & 0xff);
        putByte((i >>  8) & 0xff);
        putByte((i >> 16) & 0xff);
        putByte( i >> 24        );
    }

    /*
     * Write an integer of any width as LONG1: a one-byte count followed by
     * that many bytes of little-endian two's complement.  Zero is written
     * with a count of 0.
     */
    template< class T >
    void writeLong(T l) {
        putOp(LONG1);

        if (l == 0) {
            putByte(0);
            return;
        }

        // l is non-zero here, so "not positive" means negative.  Written
        // this way to avoid an always-false comparison for unsigned T.
        const bool negative = std::numeric_limits<T>::is_signed && !(l > 0);

        // Value that l collapses to once every significant byte has been
        // shifted out: all ones for negative numbers, zero otherwise.
        const T sign = negative ? static_cast<T>(~0) : static_cast<T>(0);

        // Count how many bytes we need to represent the long integer.
        T rest = l;
        unsigned count = 0;
        do {
            ++count;
            rest >>= 8;
        } while (rest != sign);

        // The payload is decoded as two's complement, so the top bit of the
        // last byte must equal the real sign of the value.  For unsigned T
        // the sign is always positive, even when the top bit of a
        // full-width value is set; add a padding byte in that case.
        const unsigned topBit = static_cast<unsigned>((l >> (8 * count - 1)) & 1);
        if (topBit != (negative ? 1u : 0u)) {
            ++count;
        }
        putByte(count);

        for (unsigned i = 0; i < count; ++i) {
            putByte(static_cast<unsigned>(l & 0xff));
            l >>= 8;
        }
    }
};
387