/**************************************************************************
 *
 * Copyright 2012 Jose Fonseca
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 **************************************************************************/

/*
 * Python pickle writer
 */

#pragma once

#include <assert.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>
#include <wchar.h>

#include <limits>
#include <ostream>
#include <string>


/*
 * Streaming writer that emits Python pickle opcodes to a std::ostream.
 *
 * The writer is purely sequential: callers are responsible for issuing the
 * begin/end calls in a properly nested order.  Nothing is buffered or
 * validated here; every call appends bytes to the underlying stream
 * immediately (writeWString() stages its UTF-8 bytes in a local string
 * first, because the byte length has to be written ahead of the data).
 *
 * The stream is held by reference and must outlive the writer.
 */
class PickleWriter
{
private:
    std::ostream &os;

    /*
     * Python pickle opcodes.  See pickle.py and pickletools.py from Python
     * standard library for details.
     */
    enum Opcode {
        MARK            = '(',
        STOP            = '.',
        POP             = '0',
        POP_MARK        = '1',
        DUP             = '2',
        FLOAT           = 'F',
        INT             = 'I',
        BININT          = 'J',
        BININT1         = 'K',
        LONG            = 'L',
        BININT2         = 'M',
        NONE            = 'N',
        PERSID          = 'P',
        BINPERSID       = 'Q',
        REDUCE          = 'R',
        STRING          = 'S',
        BINSTRING       = 'T',
        SHORT_BINSTRING = 'U',
        UNICODE         = 'V',
        BINUNICODE      = 'X',
        APPEND          = 'a',
        BUILD           = 'b',
        GLOBAL          = 'c',
        DICT            = 'd',
        EMPTY_DICT      = '}',
        APPENDS         = 'e',
        GET             = 'g',
        BINGET          = 'h',
        INST            = 'i',
        LONG_BINGET     = 'j',
        LIST            = 'l',
        EMPTY_LIST      = ']',
        OBJ             = 'o',
        PUT             = 'p',
        BINPUT          = 'q',
        LONG_BINPUT     = 'r',
        SETITEM         = 's',
        TUPLE           = 't',
        EMPTY_TUPLE     = ')',
        SETITEMS        = 'u',
        BINFLOAT        = 'G',

        PROTO           = '\x80',
        NEWOBJ          = '\x81',
        EXT1            = '\x82',
        EXT2            = '\x83',
        EXT4            = '\x84',
        TUPLE1          = '\x85',
        TUPLE2          = '\x86',
        TUPLE3          = '\x87',
        NEWTRUE         = '\x88',
        NEWFALSE        = '\x89',
        LONG1           = '\x8a',
        LONG4           = '\x8b',
    };

    /* Emit "BINPUT 1": store the current stack top in memo slot 1. */
    inline void putMemo() {
        os.put(BINPUT);
        os.put(1);
    }

    /*
     * Append the UTF-8 encoding of code point cp to out.  The caller must
     * pass a valid Unicode scalar value (<= 0x10ffff, not a surrogate).
     */
    static inline void appendUtf8(std::string &out, uint32_t cp) {
        if (cp < 0x80) {
            out += static_cast<char>(cp);
        } else if (cp < 0x800) {
            out += static_cast<char>(0xc0 | (cp >> 6));
            out += static_cast<char>(0x80 | (cp & 0x3f));
        } else if (cp < 0x10000) {
            out += static_cast<char>(0xe0 | (cp >> 12));
            out += static_cast<char>(0x80 | ((cp >> 6) & 0x3f));
            out += static_cast<char>(0x80 | (cp & 0x3f));
        } else {
            out += static_cast<char>(0xf0 | (cp >> 18));
            out += static_cast<char>(0x80 | ((cp >> 12) & 0x3f));
            out += static_cast<char>(0x80 | ((cp >> 6) & 0x3f));
            out += static_cast<char>(0x80 | (cp & 0x3f));
        }
    }

public:
    /* Bind the writer to the stream _os; nothing is written yet. */
    PickleWriter(std::ostream &_os) :
        os(_os) {
    }

    /* Start a pickle: PROTO opcode followed by protocol number 2. */
    inline void begin() {
        os.put(PROTO);
        os.put(2);
    }

    /* Terminate the pickle with STOP. */
    inline void end() {
        os.put(STOP);
    }

    /* Push an empty dict; items are added one by one via endItem(). */
    inline void beginDict() {
        os.put(EMPTY_DICT);
        putMemo();
    }

    /* Nothing to emit: the dict was completed item by item. */
    inline void endDict() {
    }

    /* Begin an item whose key the caller writes itself. */
    inline void beginItem() {
    }

    /* Begin a dict item, writing name as its key (None if name is null). */
    inline void beginItem(const char * name) {
        writeString(name);
    }

    /* Begin a dict item, writing name (full length) as its key. */
    inline void beginItem(const std::string &name) {
        // Forward the std::string itself so that embedded NUL bytes are not
        // lost to a strlen() on c_str().
        writeString(name);
    }

    /* Finish a dict item: SETITEM consumes the key/value just written. */
    inline void endItem(void) {
        os.put(SETITEM);
    }

    /* Push an empty list and a MARK; elements written next are collected
     * by endList(). */
    inline void beginList() {
        os.put(EMPTY_LIST);
        putMemo();
        os.put(MARK);
    }

    /* Append everything written since the MARK to the list (APPENDS). */
    inline void endList(void) {
        os.put(APPENDS);
    }

    /* Begin a tuple of unknown length (MARK ... TUPLE form). */
    inline void beginTuple() {
        os.put(MARK);
    }

    /* End a tuple started with the argument-less beginTuple(). */
    inline void endTuple(void) {
        os.put(TUPLE);
    }

    /*
     * Begin a tuple of known length.  Tuples shorter than four elements use
     * the dedicated EMPTY_TUPLE/TUPLE1..3 opcodes, which need no MARK.  Must
     * be paired with endTuple(length) using the same length.
     */
    inline void beginTuple(unsigned length) {
        if (length >= 4) {
            os.put(MARK);
        }
    }

    /* End a tuple started with beginTuple(length). */
    inline void endTuple(unsigned length) {
        static const Opcode ops[4] = {
            EMPTY_TUPLE,
            TUPLE1,
            TUPLE2,
            TUPLE3,
        };
        const Opcode op = length < 4 ? ops[length] : TUPLE;
        os.put(op);
    }

    /*
     * Write length bytes starting at s as a byte string, or None when s is
     * null.  Strings shorter than 256 bytes use SHORT_BINSTRING (one-byte
     * length); longer ones use BINSTRING (four-byte little-endian length).
     */
    inline void writeString(const char *s, size_t length) {
        if (!s) {
            writeNone();
            return;
        }

        if (length < 256) {
            os.put(SHORT_BINSTRING);
            os.put(static_cast<char>(length));
        } else {
            os.put(BINSTRING);
            putInt32(static_cast<uint32_t>(length));
        }
        os.write(s, static_cast<std::streamsize>(length));

        putMemo();
    }

    /* Write a NUL-terminated string, or None when s is null. */
    inline void writeString(const char *s) {
        if (!s) {
            writeNone();
            return;
        }

        writeString(s, strlen(s));
    }

    /* Write the whole of s, including any embedded NUL bytes. */
    inline void writeString(const std::string &s) {
        writeString(s.c_str(), s.size());
    }

    /*
     * Write length wide characters starting at s as a BINUNICODE string
     * (UTF-8 payload preceded by its byte length), or None when s is null.
     *
     * UTF-16 surrogate pairs are combined into a single code point.  Values
     * that are not valid Unicode scalar values (unpaired surrogates, negative
     * or out-of-range wchar_t values) are replaced with '?'.
     */
    inline void writeWString(const wchar_t *s, size_t length) {
        if (!s) {
            writeNone();
            return;
        }

        // The length prefix counts UTF-8 bytes, so encode before writing.
        std::string utf8;
        utf8.reserve(length);
        for (size_t i = 0; i < length; ++i) {
            uint32_t cp = static_cast<uint32_t>(s[i]);

            // High surrogate followed by a low surrogate: one code point.
            if (cp >= 0xd800 && cp <= 0xdbff && i + 1 < length) {
                const uint32_t low = static_cast<uint32_t>(s[i + 1]);
                if (low >= 0xdc00 && low <= 0xdfff) {
                    cp = 0x10000 + ((cp - 0xd800) << 10) + (low - 0xdc00);
                    ++i;
                }
            }

            if (cp > 0x10ffff || (cp >= 0xd800 && cp <= 0xdfff)) {
                cp = '?';
            }
            appendUtf8(utf8, cp);
        }

        os.put(BINUNICODE);
        putInt32(static_cast<uint32_t>(utf8.size()));
        os.write(utf8.data(), static_cast<std::streamsize>(utf8.size()));

        putMemo();
    }

    /* Write a NUL-terminated wide string, or None when s is null. */
    inline void writeWString(const wchar_t *s) {
        if (!s) {
            writeNone();
            return;
        }

        writeWString(s, wcslen(s));
    }

    /* Write Python's None. */
    inline void writeNone(void) {
        os.put(NONE);
    }

    /* Write a boolean using the protocol 2 NEWTRUE/NEWFALSE opcodes. */
    inline void writeBool(bool b) {
        os.put(b ? NEWTRUE : NEWFALSE);
    }

    /*
     * writeInt() overloads.  Each one picks the narrowest encoding it knows
     * for the value and delegates downwards: BININT1 (one unsigned byte),
     * BININT2 (two bytes), BININT (four bytes, signed) and finally LONG1 via
     * writeLong().
     */
    inline void writeInt(uint8_t i) {
        os.put(BININT1);
        os.put(static_cast<char>(i));
    }

    inline void writeInt(uint16_t i) {
        if (i < 0x100) {
            writeInt(static_cast<uint8_t>(i));
        } else {
            os.put(BININT2);
            putInt16(i);
        }
    }

    inline void writeInt(int32_t i) {
        if (0 <= i && i < 0x10000) {
            writeInt(static_cast<uint16_t>(i));
        } else {
            os.put(BININT);
            putInt32(static_cast<uint32_t>(i));
        }
    }

    // NOTE: the wider overloads below switch to LONG1 at 0x8000000 (2^27),
    // which is more conservative than the int32_t range requires.  The
    // threshold is kept as is so that the emitted opcodes do not change.
    inline void writeInt(uint32_t i) {
        if (i < 0x8000000) {
            writeInt(static_cast<int32_t>(i));
        } else {
            writeLong(i);
        }
    }

    inline void writeInt(long long i) {
        if (-0x8000000 <= i && i < 0x8000000) {
            writeInt(static_cast<int32_t>(i));
        } else {
            writeLong(i);
        }
    }

    inline void writeInt(unsigned long long i) {
        if (i < 0x8000000) {
            writeInt(static_cast<int32_t>(i));
        } else {
            writeLong(i);
        }
    }

    /*
     * Write f as BINFLOAT: the eight bytes of the double's object
     * representation, most significant byte first.
     */
    inline void writeFloat(double f) {
        static_assert(sizeof(double) == sizeof(uint64_t), "double is not 8 bytes");

        // Copy the bits into an integer and shift them out, so that the byte
        // order written does not depend on the host's endianness.
        uint64_t bits;
        memcpy(&bits, &f, sizeof bits);

        os.put(BINFLOAT);
        for (int shift = 56; shift >= 0; shift -= 8) {
            os.put(static_cast<char>((bits >> shift) & 0xff));
        }
    }

    /*
     * Write a call to __builtin__.bytearray with the given bytes as its sole
     * argument (GLOBAL, one-element tuple, REDUCE).
     */
    inline void writeByteArray(const void *buf, size_t length) {
        os.put(GLOBAL);
        os << "__builtin__\nbytearray\n";
        putMemo();
        writeString(static_cast<const char *>(buf), length);
        os.put(TUPLE1);
        os.put(REDUCE);
    }

    /*
     * Write a call to unpickle.Pointer with addr as its sole argument
     * (GLOBAL, one-element tuple, REDUCE).
     */
    inline void writePointer(unsigned long long addr) {
        os.put(GLOBAL);
        os << "unpickle\nPointer\n";
        putMemo();
        writeInt(addr);
        os.put(TUPLE1);
        os.put(REDUCE);
    }

protected:
    /* Write i as two bytes, least significant first. */
    inline void putInt16(uint16_t i) {
        os.put(static_cast<char>( i       & 0xff));
        os.put(static_cast<char>( i >> 8        ));
    }

    /* Write i as four bytes, least significant first. */
    inline void putInt32(uint32_t i) {
        os.put(static_cast<char>( i        & 0xff));
        os.put(static_cast<char>((i >>  8) & 0xff));
        os.put(static_cast<char>((i >> 16) & 0xff));
        os.put(static_cast<char>( i >> 24        ));
    }

    /*
     * Write l as LONG1: a one-byte count followed by that many bytes of
     * little-endian two's complement.  Zero is written with a count of 0.
     *
     * T may be any integer type at least 16 bits wide.  For signed T this
     * relies on right shift of negative values being arithmetic.
     */
    template< class T >
    inline void writeLong(T l) {
        os.put(LONG1);

        if (l == 0) {
            os.put(0);
            return;
        }

        // What remains of l once every significant byte is shifted out:
        // all ones for negative values, zero otherwise.
        const bool negative = std::numeric_limits<T>::is_signed && l < 0;
        const T sign = negative ? static_cast<T>(-1) : static_cast<T>(0);

        // Count how many bytes we need to represent the long integer.
        T rest = l;
        unsigned c = 0;
        do {
            ++c;
            rest >>= 8;
        } while (rest != sign);

        // The reader treats the top bit of the last byte as the sign.  Add
        // a padding byte when that bit disagrees with the real sign; this
        // includes unsigned values whose most significant bit is set, which
        // would otherwise be read back as negative.
        const bool topBitSet = ((l >> (8 * c - 1)) & 1) != 0;
        if (topBitSet != negative) {
            ++c;
        }
        os.put(static_cast<char>(c));

        for (unsigned i = 0; i < c; ++i) {
            os.put(static_cast<char>(l & 0xff));
            l >>= 8;
        }
    }
};

386