/**************************************************************************
 *
 * Copyright 2012 Jose Fonseca
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 **************************************************************************/
25
/*
 * Python pickle writer: a small helper class that serializes values to a
 * std::ostream as a stream of Python pickle opcodes.
 */
29
Jose Fonseca9653f952015-05-19 16:32:43 +010030#pragma once
José Fonseca299a1b32012-01-26 20:32:59 +000031
#include <assert.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>
#include <wchar.h>

#include <limits>
#include <ostream>
#include <string>
José Fonseca299a1b32012-01-26 20:32:59 +000040
41
42class PickleWriter
43{
44private:
45 std::ostream &os;
46
47 /*
48 * Python pickle opcodes. See pickle.py and pickletools.py from Python
49 * standard library for details.
50 */
51 enum Opcode {
52 MARK = '(',
53 STOP = '.',
54 POP = '0',
55 POP_MARK = '1',
56 DUP = '2',
Jose Fonsecae68f4322019-04-28 13:47:29 +010057 BINBYTES = 'B',
58 SHORT_BINBYTES = 'C',
José Fonseca299a1b32012-01-26 20:32:59 +000059 FLOAT = 'F',
60 INT = 'I',
61 BININT = 'J',
62 BININT1 = 'K',
63 LONG = 'L',
64 BININT2 = 'M',
65 NONE = 'N',
66 PERSID = 'P',
67 BINPERSID = 'Q',
68 REDUCE = 'R',
69 STRING = 'S',
70 BINSTRING = 'T',
71 SHORT_BINSTRING = 'U',
72 UNICODE = 'V',
73 BINUNICODE = 'X',
74 APPEND = 'a',
75 BUILD = 'b',
76 GLOBAL = 'c',
77 DICT = 'd',
78 EMPTY_DICT = '}',
79 APPENDS = 'e',
80 GET = 'g',
81 BINGET = 'h',
82 INST = 'i',
83 LONG_BINGET = 'j',
84 LIST = 'l',
85 EMPTY_LIST = ']',
86 OBJ = 'o',
87 PUT = 'p',
88 BINPUT = 'q',
89 LONG_BINPUT = 'r',
90 SETITEM = 's',
91 TUPLE = 't',
92 EMPTY_TUPLE = ')',
93 SETITEMS = 'u',
94 BINFLOAT = 'G',
95
José Fonseca447576d2012-01-27 14:27:13 +000096 PROTO = '\x80',
97 NEWOBJ = '\x81',
98 EXT1 = '\x82',
99 EXT2 = '\x83',
100 EXT4 = '\x84',
101 TUPLE1 = '\x85',
102 TUPLE2 = '\x86',
103 TUPLE3 = '\x87',
104 NEWTRUE = '\x88',
105 NEWFALSE = '\x89',
106 LONG1 = '\x8a',
107 LONG4 = '\x8b',
José Fonseca299a1b32012-01-26 20:32:59 +0000108 };
109
110public:
José Fonseca447576d2012-01-27 14:27:13 +0000111 PickleWriter(std::ostream &_os) :
José Fonsecab16a4a82012-03-16 08:21:29 +0000112 os(_os) {
113 }
114
115 inline void begin() {
José Fonseca299a1b32012-01-26 20:32:59 +0000116 os.put(PROTO);
Jose Fonsecae68f4322019-04-28 13:47:29 +0100117 os.put(3);
José Fonseca299a1b32012-01-26 20:32:59 +0000118 }
119
José Fonsecab16a4a82012-03-16 08:21:29 +0000120 inline void end() {
José Fonseca299a1b32012-01-26 20:32:59 +0000121 os.put(STOP);
122 }
123
124 inline void beginDict() {
125 os.put(EMPTY_DICT);
126 os.put(BINPUT);
127 os.put(1);
128 }
129
130 inline void endDict() {
131 }
132
133 inline void beginItem() {
134 }
135
136 inline void beginItem(const char * name) {
137 writeString(name);
138 }
139
140 inline void beginItem(const std::string &name) {
141 beginItem(name.c_str());
142 }
143
144 inline void endItem(void) {
145 os.put(SETITEM);
146 }
147
148 inline void beginList() {
149 os.put(EMPTY_LIST);
150 os.put(BINPUT);
151 os.put(1);
152 os.put(MARK);
153 }
154
155 inline void endList(void) {
156 os.put(APPENDS);
157 }
158
159 inline void beginTuple() {
160 os.put(MARK);
161 }
162
163 inline void endTuple(void) {
164 os.put(TUPLE);
165 }
166
José Fonseca9f580db2014-06-20 14:09:11 +0100167 inline void beginTuple(unsigned length) {
168 if (length >= 4) {
169 os.put(MARK);
170 }
171 }
172
173 inline void endTuple(unsigned length) {
174 static const Opcode ops[4] = {
175 EMPTY_TUPLE,
176 TUPLE1,
177 TUPLE2,
178 TUPLE3,
179 };
180 Opcode op = length < 4 ? ops[length] : TUPLE;
181 os.put(op);
182 }
183
José Fonseca299a1b32012-01-26 20:32:59 +0000184 inline void writeString(const char *s, size_t length) {
185 if (!s) {
186 writeNone();
187 return;
188 }
189
190 if (length < 256) {
191 os.put(SHORT_BINSTRING);
192 os.put(length);
193 } else {
194 os.put(BINSTRING);
José Fonseca447576d2012-01-27 14:27:13 +0000195 putInt32(length);
José Fonseca299a1b32012-01-26 20:32:59 +0000196 }
197 os.write(s, length);
198
199 os.put(BINPUT);
200 os.put(1);
201 }
202
203 inline void writeString(const char *s) {
204 if (!s) {
205 writeNone();
206 return;
207 }
208
209 writeString(s, strlen(s));
210 }
211
212 inline void writeString(const std::string &s) {
213 writeString(s.c_str(), s.size());
214 }
215
José Fonsecad5cda7c2014-09-25 15:19:09 +0100216 inline void writeWString(const wchar_t *s, size_t length) {
217 if (!s) {
218 writeNone();
219 return;
220 }
221
222 /* FIXME: emit UTF-8 */
223 os.put(BINUNICODE);
224 putInt32(length);
225 for (size_t i = 0; i < length; ++i) {
226 wchar_t wc = s[i];
227 char c = wc >= 0 && wc < 0x80 ? (char)wc : '?';
228 os.put(c);
229 }
230
231 os.put(BINPUT);
232 os.put(1);
233 }
234
235 inline void writeWString(const wchar_t *s) {
236 if (!s) {
237 writeNone();
238 return;
239 }
240
241 writeWString(s, wcslen(s));
242 }
243
José Fonseca299a1b32012-01-26 20:32:59 +0000244 inline void writeNone(void) {
245 os.put(NONE);
246 }
247
248 inline void writeBool(bool b) {
249 os.put(b ? NEWTRUE : NEWFALSE);
250 }
251
José Fonseca447576d2012-01-27 14:27:13 +0000252 inline void writeInt(uint8_t i) {
253 os.put(BININT1);
254 os.put(i);
José Fonseca299a1b32012-01-26 20:32:59 +0000255 }
256
José Fonseca447576d2012-01-27 14:27:13 +0000257 inline void writeInt(uint16_t i) {
258 if (i < 0x100) {
259 writeInt((uint8_t)i);
260 } else {
261 os.put(BININT2);
262 putInt16(i);
263 }
264 }
265
266 inline void writeInt(int32_t i) {
267 if (0 <= i && i < 0x10000) {
268 writeInt((uint16_t)i);
269 } else {
270 os.put(BININT);
271 putInt32(i);
272 }
273 }
274
275 inline void writeInt(uint32_t i) {
276 if (i < 0x8000000) {
277 writeInt((int32_t)i);
278 } else {
279 writeLong(i);
280 }
281 }
282
283 inline void writeInt(long long i) {
284 if (-0x8000000 <= i && i < 0x8000000) {
285 writeInt((int32_t)i);
286 } else {
287 writeLong(i);
288 }
289 }
290
291 inline void writeInt(unsigned long long i) {
292 if (i < 0x8000000) {
293 writeInt((int32_t)i);
294 } else {
295 writeLong(i);
296 }
297 }
298
299 inline void writeFloat(double f) {
300 union {
301 double f;
302 char c[8];
303 } u;
304
Jose Fonseca684771c2015-03-09 07:56:22 +0000305 static_assert(sizeof u.f == sizeof u.c, "double is not 8 bytes");
José Fonseca447576d2012-01-27 14:27:13 +0000306 u.f = f;
307
308 os.put(BINFLOAT);
309 os.put(u.c[7]);
310 os.put(u.c[6]);
311 os.put(u.c[5]);
312 os.put(u.c[4]);
313 os.put(u.c[3]);
314 os.put(u.c[2]);
315 os.put(u.c[1]);
316 os.put(u.c[0]);
317 }
318
Jose Fonsecae68f4322019-04-28 13:47:29 +0100319 inline void writeBytes(const char *s, size_t length) {
320 if (!s) {
321 writeNone();
322 return;
323 }
324
325 if (length < 256) {
326 os.put(SHORT_BINBYTES);
327 os.put(length);
328 } else {
329 os.put(BINBYTES);
330 putInt32(length);
331 }
332 os.write(s, length);
333
José Fonsecaeba2dec2012-03-17 16:12:22 +0000334 os.put(BINPUT);
335 os.put(1);
José Fonsecaeba2dec2012-03-17 16:12:22 +0000336 }
337
José Fonseca66b7bcc2014-06-20 14:12:19 +0100338 inline void writePointer(unsigned long long addr) {
339 os.put(GLOBAL);
340 os << "unpickle\nPointer\n";
341 os.put(BINPUT);
342 os.put(1);
343 writeInt(addr);
344 os.put(TUPLE1);
345 os.put(REDUCE);
346 }
347
José Fonseca447576d2012-01-27 14:27:13 +0000348protected:
349 inline void putInt16(uint16_t i) {
350 os.put( i & 0xff);
351 os.put( i >> 8 );
352 }
353
354 inline void putInt32(uint32_t i) {
355 os.put( i & 0xff);
356 os.put((i >> 8) & 0xff);
357 os.put((i >> 16) & 0xff);
358 os.put( i >> 24 );
359 }
360
361 template< class T >
362 inline void writeLong(T l) {
363 os.put(LONG1);
364
365 if (l == 0) {
366 os.put(0);
367 return;
368 }
369
José Fonseca447576d2012-01-27 14:27:13 +0000370 // Same as l >> (8 * sizeof l), but without the warnings
José Fonsecaebb75cb2012-11-17 11:44:43 +0000371 T sign;
372 if (std::numeric_limits<T>::is_signed) {
373 sign = l < 0 ? ~0 : 0;
374 } else {
375 sign = 0;
376 }
José Fonsecad64e5b22012-10-05 20:55:28 +0100377
José Fonsecafbab35d2014-05-13 17:36:42 +0100378 // Count how many bytes we need to represent the long integer.
José Fonsecad64e5b22012-10-05 20:55:28 +0100379 T sl = l;
380 unsigned c = 0;
381 do {
José Fonseca447576d2012-01-27 14:27:13 +0000382 ++c;
José Fonsecafbab35d2014-05-13 17:36:42 +0100383 sl >>= 8;
384 } while (sl != sign);
José Fonsecad64e5b22012-10-05 20:55:28 +0100385
José Fonseca447576d2012-01-27 14:27:13 +0000386 // Add an extra byte if sign bit doesn't match
387 if (((l >> (8 * c - 1)) & 1) != ((l >> (8 * sizeof l - 1)) & 1)) {
388 ++c;
389 }
390 os.put(c);
391
392 for (unsigned i = 0; i < c; ++ i) {
393 os.put(l & 0xff);
394 l >>= 8;
395 }
José Fonseca299a1b32012-01-26 20:32:59 +0000396 }
397};
398