1 /*******************************************************************************
2  * Serialization/deserialization code
3  *
4  * Author: Matthew Soucy, dproto@msoucy.me
5  */
6 module dproto.serialize;
7 
8 import dproto.exception;
9 import dproto.compat;
10 
11 import std.algorithm;
12 import std.array;
13 import std.conv;
14 import std.exception;
15 import std.range;
16 import std.system : Endian;
17 import std.traits;
18 
/*******************************************************************************
 * Tells whether a type name denotes one of the protocol buffer primitive types
 *
 * Params:
 *  	type = The type name to look up
 * Returns: True if the name is a protocol buffer primitive, false otherwise
 */
bool isBuiltinType(string type) @safe pure nothrow {
	switch(type) {
		// Varint-encoded types
		case "int32", "sint32", "int64", "sint64", "uint32", "uint64", "bool":
		// 64-bit and length-delimited types
		case "fixed64", "sfixed64", "double", "bytes", "string":
		// 32-bit types
		case "fixed32", "sfixed32", "float":
			return true;
		default:
			return false;
	}
}
31 
unittest {
	// Names of real protocol buffer primitives are accepted
	foreach(name; ["sfixed32", "double", "string"])
		assert(isBuiltinType(name));

	// Anything else is rejected
	foreach(name; ["int128", "quad"])
		assert(!isBuiltinType(name));
}
39 
/// Yields `std.typecons.Nullable!T` for every type except enums, which are
/// passed through unchanged
template PossiblyNullable(T) {
	static if(!is(T == enum)) {
		import std.typecons : Nullable;
		alias PossiblyNullable = Nullable!T;
	} else {
		alias PossiblyNullable = T;
	}
}
48 
/// The value used when no default is given: the first declared member for
/// an enum type, `T.init` for everything else
template UnspecifiedDefaultValue(T) {
	static if(!is(T == enum)) {
		enum UnspecifiedDefaultValue = T.init;
	} else {
		import std.traits : EnumMembers;
		enum UnspecifiedDefaultValue = EnumMembers!(T)[0];
	}
}
57 
/// Converts the textual default `value` to type `T` at compile time
template SpecifiedDefaultValue(T, string value) {
	import std.conv : to;
	enum SpecifiedDefaultValue = value.to!T;
}
62 
/*******************************************************************************
 * Maps a protocol buffer type name to the D type used to hold its values
 *
 * The branches are grouped by resulting D type. A name that matches no branch
 * leaves the template without an eponymous member.
 */
template BuffType(string T) {
	// 32-bit signed: varint, zigzag varint and fixed-width forms
	static if(T == "int32" || T == "sint32" || T == "sfixed32")
		alias BuffType = int;
	// 64-bit signed
	else static if(T == "int64" || T == "sint64" || T == "sfixed64")
		alias BuffType = long;
	// 32-bit unsigned
	else static if(T == "uint32" || T == "fixed32")
		alias BuffType = uint;
	// 64-bit unsigned
	else static if(T == "uint64" || T == "fixed64")
		alias BuffType = ulong;
	else static if(T == "bool")
		alias BuffType = bool;
	// Floating point
	else static if(T == "float")
		alias BuffType = float;
	else static if(T == "double")
		alias BuffType = double;
	// Length-delimited data
	else static if(T == "bytes")
		alias BuffType = ubyte[];
	else static if(T == "string")
		alias BuffType = string;
}
86 
unittest {
	// Exact type matches
	assert(is(BuffType!"sfixed32" == int));
	assert(is(BuffType!"double" == double));
	assert(is(BuffType!"string" == string));
	// "bytes" only has to be usable as a read-only byte array
	assert(is(BuffType!"bytes" : const ubyte[]));
	// A 64-bit type must not map onto a 32-bit integer
	assert(!is(BuffType!"sfixed64" == int));
}
94 
/*******************************************************************************
 * Removes bytes from the range as if it were read in
 *
 * Used to skip over a field whose value is not needed: the value is decoded
 * according to the wire type stored in the header and then discarded.
 * Headers carrying any other wire type leave the range untouched.
 *
 * Params:
 *  	header = The data header
 *  	data   = The data to read from
 */
void defaultDecode(R)(ulong header, ref R data)
	if(isInputRange!R && is(ElementType!R : const ubyte))
{
	switch(header.wireType) {
		case 0:
			// Skip as a full-width unsigned varint. Decoding as "int32" would
			// push the value through a checked narrowing conversion and throw
			// for any skipped varint that does not fit in an int.
			data.readProto!"uint64"();
			break;
		case 1:
			data.readProto!"fixed64"();
			break;
		case 2:
			data.readProto!"bytes"();
			break;
		case 5:
			data.readProto!"fixed32"();
			break;
		default:
			break;
	}
}
122 
/*******************************************************************************
 * Maps the given type string to the wire type number
 *
 * Any name that is not a known primitive is reported as wire type 2, the
 * same number used for "bytes" and "string".
 */
@nogc
auto msgType(string T) pure nothrow @safe {
	enum int varint = 0;
	enum int fixed64Bit = 1;
	enum int lengthDelimited = 2;
	enum int fixed32Bit = 5;

	switch(T) {
		case "bool":
		case "int32", "int64":
		case "sint32", "sint64":
		case "uint32", "uint64":
			return varint;
		case "double", "fixed64", "sfixed64":
			return fixed64Bit;
		case "float", "fixed32", "sfixed32":
			return fixed32Bit;
		case "bytes", "string":
			return lengthDelimited;
		default:
			return lengthDelimited;
	}
}
143 
/*******************************************************************************
 * Encodes a number in its zigzag encoding
 *
 * Params:
 *  	src = The raw signed integer to encode
 * Returns: The zigzag-encoded value, as the unsigned counterpart of `T`
 */
@nogc Unsigned!T toZigZag(T)(T src) pure nothrow @safe @property
	if(isIntegral!T && isSigned!T)
{
	enum signShift = T.sizeof * 8 - 1;

	// Arithmetic right shift: every bit set for negatives, none otherwise
	immutable T signFill = src >> signShift;
	immutable T doubled = src << 1;

	return cast(Unsigned!T)(doubled ^ signFill);
}
158 
unittest {
	// Small magnitudes alternate between non-negative and negative inputs
	assert(toZigZag(0) == 0);
	assert(toZigZag(-1) == 1);
	assert(toZigZag(1) == 2);
	assert(toZigZag(-2) == 3);
	// Values at the edge of the 32-bit range
	assert(toZigZag(2147483647) == 4294967294);
	assert(toZigZag(-2147483648) == 4294967295);
}
167 
/*******************************************************************************
 * Decodes a number from its zigzag encoding
 *
 * Params:
 *  	src = The zigzag-encoded value to decode
 * Returns: The raw integer, as the signed counterpart of `T`
 */
@nogc Signed!T fromZigZag(T)(T src) pure nothrow @safe @property
	if(isIntegral!T && isUnsigned!T)
{
	// The lowest bit carries the sign, the remaining bits the magnitude
	immutable magnitude = src >>> 1;
	// Every bit set when the sign bit is 1, none when it is 0
	immutable signFill = -(src & 1);

	return magnitude ^ signFill;
}
180 
unittest {
	// Fixed reference values
	assert(fromZigZag(0U) == 0);
	assert(fromZigZag(1U) == -1);
	assert(fromZigZag(2U) == 1);
	assert(fromZigZag(3U) == -2);
	assert(fromZigZag(4294967294U) == 2147483647);
	assert(fromZigZag(4294967295U) == -2147483648);

	// Encoding followed by decoding gives back the input, for both widths
	foreach(narrow; -3..3) {
		assert(fromZigZag(toZigZag(narrow)) == narrow);
		long wide = narrow;
		assert(fromZigZag(toZigZag(wide)) == wide);
	}
}
195 
/*******************************************************************************
 * Get the wire type from the encoding value
 *
 * The wire type is held in the three lowest bits of the header.
 *
 * Params:
 *  	data = The data header
 * Returns: The wire type value
 */
@nogc ubyte wireType(ulong data) @safe @property pure nothrow {
	enum ulong wireTypeMask = 0b111;
	return cast(ubyte)(data & wireTypeMask);
}
206 
unittest {
	assert(wireType(0x08) == 0); // varint
	assert(wireType(0x09) == 1); // 64-bit
	assert(wireType(0x12) == 2); // length-delimited
}
212 
/*******************************************************************************
 * Get the message number from the encoding value
 *
 * The message number is everything above the three lowest bits of the header.
 *
 * Params:
 *  	data = The data header
 * Returns: The message number
 */
@nogc ulong msgNum(ulong data) @safe @property pure nothrow {
	enum wireTypeBits = 3;
	return data >> wireTypeBits;
}
223 
unittest {
	// The wire type in the low bits must not affect the message number
	assert(msgNum(0x08) == 1);
	assert(msgNum(0x11) == 2);
	assert(msgNum(0x1a) == 3);
	assert(msgNum(0x22) == 4);
}
230 
/*******************************************************************************
 * Read a VarInt-encoded value from a data stream
 *
 * Removes the bytes that represent the data from the stream
 *
 * Params:
 *  	src = The data stream
 * Returns: The decoded value
 * Throws: Exception if the stream holds no byte with a clear continuation
 *         bit, i.e. it is empty or ends in the middle of a varint
 */
T readVarint(T = ulong, R)(auto ref R src)
	if(isInputRange!R && is(ElementType!R : const ubyte))
{
	// Index of the final byte of the varint: the first byte whose high
	// (continuation) bit is clear. -1 means no such byte exists.
	immutable last = src.countUntil!( a=>!(a&0x80) )();

	// Without this check a truncated stream would decode as 0 and consume
	// nothing, so a caller looping until the stream is empty never advances.
	enforce(last >= 0, "Unexpected end of data while reading a varint");

	immutable len = last + 1;
	auto ret = src.take(len);
	src.popFrontExactly(len);
	return ret.fromVarint!T();
}
248 
/*******************************************************************************
 * Encode an unsigned value into a VarInt-encoded series of bytes
 *
 * The bytes are written to the output range, lowest seven bits first; nothing
 * is returned.
 *
 * Params:
 *  	r = output range that receives the encoded bytes
 *  	src = The value to encode
 */
void toVarint(R, T)(ref R r, T src) @safe @property
	if(isOutputRange!(R, ubyte) && isIntegral!T && isUnsigned!T)
{
	enum ubyte continuationBit = 0x80;

	// Every byte but the last carries seven payload bits plus the flag
	for(; src >= continuationBit; src >>= 7)
		r.put(cast(ubyte)(src | continuationBit));

	// What remains fits in seven bits, so the flag stays clear
	r.put(cast(ubyte) src);
}
270 
/*******************************************************************************
 * Encode a signed value into a VarInt-encoded series of bytes
 *
 * This overload is meant for the int32 and int64 value types
 * (not to be confused with signed values encoded by ZigZag!). The value is
 * converted to `ulong` and written by the unsigned overload; nothing is
 * returned.
 *
 * Params:
 *  	r = output range that receives the encoded bytes
 *  	src = The value to encode
 */
void toVarint(R)(ref R r, long src) @safe @property
	if(isOutputRange!(R, ubyte))
{
	toVarint(r, cast(ulong) src);
}
288 
unittest {
	// Encodes one value into a fresh buffer and hands back the bytes
	static ubyte[] encode(ulong val)
	{
		auto sink = appender!(ubyte[])();
		toVarint(sink, val);
		return sink.data;
	}

	// Reference encodings
	assert(equal(encode(150), [0x96, 0x01]));
	assert(equal(encode(3), [0x03]));
	assert(equal(encode(270), [0x8E, 0x02]));
	assert(equal(encode(86942), [0x9E, 0xA7, 0x05]));

	// Largest values of several widths
	assert(equal(encode(ubyte.max), [0xFF, 0x01]));
	assert(equal(encode(uint.max), [0xFF, 0xFF, 0xFF, 0xFF, 0xF]));
	assert(equal(encode(ulong.max), [0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x01]));

	// Negative values
	assert(equal(encode(-1), [0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x01]));
	assert(fromVarint!int(encode(-12345)) == -12345);
	assert(fromVarint!int(encode(int.min)) == int.min);
}
307 
/*******************************************************************************
 * Decode a VarInt-encoded series of bytes into an unsigned value
 *
 * Every element of the range contributes its low seven bits, starting with
 * the least significant group.
 *
 * Params:
 *  	src = The data stream
 * Returns: The decoded value
 * Throws: Exception if the stream holds more seven-bit groups than can start
 *         inside `T`
 */
T fromVarint(T = ulong, R)(R src) @property
	if(isInputRange!R && is(ElementType!R : const ubyte) &&
		isIntegral!T && isUnsigned!T)
{
	immutable ubyte mask = 0b_0111_1111;
	T ret;

	size_t offset;
	foreach(val; src)
	{
		// Validate the shift amount before using it, so an over-long varint
		// is rejected without first shifting by the type's width or more.
		enforce(
				offset < T.sizeof * 8,
				"Varint value is too big for the type " ~ T.stringof
			);

		ret |= cast(T)(val & mask) << offset;

		offset += 7;
	}

	return ret;
}
337 
/*******************************************************************************
 * Decode a VarInt-encoded series of bytes into a signed value
 *
 * The bytes are decoded as a `ulong`, reinterpreted as a `long`, and then
 * converted to `T` with `std.conv.to`.
 *
 * Params:
 *  	src = The data stream
 * Returns: The decoded value
 */
T fromVarint(T, R)(R src) @property
	if(isInputRange!R && is(ElementType!R : const ubyte) &&
		isIntegral!T && isSigned!T)
{
	immutable long decoded = fromVarint!ulong(src);
	return to!T(decoded);
}
352 
unittest {
	// Builds a mutable byte array from the listed values
	ubyte[] ubs(ubyte[] vals...) {
		return vals.dup;
	}

	// Reference decodings
	assert(fromVarint(ubs(0x96, 0x01)) == 150);
	assert(fromVarint(ubs(0x03)) == 3);
	assert(fromVarint(ubs(0x8E, 0x02)) == 270);
	assert(fromVarint(ubs(0x9E, 0xA7, 0x05)) == 86942);
	assert(fromVarint!int(ubs(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x01)) == -1);

	// Eleven bytes are more than a 64-bit value can hold
	assertThrown!Exception(
		fromVarint(ubs(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x01))
	);
}
372 
/// The protocol buffer type name that enum values are encoded as
enum ENUM_SERIALIZATION = "int32";
/// The message type number to encode a packed message as
/// (2 is also the number `msgType` reports for "bytes" and "string")
enum PACKED_MSG_TYPE = 2;
377 
/*******************************************************************************
 * Test a range for being a valid ProtoBuf input range
 *
 * A type qualifies when it is an input range whose elements implicitly
 * convert to `const ubyte`.
 *
 * Params:
 *     R = type to test
 * Returns: True if `R` can be used as a source of encoded bytes
 */
template isProtoInputRange(R) {
	enum isProtoInputRange = isInputRange!R && is(ElementType!R : const ubyte);
}
387 
/*******************************************************************************
 * Decode a series of bytes into a value
 *
 * This overload handles every type whose message type equals that of "int32".
 * "sint32" and "sint64" are additionally zigzag-decoded, and "bool" is
 * converted from the decoded integer.
 *
 * Params:
 *  	src = The data stream
 * Returns: The decoded value
 */
BuffType!T readProto(string T, R)(auto ref R src)
	if(isProtoInputRange!R && T.msgType == "int32".msgType)
{
	alias Value = BuffType!T;

	static if(T == "bool") {
		return to!Value(readVarint(src));
	} else static if(T == "sint32" || T == "sint64") {
		return fromZigZag(readVarint!(Unsigned!Value)(src));
	} else {
		return readVarint!Value(src);
	}
}
406 
/// Ditto
BuffType!T readProto(string T, R)(auto ref R src)
	if(isProtoInputRange!R &&
	  (T.msgType == "double".msgType || T.msgType == "float".msgType))
{
	import std.bitmanip : Endian, read;

	// Fixed-width values are read in little-endian byte order
	alias Value = BuffType!T;
	return read!(Value, Endian.littleEndian)(src);
}
415 
/// Ditto
///
/// For length-delimited data a "uint32" length is read first, followed by
/// that many bytes. An Exception is thrown if the stream ends before the
/// declared number of bytes has been read.
BuffType!T readProto(string T, R)(auto ref R src)
	if(isProtoInputRange!R && T.msgType == "string".msgType)
{
	BuffType!T ret;
	auto len = src.readProto!"uint32"();

	// When the remaining size is known, reject an impossible length before
	// reserving memory for it.
	static if(hasLength!R) {
		enforce(src.length >= len,
			"Length-delimited field is longer than the remaining data");
	}

	ret.reserve(len);
	foreach(i; 0..len) {
		// Report truncated input as an exception instead of calling front
		// on an empty range.
		enforce(!src.empty,
			"Unexpected end of data while reading a length-delimited field");
		ret ~= src.front;
		src.popFront();
	}
	return ret;
}
429 
/*******************************************************************************
 * Test a range for being a valid ProtoBuf output range
 *
 * A type qualifies when it is an output range that accepts `ubyte` elements.
 *
 * Params:
 *     R = type to test
 * Returns: True if `R` can be used as a destination for encoded bytes
 */
template isProtoOutputRange(R) {
	enum isProtoOutputRange = isOutputRange!(R, ubyte);
}
439 
/*******************************************************************************
 * Encode a value into a series of bytes
 *
 * This overload handles every type whose message type equals that of "int32".
 * "sint32" and "sint64" values are zigzag-encoded before being written as a
 * varint. The bytes go to the output range; nothing is returned.
 *
 * Params:
 *     r = output range
 *     src = The raw data
 */
void writeProto(string T, R)(ref R r, BuffType!T src)
	if(isProtoOutputRange!R && T.msgType == "int32".msgType)
{
	static if(T != "sint32" && T != "sint64") {
		toVarint(r, src);
	} else {
		toVarint(r, toZigZag(src));
	}
}
457 
/// Ditto
void writeProto(string T, R)(ref R r, BuffType!T src)
	if(isProtoOutputRange!R &&
	  (T.msgType == "double".msgType || T.msgType == "float".msgType))
{
	import std.bitmanip : nativeToLittleEndian;

	// Fixed-width values are written in little-endian byte order
	auto encoded = nativeToLittleEndian!(BuffType!T)(src);
	r.put(encoded[]);
}
466 
/// Ditto
void writeProto(string T, R)(ref R r, const BuffType!T src)
	if(isProtoOutputRange!R && T.msgType == "string".msgType)
{
	// Length prefix first, then the raw bytes
	auto payload = cast(ubyte[])src;
	toVarint(r, payload.length);
	r.put(payload);
}
474 
// Unit test for issue #115
unittest
{
	// AliasSeq comes from std.meta where available, std.typetuple otherwise
	static if (__traits(compiles, {import std.meta : AliasSeq;})) {
		import std.meta : AliasSeq;
	} else {
		import std.typetuple : TypeTuple;
		alias AliasSeq = TypeTuple;
	}
	import std.conv:text;

	// Pass 0 writes false / -1, pass 1 writes true / 5
	foreach (pass; 0 .. 2) {
		foreach (T; AliasSeq!("bool", "int32", "uint32", "fixed32", "int64", "uint64", "fixed64", "sfixed32", "sfixed64", "sint64", "sint32")) {
			alias Value = BuffType!T;

			static if (is(Value == bool))
				Value written = pass != 0;
			else
				Value written = pass == 0 ? -1 : 5;

			auto sink = appender!(ubyte[])();
			sink.writeProto!T(written);

			// The value read back must equal the value written
			Value readBack = readProto!T(sink.data);
			assert(written == readBack, text("error: ", T.stringof, " ", readBack, " ", written));
		}
	}
}
501 
/*******************************************************************************
 * Output range that discards everything put into it and only keeps a running
 * count of the bytes; the count is reachable through `cnt` or `alias this`
 */
struct CntRange
{
@nogc:
	/// Number of bytes received so far
	size_t cnt;

	/// Counts a single byte
	void put(in ubyte) @safe
	{
		cnt += 1;
	}

	/// Counts every byte of an array
	void put(in ubyte[] ary) @safe
	{
		cnt += ary.length;
	}

	alias cnt this;
}