1 /*******************************************************************************
2  * Serialization/deserialization code
3  *
4  * Author: Matthew Soucy, dproto@msoucy.me
5  */
6 module dproto.serialize;
7 
8 import dproto.exception;
9 import dproto.compat;
10 
11 import std.algorithm;
12 import std.array;
13 import std.conv;
14 import std.exception;
15 import std.range;
16 import std.system : Endian;
17 import std.traits;
18 
/*******************************************************************************
 * Tells whether a type name denotes one of the protocol buffer scalar types
 *
 * Params:
 *  	type = The type name to look up
 * Returns: True when the name is a built-in protocol buffer type
 */
bool isBuiltinType(string type) @safe pure nothrow {
	switch(type) {
		case "int32", "sint32", "int64", "sint64", "uint32", "uint64", "bool":
		case "fixed64", "sfixed64", "double", "bytes", "string":
		case "fixed32", "sfixed32", "float":
			return true;
		default:
			return false;
	}
}

unittest {
	// Recognised scalar names
	assert(isBuiltinType("sfixed32"));
	assert(isBuiltinType("double"));
	assert(isBuiltinType("string"));
	// Names that are not part of the built-in set
	assert(!isBuiltinType("int128"));
	assert(!isBuiltinType("quad"));
}
39 
/// Yields `T` unchanged for enum types and `Nullable!T` for every other type
template PossiblyNullable(T) {
	static if(!is(T == enum)) {
		import std.typecons : Nullable;
		alias PossiblyNullable = Nullable!T;
	} else {
		alias PossiblyNullable = T;
	}
}
48 
/// Default used when none is given: the first declared member for an enum,
/// `T.init` for any other type
template UnspecifiedDefaultValue(T) {
	static if(!is(T == enum)) {
		enum UnspecifiedDefaultValue = T.init;
	} else {
		import std.traits : EnumMembers;
		enum UnspecifiedDefaultValue = EnumMembers!(T)[0];
	}
}
57 
/// Converts the textual default `value` into a compile-time constant of type `T`
template SpecifiedDefaultValue(T, string value) {
	import std.conv : to;
	enum SpecifiedDefaultValue = value.to!T;
}
62 
/*******************************************************************************
 * Resolves a protocol buffer type name to the D type used to hold its values
 *
 * No alias is produced for names outside the list below.
 */
template BuffType(string T) {
	// 32-bit signed: varint, zigzag varint and fixed-width forms
	static if(T == "int32" || T == "sint32" || T == "sfixed32")
		alias BuffType = int;
	// 64-bit signed
	else static if(T == "int64" || T == "sint64" || T == "sfixed64")
		alias BuffType = long;
	// 32-bit unsigned
	else static if(T == "uint32" || T == "fixed32")
		alias BuffType = uint;
	// 64-bit unsigned
	else static if(T == "uint64" || T == "fixed64")
		alias BuffType = ulong;
	else static if(T == "bool")
		alias BuffType = bool;
	// Floating point
	else static if(T == "double")
		alias BuffType = double;
	else static if(T == "float")
		alias BuffType = float;
	// Length-delimited payloads
	else static if(T == "bytes")
		alias BuffType = ubyte[];
	else static if(T == "string")
		alias BuffType = string;
}

unittest {
	assert(is(BuffType!"sfixed32" == int));
	assert(is(BuffType!"double" == double));
	assert(is(BuffType!"string" == string));
	assert(is(BuffType!"bytes" : const ubyte[]));
	assert(!is(BuffType!"sfixed64" == int));
}
94 
/*******************************************************************************
 * Removes bytes from the range as if it were read in
 *
 * The decoded value is discarded; only the consumption of `data` matters.
 *
 * Params:
 *  	header = The data header
 *  	data   = The data to read from
 */
void defaultDecode(R)(ulong header, ref R data)
	if(isInputRange!R && is(ElementType!R : const ubyte))
{
	switch(header.wireType) {
		case 0:
			// Decode as the widest unsigned varint: reading it as "int32"
			// narrows through `to!int` and throws on valid values that do
			// not fit in an int.
			data.readProto!"uint64"();
			break;
		case 1:
			data.readProto!"fixed64"();
			break;
		case 2:
			data.readProto!"bytes"();
			break;
		case 5:
			data.readProto!"fixed32"();
			break;
		default:
			// Any other wire type: nothing is consumed
			break;
	}
}
122 
/*******************************************************************************
 * Looks up the wire type number used to encode the named type
 *
 * Names that are not built-in scalar types map to wire type 2.
 */
@nogc
auto msgType(string T) pure nothrow @safe {
	// Varint-encoded scalars
	if(T == "int32" || T == "sint32" || T == "uint32" ||
	   T == "int64" || T == "sint64" || T == "uint64" ||
	   T == "bool")
		return 0;

	// 64-bit fixed width
	if(T == "fixed64" || T == "sfixed64" || T == "double")
		return 1;

	// 32-bit fixed width
	if(T == "fixed32" || T == "sfixed32" || T == "float")
		return 5;

	// "bytes", "string" and every unrecognised name
	return 2;
}
143 
/*******************************************************************************
 * Maps a signed integer onto its zigzag representation
 *
 * Non-negative values become even numbers and negative values odd ones
 * (0 -> 0, -1 -> 1, 1 -> 2, -2 -> 3, ...).
 *
 * Params:
 *  	src = The signed integer to encode
 * Returns: The zigzag-encoded value, as the matching unsigned type
 */
@nogc Unsigned!T toZigZag(T)(T src) pure nothrow @safe @property
	if(isIntegral!T && isSigned!T)
{
	// Arithmetic right shift yields all ones for negative inputs and zero
	// otherwise; xor-ing that with the doubled value gives the encoding.
	enum signShift = T.sizeof * 8 - 1;
	return cast(Unsigned!T)((src << 1) ^ (src >> signShift));
}

unittest {
	assert(toZigZag(0) == 0);
	assert(toZigZag(-1) == 1);
	assert(toZigZag(1) == 2);
	assert(toZigZag(-2) == 3);
	assert(toZigZag(2147483647) == 4294967294);
	assert(toZigZag(-2147483648) == 4294967295);
}
169 
/*******************************************************************************
 * Recovers a signed integer from its zigzag representation
 *
 * Params:
 *  	src = The zigzag-encoded value
 * Returns: The decoded value, as the matching signed type
 */
@nogc Signed!T fromZigZag(T)(T src) pure nothrow @safe @property
	if(isIntegral!T && isUnsigned!T)
{
	// Odd encodings stand for negative numbers
	if(src & 1)
		return cast(Signed!T)(-(src >> 1) - 1);

	return cast(Signed!T)(src >> 1);
}
184 
unittest {
	// Known encodings
	assert(fromZigZag(0U) == 0);
	assert(fromZigZag(1U) == -1);
	assert(fromZigZag(2U) == 1);
	assert(fromZigZag(3U) == -2);
	assert(fromZigZag(4294967294U) == 2147483647);
	assert(fromZigZag(4294967295U) == -2147483648);

	// Encoding then decoding must give back the input, for int and long
	foreach(int i; -3 .. 3) {
		assert(i.toZigZag.fromZigZag == i);
		long wide = i;
		assert(wide.toZigZag.fromZigZag == wide);
	}
}
199 
/*******************************************************************************
 * Extracts the wire type stored in the low three bits of a field header
 *
 * Params:
 *  	data = The data header
 * Returns: The wire type value (0 through 7)
 */
@nogc ubyte wireType(ulong data) @safe @property pure nothrow {
	enum ulong wireTypeMask = 0b111;
	return cast(ubyte)(data & wireTypeMask);
}

unittest {
	assert(wireType(0x08) == 0); // Test for varints
	assert(wireType(0x09) == 1); // Test 64-bit
	assert(wireType(0x12) == 2); // Test length-delimited
}
216 
/*******************************************************************************
 * Extracts the message number from a field header
 *
 * The header is shifted right by three bits, dropping the wire type.
 *
 * Params:
 *  	data = The data header
 * Returns: The message number
 */
@nogc ulong msgNum(ulong data) @safe @property pure nothrow {
	enum wireTypeBits = 3;
	return data >> wireTypeBits;
}

unittest {
	assert(msgNum(0x08) == 1);
	assert(msgNum(0x11) == 2);
	assert(msgNum(0x1a) == 3);
	assert(msgNum(0x22) == 4);
}
234 
/*******************************************************************************
 * Read a VarInt-encoded value from a data stream
 *
 * Removes the bytes that represent the data from the stream
 *
 * Params:
 *  	src = The data stream
 * Returns: The decoded value
 * Throws: Exception if the stream holds no byte with a clear high bit, i.e.
 *         it is empty or ends in the middle of a varint
 */
T readVarint(T = ulong, R)(auto ref R src)
	if(isInputRange!R && is(ElementType!R : const ubyte))
{
	// Position of the last byte of the varint: the first one whose
	// continuation bit is clear.
	immutable terminator = src.countUntil!( a=>!(a&0x80) )();

	// countUntil yields -1 when no such byte exists. Without this check the
	// function would return 0 and consume nothing, so a caller looping over
	// the stream would make no progress.
	enforce(terminator >= 0, "Truncated varint: no terminating byte found");

	immutable len = cast(size_t)(terminator + 1);
	auto ret = src.take(len);
	src.popFrontExactly(len);
	return ret.fromVarint!T();
}
252 
/*******************************************************************************
 * Writes an unsigned value to an output range as a VarInt
 *
 * The value is emitted seven bits at a time, least significant group first;
 * every byte except the last has its high bit set.
 *
 * Params:
 *  	r = output range receiving the encoded bytes
 *  	src = The value to encode
 */
void toVarint(R, T)(ref R r, T src) @safe @property
	if(isOutputRange!(R, ubyte) && isIntegral!T && isUnsigned!T)
{
	enum ubyte continuationBit = 0b_1000_0000;

	// Emit a continuation byte for as long as more than seven bits remain
	for(; src >= continuationBit; src >>= 7)
		r.put(cast(ubyte)(src | continuationBit));

	// Final byte: high bit clear
	r.put(cast(ubyte) src);
}
274 
/*******************************************************************************
 * Writes a signed value to an output range as a VarInt
 *
 * The value is reinterpreted as a `ulong` and encoded through the unsigned
 * overload. Intended for int32 and int64 value types; this is not the
 * ZigZag encoding.
 *
 * Params:
 *  	r = output range receiving the encoded bytes
 *  	src = The value to encode
 */
void toVarint(R)(ref R r, long src) @safe @property
	if(isOutputRange!(R, ubyte))
{
	toVarint(r, cast(ulong) src);
}
292 
unittest {
	// Encodes one value into a fresh buffer
	static ubyte[] encode(ulong val)
	{
		auto sink = appender!(ubyte[])();
		toVarint(sink, val);
		return sink.data;
	}

	// Small values
	assert(equal(encode(3), [0x03]));
	assert(equal(encode(150), [0x96, 0x01]));
	assert(equal(encode(270), [0x8E, 0x02]));
	assert(equal(encode(86942), [0x9E, 0xA7, 0x05]));

	// Type boundaries
	assert(equal(encode(ubyte.max), [0xFF, 0x01]));
	assert(equal(encode(uint.max), [0xFF, 0xFF, 0xFF, 0xFF, 0xF]));
	assert(equal(encode(ulong.max), [0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x01]));

	// Negative values occupy the full ten bytes and round-trip as int
	assert(equal(encode(-1), [0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x01]));
	assert(encode(-12345).fromVarint!int == -12345);
	assert(encode(int.min).fromVarint!int == int.min);
}
311 
/*******************************************************************************
 * Decode a VarInt-encoded series of bytes into an unsigned value
 *
 * Every byte of `src` contributes its low seven bits, least significant
 * group first. An empty range decodes to 0.
 *
 * Params:
 *  	src = The data stream
 * Returns: The decoded value
 * Throws: Exception if `src` has more bytes than `T` has room for
 */
T fromVarint(T = ulong, R)(R src) @property
	if(isInputRange!R && is(ElementType!R : const ubyte) &&
		isIntegral!T && isUnsigned!T)
{
	immutable ubyte mask = 0b_0111_1111;
	T ret;

	size_t offset;
	foreach(val; src)
	{
		// Check before shifting: a shift count at or beyond the bit width
		// of the operand is not a valid shift.
		enforce(
				offset < T.sizeof * 8,
				"Varint value is too big for the type " ~ T.stringof
			);

		ret |= cast(T)(val & mask) << offset;

		offset += 7;
	}

	return ret;
}
341 
/*******************************************************************************
 * Decode a VarInt-encoded series of bytes into a signed value
 *
 * The bytes are decoded as a `ulong`, reinterpreted as a `long`, and then
 * converted to `T` with `std.conv.to`.
 *
 * Params:
 *  	src = The data stream
 * Returns: The decoded value
 */
T fromVarint(T, R)(R src) @property
	if(isInputRange!R && is(ElementType!R : const ubyte) &&
		isIntegral!T && isSigned!T)
{
	long wide = fromVarint!ulong(src);
	return to!T(wide);
}
356 
unittest {
	// Builds a mutable byte array from the given values
	ubyte[] bytesOf(ubyte[] vals...) {
		return vals.dup;
	}

	assert(bytesOf(0x03).fromVarint() == 3);
	assert(bytesOf(0x96, 0x01).fromVarint() == 150);
	assert(bytesOf(0x8E, 0x02).fromVarint() == 270);
	assert(bytesOf(0x9E, 0xA7, 0x05).fromVarint() == 86942);
	assert(bytesOf(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x01).fromVarint!int() == -1);

	// Eleven bytes do not fit into the default ulong target
	assertThrown(
		bytesOf(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x01).fromVarint()
	);
}
376 
/// Protocol buffer type name under which enum values are serialized
enum string ENUM_SERIALIZATION = "int32";
/// Wire type number used for packed messages
enum int PACKED_MSG_TYPE = 2;
381 
/*******************************************************************************
 * Checks whether a type can serve as a ProtoBuf input range
 *
 * Params:
 *     R = type to test
 * Returns: True when `R` is an input range whose elements implicitly convert
 *          to `const ubyte`
 */
template isProtoInputRange(R) {
	enum bool isProtoInputRange =
		isInputRange!R && is(ElementType!R : const ubyte);
}
391 
/*******************************************************************************
 * Decode a varint-encoded value of the named type from a data stream
 *
 * "sint32" and "sint64" are additionally zigzag-decoded; "bool" is read as
 * a varint and converted with `std.conv.to`.
 *
 * Params:
 *  	src = The data stream
 * Returns: The decoded value
 */
BuffType!T readProto(string T, R)(auto ref R src)
	if(isProtoInputRange!R && T.msgType == "int32".msgType)
{
	alias Target = BuffType!T;

	static if(T == "bool") {
		return to!Target(src.readVarint());
	} else static if(T == "sint32" || T == "sint64") {
		auto zigzagged = src.readVarint!(Unsigned!Target)();
		return zigzagged.fromZigZag;
	} else {
		return src.readVarint!Target();
	}
}
410 
/// Ditto
BuffType!T readProto(string T, R)(auto ref R src)
	if(isProtoInputRange!R &&
	  (T.msgType == "double".msgType || T.msgType == "float".msgType))
{
	import std.bitmanip : read, Endian;

	// Fixed-width values are read in little-endian byte order
	alias Target = BuffType!T;
	return read!(Target, Endian.littleEndian)(src);
}
419 
/// Ditto
///
/// Length-delimited values: a "uint32" varint length followed by that many
/// payload bytes.
///
/// Throws: Exception if the stream ends before the announced number of
///         bytes has been read
BuffType!T readProto(string T, R)(auto ref R src)
	if(isProtoInputRange!R && T.msgType == "string".msgType)
{
	BuffType!T ret;
	auto len = src.readProto!"uint32"();

	// The length comes from the input, so only reserve memory once the range
	// is known to hold that many bytes.
	static if(hasLength!R) {
		enforce(len <= src.length,
				"Length-delimited field is longer than the remaining data");
		ret.reserve(len);
	}

	foreach(i; 0..len) {
		// Ranges without a known length are checked as they are consumed
		enforce(!src.empty,
				"Unexpected end of data in length-delimited field");
		ret ~= src.front;
		src.popFront();
	}
	return ret;
}
433 
/*******************************************************************************
 * Checks whether a type can serve as a ProtoBuf output range
 *
 * Params:
 *     R = type to test
 * Returns: True when `R` is an output range accepting `ubyte` elements
 */
template isProtoOutputRange(R) {
	enum bool isProtoOutputRange = isOutputRange!(R, ubyte);
}
443 
/*******************************************************************************
 * Encode a value of the named type and write the bytes to an output range
 *
 * This overload handles the varint-encoded types; "sint32" and "sint64" are
 * zigzag-encoded before being written.
 *
 * Params:
 *     r = output range
 *     src = The raw data
 */
void writeProto(string T, R)(ref R r, BuffType!T src)
	if(isProtoOutputRange!R && T.msgType == "int32".msgType)
{
	static if(T != "sint32" && T != "sint64")
		toVarint(r, src);
	else
		toVarint(r, src.toZigZag);
}
461 
/// Ditto
void writeProto(string T, R)(ref R r, BuffType!T src)
	if(isProtoOutputRange!R &&
	  (T.msgType == "double".msgType || T.msgType == "float".msgType))
{
	import std.bitmanip : nativeToLittleEndian;

	// Fixed-width values are written in little-endian byte order
	auto raw = nativeToLittleEndian!(BuffType!T)(src);
	r.put(raw[]);
}
470 
/// Ditto
void writeProto(string T, R)(ref R r, const BuffType!T src)
	if(isProtoOutputRange!R && T.msgType == "string".msgType)
{
	// Length prefix first, then the payload bytes
	auto payloadLength = src.length;
	toVarint(r, payloadLength);
	r.put(cast(ubyte[])src);
}
478 
// Unit test for issue #115
unittest
{
	static if (__traits(compiles, {import std.meta : AliasSeq;})) {
		import std.meta : AliasSeq;
	} else {
		import std.typetuple : TypeTuple;
		alias AliasSeq = TypeTuple;
	}
	import std.conv : text;

	// Each type is written and read back twice: once with false / -1 and
	// once with true / 5.
	foreach (counter; 0 .. 2) {
		foreach (T; AliasSeq!("bool", "int32", "uint32", "fixed32", "int64", "uint64", "fixed64", "sfixed32", "sfixed64", "sint64", "sint32")) {
			alias T2 = BuffType!T;

			static if (is(T2 == bool))
				T2 src = counter != 0;
			else
				T2 src = counter==0 ? -1 : 5;

			auto r = appender!(ubyte[])();
			r.writeProto!T(src);

			T2 src2 = readProto!T(r.data);
			assert(src == src2, text("error: ", T.stringof, " ", src2, " ", src));
		}
	}
}
505 
/*******************************************************************************
 * Output range that discards the bytes written to it and only counts them
 *
 * The count is stored in `cnt`, which is also exposed through `alias this`.
 */
struct CntRange
{
	/// Number of bytes written so far
	size_t cnt;

	/// Counts a single byte
	void put(in ubyte) @safe @nogc { cnt += 1; }

	/// Counts every byte of the array
	void put(in ubyte[] ary) @safe @nogc { cnt = cnt + ary.length; }

	alias cnt this;
}