1 /*******************************************************************************
2  * Serialization/deserialization code
3  *
4  * Author: Matthew Soucy, msoucy@csh.rit.edu
5  * Date: Oct 5, 2013
6  * Version: 0.0.2
7  */
8 module dproto.serialize;
9 
10 import dproto.exception;
11 import dproto.compat;
12 
13 import std.algorithm;
14 import std.array;
15 import std.conv;
16 import std.exception;
17 import std.range;
18 import std.system : Endian;
19 import std.traits;
20 
/*******************************************************************************
 * Tells whether a type name denotes one of the protocol buffer scalar types
 *
 * Params:
 *  	type = The type name to look up
 * Returns: True if `type` names a protocol buffer primitive, false otherwise
 */
bool isBuiltinType(string type) @safe pure nothrow {
	switch(type) {
		// Varint-encoded types
		case "int32", "sint32", "uint32":
		case "int64", "sint64", "uint64":
		case "bool":
		// 64-bit fixed-width types
		case "fixed64", "sfixed64", "double":
		// Length-delimited types
		case "bytes", "string":
		// 32-bit fixed-width types
		case "fixed32", "sfixed32", "float":
			return true;
		default:
			return false;
	}
}

unittest {
	// Known primitives
	assert(isBuiltinType("sfixed32"));
	assert(isBuiltinType("double"));
	assert(isBuiltinType("string"));
	// Names that are not protocol buffer primitives
	assert(!isBuiltinType("int128"));
	assert(!isBuiltinType("quad"));
}
41 
/// Yields `T` itself for enum types and `std.typecons.Nullable!T` for every other type
template PossiblyNullable(T) {
	static if(!is(T == enum)) {
		import std.typecons : Nullable;
		alias PossiblyNullable = Nullable!T;
	} else {
		// Enums are used as-is, without a Nullable wrapper
		alias PossiblyNullable = T;
	}
}
50 
/// Value used when no default is given: the first declared member for enums, `T.init` otherwise
template UnspecifiedDefaultValue(T) {
	static if(!is(T == enum)) {
		enum UnspecifiedDefaultValue = T.init;
	} else {
		import std.traits : EnumMembers;
		// EnumMembers lists the members in declaration order
		enum UnspecifiedDefaultValue = EnumMembers!T[0];
	}
}
59 
/// Converts the textual default `value` to type `T` at compile time using `std.conv.to`
template SpecifiedDefaultValue(T, string value) {
	import std.conv : to;
	enum SpecifiedDefaultValue = value.to!T();
}
64 
/*******************************************************************************
 * Maps a protocol buffer type name to the D type that stores its values
 *
 * No alias is declared for names that are not listed here.
 */
template BuffType(string T) {
	// 32-bit integers (varint and fixed-width encodings share the D type)
	static if(T == "int32" || T == "sint32" || T == "sfixed32")
		alias BuffType = int;
	else static if(T == "uint32" || T == "fixed32")
		alias BuffType = uint;
	// 64-bit integers
	else static if(T == "int64" || T == "sint64" || T == "sfixed64")
		alias BuffType = long;
	else static if(T == "uint64" || T == "fixed64")
		alias BuffType = ulong;
	// Boolean and floating point
	else static if(T == "bool")
		alias BuffType = bool;
	else static if(T == "float")
		alias BuffType = float;
	else static if(T == "double")
		alias BuffType = double;
	// Length-delimited payloads
	else static if(T == "bytes")
		alias BuffType = ubyte[];
	else static if(T == "string")
		alias BuffType = string;
	// Wire types 3 and 4 are deprecated and intentionally unsupported.
}

unittest {
	assert(is(BuffType!"sfixed32" == int));
	assert(is(BuffType!"double" == double));
	assert(is(BuffType!"string" == string));
	assert(is(BuffType!"bytes" : const ubyte[]));
	assert(!is(BuffType!"sfixed64" == int));
}
96 
/*******************************************************************************
 * Removes the payload of one field from the range as if it were read in
 *
 * The wire type stored in `header` decides how the payload is consumed;
 * the decoded value is discarded.
 *
 * Params:
 *  	header = The data header (field number and wire type)
 *  	data   = The data to read from; advanced past the skipped payload
 */
void defaultDecode(R)(ulong header, ref R data)
	if(isInputRange!R && is(ElementType!R : const ubyte))
{
	switch(header.wireType) {
		case 0:
			// Skip as uint64: it accepts the full varint value range.
			// Skipping as "int32" narrows the decoded ulong to int and
			// throws for any value outside the int range.
			data.readProto!"uint64"();
			break;
		case 1:
			data.readProto!"fixed64"();
			break;
		case 2:
			data.readProto!"bytes"();
			break;
		case 5:
			data.readProto!"fixed32"();
			break;
		default:
			// Wire types without a reader (3 and 4 are deprecated) are left
			// untouched: nothing is consumed from `data`.
			// NOTE(review): the caller cannot resynchronize after such a
			// header - consider reporting an error here.
			break;
	}
}
124 
/*******************************************************************************
 * Looks up the wire type number used to encode the given type name
 *
 * Names that are not scalar primitives are reported as length-delimited (2).
 */
@nogc
auto msgType(string T) pure nothrow @safe {
	// Wire type numbers returned by this function
	enum : int { varint = 0, bits64 = 1, lengthDelimited = 2, bits32 = 5 }

	switch(T) {
		case "bool":
		case "int32", "int64":
		case "sint32", "sint64":
		case "uint32", "uint64":
			return varint;
		case "double", "fixed64", "sfixed64":
			return bits64;
		case "float", "fixed32", "sfixed32":
			return bits32;
		default:
			// "bytes", "string" and every other name
			return lengthDelimited;
	}
}
145 
/*******************************************************************************
 * Maps a signed integer onto its zigzag representation
 *
 * Non-negative values map to even numbers, negative values to odd numbers:
 * 0 -> 0, -1 -> 1, 1 -> 2, -2 -> 3, ...
 *
 * Params:
 *  	src = The raw integer to encode
 * Returns: The zigzag-encoded value
 */
@nogc Unsigned!T toZigZag(T)(in T src) pure nothrow @safe @property
	if(isIntegral!T && isSigned!T)
{
	// Arithmetic shift by (bits - 1) gives 0 for non-negative and -1 (all
	// bits set) for negative values; the xor then flips the doubled value.
	enum signShift = T.sizeof * 8 - 1;
	return cast(Unsigned!T)((src << 1) ^ (src >> signShift));
}

unittest {
	assert(toZigZag(0) == 0);
	assert(toZigZag(-1) == 1);
	assert(toZigZag(1) == 2);
	assert(toZigZag(-2) == 3);
	assert(toZigZag(2147483647) == 4294967294);
	assert(toZigZag(-2147483648) == 4294967295);
}
171 
/*******************************************************************************
 * Recovers the signed integer from its zigzag representation
 *
 * Even inputs decode to non-negative values, odd inputs to negative values.
 *
 * Params:
 *  	src = The zigzag-encoded value to decode
 * Returns: The raw integer
 */
@nogc Signed!T fromZigZag(T)(in T src) pure nothrow @safe @property
	if(isIntegral!T && isUnsigned!T)
{
	alias Result = Signed!T;

	// All bits above the lowest one carry the magnitude
	immutable magnitude = src >> 1;

	// A set lowest bit marks a negative number
	if(src & 1)
		return cast(Result)(-magnitude - 1);
	return cast(Result) magnitude;
}

unittest {
	assert(fromZigZag(0U) == 0);
	assert(fromZigZag(1U) == -1);
	assert(fromZigZag(2U) == 1);
	assert(fromZigZag(3U) == -2);
	assert(fromZigZag(4294967294U) == 2147483647);
	assert(fromZigZag(4294967295U) == -2147483648);
}
195 
/*******************************************************************************
 * Extracts the wire type stored in the three lowest bits of a data header
 *
 * Params:
 *  	data = The data header
 * Returns: The wire type value (0 to 7)
 */
@nogc ubyte wireType(ulong data) @safe @property pure nothrow {
	enum ulong wireTypeMask = 0b111;
	return cast(ubyte)(data & wireTypeMask);
}

unittest {
	assert(wireType(0x08) == 0); // Varint
	assert(wireType(0x09) == 1); // 64-bit
	assert(wireType(0x12) == 2); // Length-delimited
}
212 
/*******************************************************************************
 * Extracts the message number stored above the wire type bits of a data header
 *
 * Params:
 *  	data = The data header
 * Returns: The message number
 */
@nogc ulong msgNum(ulong data) @safe @property pure nothrow {
	// The three lowest bits hold the wire type and are dropped
	enum wireTypeBits = 3;
	return data >> wireTypeBits;
}

unittest {
	assert(msgNum(0x08) == 1);
	assert(msgNum(0x11) == 2);
	assert(msgNum(0x1a) == 3);
	assert(msgNum(0x22) == 4);
}
230 
/*******************************************************************************
 * Read a VarInt-encoded value from a data stream
 *
 * Removes the bytes that represent the data from the stream. The value ends
 * at the first byte whose continuation bit (0x80) is clear.
 *
 * Params:
 *  	src = The data stream
 * Returns: The decoded value, converted to `T`
 * Throws: Exception if the stream ends before a terminating byte is found,
 *         or if the value cannot be decoded into `T`
 */
T readVarint(R, T = ulong)(auto ref R src)
	if(isInputRange!R && is(ElementType!R : const ubyte))
{
	// countUntil reports -1 when no byte has its continuation bit cleared.
	// Without this check a truncated (or empty) stream would decode as 0
	// and leave the stream untouched.
	immutable last = src.countUntil!( a=>!(a&0x80) )();
	enforce(last >= 0, "Truncated varint: no terminating byte found");

	immutable len = last + 1;
	auto encoded = src.take(len);
	src.popFrontExactly(len);

	// Decode into the requested type instead of always producing a ulong
	return encoded.fromVarint!T();
}
248 
/*******************************************************************************
 * Encode an unsigned value into a VarInt-encoded series of bytes
 *
 * The value is emitted in groups of seven bits, least significant group
 * first; every byte except the last has its high bit set.
 *
 * Params:
 *  	r = output range that receives the encoded bytes
 *  	src = The value to encode (may be const or immutable)
 */
void toVarint(R, T)(ref R r, T src) @safe @property
	if(isOutputRange!(R, ubyte) && isIntegral!T && isUnsigned!T)
{
	immutable ubyte maxMask = 0b_1000_0000;

	// Work on an unqualified copy: T keeps the qualifiers of the argument,
	// so shifting `src` itself does not compile for const/immutable values.
	Unqual!T rest = src;

	while( rest >= maxMask )
	{
		// Low seven bits plus the continuation flag
		r.put(cast(ubyte)(rest | maxMask));
		rest >>= 7;
	}

	// Final group, continuation flag clear
	r.put(cast(ubyte) rest);
}

/*******************************************************************************
 * Encode a signed value into a VarInt-encoded series of bytes
 *
 * This function is useful for encoding int32 and int64 value types
 * (Do not confuse it with signed values encoded by ZigZag!)
 * The value is reinterpreted as a 64-bit unsigned number, so negative
 * values always occupy ten bytes.
 *
 * Params:
 *  	r = output range that receives the encoded bytes
 *  	src = The value to encode
 */
void toVarint(R)(ref R r, long src) @safe @property
	if(isOutputRange!(R, ubyte))
{
	ulong u = src;
	toVarint(r, u);
}
288 
unittest {
	// Runs a value through the module-level toVarint and returns the bytes
	static ubyte[] encode(ulong val)
	{
		auto sink = appender!(ubyte[])();
		.toVarint(sink, val);
		return sink.data;
	}

	// Small values
	assert(equal(encode(150), [0x96, 0x01]));
	assert(equal(encode(3), [0x03]));
	assert(equal(encode(270), [0x8E, 0x02]));
	assert(equal(encode(86942), [0x9E, 0xA7, 0x05]));

	// Type limits
	assert(equal(encode(ubyte.max), [0xFF, 0x01]));
	assert(equal(encode(uint.max), [0xFF, 0xFF, 0xFF, 0xFF, 0xF]));
	assert(equal(encode(ulong.max), [0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x01]));

	// Negative values are sign-extended to 64 bits
	assert(equal(encode(-1), [0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x01]));

	// Round trips through the signed decoder
	assert(encode(-12345).fromVarint!int == -12345);
	assert(encode(int.min).fromVarint!int == int.min);
}
307 
/*******************************************************************************
 * Decode a VarInt-encoded series of bytes into an unsigned value
 *
 * Every element of `src` contributes its low seven bits, least significant
 * group first; the continuation bit of each byte is ignored.
 *
 * Params:
 *  	src = The bytes of the varint
 * Returns: The decoded value
 * Throws: Exception if a group would start beyond the highest bit of `T`
 */
T fromVarint(T = ulong, R)(R src) @property
	if(isInputRange!R && is(ElementType!R : const ubyte) &&
		isIntegral!T && isUnsigned!T)
{
	immutable ubyte mask = 0b_0111_1111;
	T ret;

	size_t offset;
	foreach(val; src)
	{
		// Validate the shift amount before using it: shifting by the bit
		// width of T or more is not a valid operation.
		enforce(
				offset < T.sizeof * 8,
				"Varint value is too big for the type " ~ T.stringof
			);

		ret |= cast(T)(val & mask) << offset;
		offset += 7;
	}

	return ret;
}

/*******************************************************************************
 * Decode a VarInt-encoded series of bytes into a signed value
 *
 * The bytes are decoded as a 64-bit unsigned number, reinterpreted as a
 * `long` and then narrowed to `T` with `std.conv.to`.
 *
 * Params:
 *  	src = The bytes of the varint
 * Returns: The decoded value
 * Throws: Exception if the varint is too long, or if the value does not fit
 *         into `T`
 */
T fromVarint(T, R)(R src) @property
	if(isInputRange!R && is(ElementType!R : const ubyte) &&
		isIntegral!T && isSigned!T)
{
	long r = fromVarint!ulong(src);
	return r.to!T;
}

unittest {
	ubyte[] ubs(ubyte[] vals...) {
		return vals.dup;
	}

	assert(ubs(0x96, 0x01).fromVarint() == 150);
	assert(ubs(0x03).fromVarint() == 3);
	assert(ubs(0x8E, 0x02).fromVarint() == 270);
	assert(ubs(0x9E, 0xA7, 0x05).fromVarint() == 86942);
	assert(ubs(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x01).fromVarint!int() == -1);

	// Eleven groups do not fit into 64 bits
	assertThrown!Exception(
		ubs(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x01).fromVarint());
}
372 
/// Name of the protocol buffer type that enum values are encoded as
enum string ENUM_SERIALIZATION = "int32";
/// Wire type number under which a packed message is encoded
enum int PACKED_MSG_TYPE = 2;
377 
/*******************************************************************************
 * Checks whether a type can serve as a source of protocol buffer bytes
 *
 * Params:
 *     R = type to test
 * Returns: True if `R` is an input range whose elements implicitly convert
 *          to `const ubyte`
 */
template isProtoInputRange(R) {
	enum bool isProtoInputRange =
		isInputRange!R && is(ElementType!R : const ubyte);
}
387 
/*******************************************************************************
 * Decode a varint-encoded value from a series of bytes
 *
 * Handles every type whose wire type is 0. `sint32`/`sint64` are
 * zigzag-decoded; the other signed types are read as the 64-bit two's
 * complement pattern that the signed `toVarint` overload writes.
 *
 * Params:
 *  	src = The data stream; the consumed bytes are removed
 * Returns: The decoded value
 * Throws: std.conv.ConvOverflowException if the value does not fit into the
 *         target type
 */
BuffType!T readProto(string T, R)(auto ref R src)
	if(isProtoInputRange!R && T.msgType == "int32".msgType)
{
	alias Value = BuffType!T;

	static if(T == "sint32" || T == "sint64")
		return src.readVarint().fromZigZag().to!Value();
	else static if(isSigned!Value)
		// Negative int32/int64 values are written sign-extended to 64 bits,
		// so the raw ulong has to be reinterpreted as a long first.
		// Converting the ulong directly overflows for every negative value.
		return (cast(long) src.readVarint()).to!Value();
	else
		return src.readVarint().to!Value();
}
403 
/// Ditto
BuffType!T readProto(string T, R)(auto ref R src)
	if(isProtoInputRange!R &&
	  (T.msgType == "double".msgType || T.msgType == "float".msgType))
{
	import std.bitmanip : read, Endian;

	// Fixed-width values are stored in little-endian byte order
	alias Value = BuffType!T;
	return read!(Value, Endian.littleEndian)(src);
}
412 
/*******************************************************************************
 * Decode a length-delimited value (`bytes` or `string`) from a series of bytes
 *
 * Reads a varint length prefix followed by that many payload bytes.
 *
 * Params:
 *  	src = The data stream; the consumed bytes are removed
 * Returns: The decoded value
 * Throws: Exception if the stream ends before the announced number of bytes
 *         has been read
 */
BuffType!T readProto(string T, R)(auto ref R src)
	if(isProtoInputRange!R && T.msgType == "string".msgType)
{
	BuffType!T ret;
	auto len = src.readProto!"uint32"();

	// When the source knows its length, reject a bogus length prefix up
	// front instead of reserving memory for data that cannot arrive.
	static if(hasLength!R)
		enforce(src.length >= len,
				"Length-delimited field is longer than the remaining data");

	ret.reserve(len);
	foreach(i; 0..len) {
		// Report truncated input as an exception rather than reading the
		// front of an empty range.
		enforce(!src.empty,
				"Unexpected end of data inside a length-delimited field");
		ret ~= src.front;
		src.popFront();
	}
	return ret;
}
426 
/*******************************************************************************
 * Checks whether a type can serve as a sink for protocol buffer bytes
 *
 * Params:
 *     R = type to test
 * Returns: True if `R` is an output range that accepts `ubyte` elements
 */
template isProtoOutputRange(R) {
	enum bool isProtoOutputRange = isOutputRange!(R, ubyte);
}
436 
/*******************************************************************************
 * Encode a zigzag-typed value (`sint32` or `sint64`) into a series of bytes
 *
 * Params:
 *     r = output range that receives the encoded bytes
 *     src = The raw data
 */
void writeProto(string T, R)(ref R r, const BuffType!T src)
	if(isProtoOutputRange!R && (T == "sint32" || T == "sint64"))
{
	// Zigzag first, then emit the unsigned result as a varint
	toVarint(r, toZigZag(src));
}
450 
/*******************************************************************************
 * Encode a plain varint-typed value into a series of bytes
 *
 * Covers the wire type 0 types except `sint32` and `sint64`. Those two are
 * excluded so that they can only reach the zigzag overload; with overlapping
 * constraints the choice depended on how well the argument matched each
 * parameter list, and a zigzag-typed value could be written unencoded.
 *
 * Params:
 *     r = output range that receives the encoded bytes
 *     src = The raw data
 */
void writeProto(string T, R)(ref R r, BuffType!T src)
	if(isProtoOutputRange!R && T.msgType == "int32".msgType &&
	   T != "sint32" && T != "sint64")
{
	toVarint(r, src);
}
457 
/// Ditto
void writeProto(string T, R)(ref R r, const BuffType!T src)
	if(isProtoOutputRange!R &&
	  (T.msgType == "double".msgType || T.msgType == "float".msgType))
{
	import std.bitmanip : nativeToLittleEndian;

	// Fixed-width values are written in little-endian byte order
	auto encoded = nativeToLittleEndian!(BuffType!T)(src);
	r.put(encoded[]);
}
466 
/// Ditto
void writeProto(string T, R)(ref R r, const BuffType!T src)
	if(isProtoOutputRange!R && T.msgType == "string".msgType)
{
	// View the payload as raw bytes: varint length prefix first, data second
	auto payload = cast(ubyte[]) src;
	toVarint(r, payload.length);
	r.put(payload);
}
474 
/*******************************************************************************
 * Output range that discards everything written to it and only keeps a
 * running total of the number of bytes
 */
struct CntRange
{
	/// Number of bytes received so far
	size_t cnt;

	/// Accounts for a single byte
	void put(in ubyte) @safe @nogc { cnt += 1; }

	/// Accounts for every byte of a slice
	void put(in ubyte[] bytes) @safe @nogc { cnt += bytes.length; }

	/// Lets the range stand in for its count
	alias cnt this;
}