/*******************************************************************************
 * Serialization/deserialization code
 *
 * Author: Matthew Soucy, msoucy@csh.rit.edu
 * Date: Oct 5, 2013
 * Version: 0.0.2
 */
module dproto.serialize;

import dproto.exception;
import dproto.compat;

import std.algorithm;
import std.array;
import std.conv;
import std.exception;
import std.range;
import std.system : Endian;
import std.traits;

/*******************************************************************************
 * Returns whether the given string is a protocol buffer primitive
 *
 * Params:
 *     type = The type name to check
 * Returns: True if the type is a protocol buffer primitive
 */
bool isBuiltinType(string type) @safe pure nothrow {
    return ["int32" , "sint32", "int64", "sint64", "uint32", "uint64", "bool",
            "fixed64", "sfixed64", "double", "bytes", "string",
            "fixed32", "sfixed32", "float"].canFind(type);
}

unittest {
    assert(isBuiltinType("sfixed32") == true);
    assert(isBuiltinType("double") == true);
    assert(isBuiltinType("string") == true);
    assert(isBuiltinType("int128") == false);
    assert(isBuiltinType("quad") == false);
}

/*******************************************************************************
 * Storage type for a field of type T
 *
 * Enum types are used as-is; every other type is wrapped in
 * std.typecons.Nullable.
 */
template PossiblyNullable(T) {
    static if(is(T == enum)) {
        alias PossiblyNullable = T;
    } else {
        import std.typecons : Nullable;
        alias PossiblyNullable = Nullable!T;
    }
}

/*******************************************************************************
 * Default value of a field of type T when none was specified
 *
 * For enums this is the first declared member, otherwise it is T.init.
 */
template UnspecifiedDefaultValue(T) {
    static if(is(T == enum)) {
        import std.traits : EnumMembers;
        enum UnspecifiedDefaultValue = EnumMembers!(T)[0];
    } else {
        enum UnspecifiedDefaultValue = T.init;
    }
}

/*******************************************************************************
 * Default value of a field of type T, parsed at compile time from its
 * textual representation `value` via std.conv.to
 */
template SpecifiedDefaultValue(T, string value) {
    import std.conv : to;
    enum SpecifiedDefaultValue = to!T(value);
}

/*******************************************************************************
 * Maps the given type string to the data type it represents
 *
 * No alias is declared for a type string that is not listed here, so
 * BuffType!"unknown" fails to instantiate.
 */
template BuffType(string T) {
    // Msg type 0
    static if(T == "int32" || T == "sint32") alias BuffType = int;
    else static if(T == "int64" || T == "sint64") alias BuffType = long;
    else static if(T == "uint32") alias BuffType = uint;
    else static if(T == "uint64") alias BuffType = ulong;
    else static if(T == "bool") alias BuffType = bool;
    // Msg type 1
    else static if(T == "fixed64") alias BuffType = ulong;
    else static if(T == "sfixed64") alias BuffType = long;
    else static if(T == "double") alias BuffType = double;
    // Msg type 2
    else static if(T == "bytes") alias BuffType = ubyte[];
    else static if(T == "string") alias BuffType = string;
    // Msg type 3,4 deprecated. Will not support.
    // Msg type 5
    else static if(T == "fixed32") alias BuffType = uint;
    else static if(T == "sfixed32") alias BuffType = int;
    else static if(T == "float") alias BuffType = float;
}

unittest {
    assert(is(BuffType!"sfixed32" == int) == true);
    assert(is(BuffType!"double" == double) == true);
    assert(is(BuffType!"string" == string) == true);
    assert(is(BuffType!"bytes" : const ubyte[]) == true);
    assert(is(BuffType!"sfixed64" == int) == false);
}

/*******************************************************************************
 * Removes bytes from the range as if it were read in
 *
 * Used to skip over a field whose value is not needed. Wire types other than
 * 0, 1, 2 and 5 are ignored and consume nothing.
 *
 * Params:
 *     header = The data header
 *     data = The data to read from
 */
void defaultDecode(R)(ulong header, ref R data)
    if(isInputRange!R && is(ElementType!R : const ubyte))
{
    switch(header.wireType) {
        case 0:
            // Skip as uint64: any well-formed varint fits, whereas decoding
            // it as int32 throws for values outside the int range.
            data.readProto!"uint64"();
            break;
        case 1:
            data.readProto!"fixed64"();
            break;
        case 2:
            data.readProto!"bytes"();
            break;
        case 5:
            data.readProto!"fixed32"();
            break;
        default:
            break;
    }
}

/*******************************************************************************
 * Maps the given type string to the wire type number
 *
 * Any type string that is not a known primitive maps to wire type 2
 * (length-delimited).
 */
@nogc
auto msgType(string T) pure nothrow @safe {
    switch(T) {
        case "int32", "sint32", "uint32":
        case "int64", "sint64", "uint64":
        case "bool":
            return 0;
        case "fixed64", "sfixed64", "double":
            return 1;
        case "bytes", "string":
            return 2;
        case "fixed32", "sfixed32", "float":
            return 5;
        default:
            return 2;
    }
}

/*******************************************************************************
 * Encodes a number in its zigzag encoding
 *
 * Params:
 *     src = The raw integer to encode
 * Returns: The zigzag-encoded value
 */
@nogc Unsigned!T toZigZag(T)(in T src) pure nothrow @safe @property
    if(isIntegral!T && isSigned!T)
{
    return cast(Unsigned!T)(
        src >= 0 ?
            src * 2 :
            -src * 2 - 1
    );
}

unittest {
    assert(0.toZigZag() == 0);
    assert((-1).toZigZag() == 1);
    assert(1.toZigZag() == 2);
    assert((-2).toZigZag() == 3);
    assert(2147483647.toZigZag() == 4294967294);
    assert((-2147483648).toZigZag() == 4294967295);
}

/*******************************************************************************
 * Decodes a number from its zigzag encoding
 *
 * Params:
 *     src = The zigzag-encoded value to decode
 * Returns: The raw integer
 */
@nogc Signed!T fromZigZag(T)(in T src) pure nothrow @safe @property
    if(isIntegral!T && isUnsigned!T)
{
    return (src & 1) ?
        -(src >> 1) - 1 :
        src >> 1;
}

unittest {
    assert(0U.fromZigZag() == 0);
    assert(1U.fromZigZag() == -1);
    assert(2U.fromZigZag() == 1);
    assert(3U.fromZigZag() == -2);
    assert(4294967294U.fromZigZag() == 2147483647);
    assert(4294967295U.fromZigZag() == -2147483648);
}

/*******************************************************************************
 * Get the wire type from the encoding value
 *
 * Params:
 *     data = The data header
 * Returns: The wire type value (the low three bits of the header)
 */
@nogc ubyte wireType(ulong data) @safe @property pure nothrow {
    return data&7;
}

unittest {
    assert((0x08).wireType() == 0); // Test for varints
    assert((0x09).wireType() == 1); // Test 64-bit
    assert((0x12).wireType() == 2); // Test length-delimited
}

/*******************************************************************************
 * Get the message number from the encoding value
 *
 * Params:
 *     data = The data header
 * Returns: The message number (the header with the wire type bits removed)
 */
@nogc ulong msgNum(ulong data) @safe @property pure nothrow {
    return data>>3;
}

unittest {
    assert((0x08).msgNum() == 1);
    assert((0x11).msgNum() == 2);
    assert((0x1a).msgNum() == 3);
    assert((0x22).msgNum() == 4);
}

/*******************************************************************************
 * Read a VarInt-encoded value from a data stream
 *
 * Removes the bytes that represent the data from the stream.
 *
 * The stream is scanned for the terminating byte before anything is removed
 * from it, so it has to tolerate being traversed by value first (as a slice
 * does).
 *
 * Params:
 *     src = The data stream
 * Returns: The decoded value
 * Throws: Exception if the stream ends before a byte without the
 *         continuation bit is found, or if the value does not fit in 64 bits
 */
T readVarint(R, T = ulong)(auto ref R src)
    if(isInputRange!R && is(ElementType!R : const ubyte))
{
    // countUntil yields -1 when no terminating byte exists, making i == 0.
    auto i = src.countUntil!( a=>!(a&0x80) )() + 1;
    enforce(i > 0, "Truncated varint: no terminating byte found");
    auto ret = src.take(i);
    src.popFrontExactly(i);
    return ret.fromVarint();
}

unittest {
    ubyte[] data = [0x96, 0x01, 0x2A];
    assert(data.readVarint() == 150);
    assert(equal(data, [cast(ubyte) 0x2A]));

    ubyte[] truncated = [0x80, 0x80];
    assertThrown(truncated.readVarint());
}

/*******************************************************************************
 * Encode an unsigned value into a VarInt-encoded series of bytes
 *
 * Params:
 *     r = output range that receives the encoded bytes
 *     src = The value to encode
 */
void toVarint(R, T)(ref R r, T src) @safe @property
    if(isOutputRange!(R, ubyte) && isIntegral!T && isUnsigned!T)
{
    immutable ubyte maxMask = 0b_1000_0000;

    // Emit 7 bits at a time, least significant group first, with the
    // continuation bit set on every byte except the last.
    while( src >= maxMask )
    {
        r.put(cast(ubyte)(src | maxMask));
        src >>= 7;
    }

    r.put(cast(ubyte) src);
}

/*******************************************************************************
 * Encode a signed value into a VarInt-encoded series of bytes
 *
 * This function is useful for encoding int32 and int64 value types
 * (Do not confuse it with signed values encoded by ZigZag!)
 *
 * Params:
 *     r = output range that receives the encoded bytes
 *     src = The value to encode
 */
void toVarint(R)(ref R r, long src) @safe @property
    if(isOutputRange!(R, ubyte))
{
    // Reinterpret as unsigned: negative values become 10-byte varints.
    ulong u = src;
    toVarint(r, u);
}

unittest {
    static ubyte[] toVarint(ulong val) @property
    {
        auto r = appender!(ubyte[])();
        .toVarint(r, val);
        return r.data;
    }
    assert(equal(toVarint(150), [0x96, 0x01]));
    assert(equal(toVarint(3), [0x03]));
    assert(equal(toVarint(270), [0x8E, 0x02]));
    assert(equal(toVarint(86942), [0x9E, 0xA7, 0x05]));
    assert(equal(toVarint(ubyte.max), [0xFF, 0x01]));
    assert(equal(toVarint(uint.max), [0xFF, 0xFF, 0xFF, 0xFF, 0xF]));
    assert(equal(toVarint(ulong.max), [0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x01]));
    assert(equal(toVarint(-1), [0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x01]));
    assert(toVarint(-12345).fromVarint!int == -12345);
    assert(toVarint(int.min).fromVarint!int == int.min);
}

/*******************************************************************************
 * Decode a VarInt-encoded series of bytes into an unsigned value
 *
 * Every element of the range is treated as part of the same varint.
 *
 * Params:
 *     src = The data stream
 * Returns: The decoded value
 * Throws: Exception if the range holds more 7-bit groups than T has room for
 */
T fromVarint(T = ulong, R)(R src) @property
    if(isInputRange!R && is(ElementType!R : const ubyte) &&
        isIntegral!T && isUnsigned!T)
{
    immutable ubyte mask = 0b_0111_1111;
    T ret;

    size_t offset;
    foreach(val; src)
    {
        // Validate before shifting: a shift count that reaches the bit
        // width of the operand must never be evaluated.
        enforce(
                offset < T.sizeof * 8,
                "Varint value is too big for the type " ~ T.stringof
            );

        ret |= cast(T)(val & mask) << offset;

        offset += 7;
    }

    return ret;
}

/*******************************************************************************
 * Decode a VarInt-encoded series of bytes into a signed value
 *
 * Params:
 *     src = The data stream
 * Returns: The decoded value
 * Throws: std.conv.ConvOverflowException if the value does not fit in T
 */
T fromVarint(T, R)(R src) @property
    if(isInputRange!R && is(ElementType!R : const ubyte) &&
        isIntegral!T && isSigned!T)
{
    // Decode all 64 bits, reinterpret them as signed, then narrow with checks.
    long r = fromVarint!ulong(src);
    return r.to!T;
}

unittest {
    ubyte[] ubs(ubyte[] vals...) {
        return vals.dup;
    }

    assert(ubs(0x96, 0x01).fromVarint() == 150);
    assert(ubs(0x03).fromVarint() == 3);
    assert(ubs(0x8E, 0x02).fromVarint() == 270);
    assert(ubs(0x9E, 0xA7, 0x05).fromVarint() == 86942);
    assert(ubs(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x01).fromVarint!int() == -1);

    bool overflow = false;
    try
        ubs(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x01).fromVarint();
    catch(Exception)
        overflow = true;
    finally
        assert(overflow);
}

/// The type to encode an enum as
enum ENUM_SERIALIZATION = "int32";
/// The message type to encode a packed message as
enum PACKED_MSG_TYPE = 2;

/*******************************************************************************
 * Test a range for being a valid ProtoBuf input range
 *
 * Params:
 *     R = type to test
 * Returns: True if R is an input range whose elements convert to const ubyte
 */

enum isProtoInputRange(R) = isInputRange!R && is(ElementType!R : const ubyte);

/*******************************************************************************
 * Decode a series of bytes into a value
 *
 * The bytes that make up the value are removed from the stream.
 *
 * Params:
 *     src = The data stream
 * Returns: The decoded value
 * Throws: Exception on truncated input; std.conv.ConvOverflowException when
 *         a varint does not fit the requested type
 */
BuffType!T readProto(string T, R)(auto ref R src)
    if(isProtoInputRange!R && T.msgType == "int32".msgType)
{
    alias BT = BuffType!T;
    static if(T == "sint32" || T == "sint64")
        return src.readVarint().fromZigZag().to!BT();
    else static if(isSigned!BT)
        // int32/int64 are sent as 64-bit two's complement. Reinterpret the
        // raw bits as signed first; converting the unsigned value directly
        // would be rejected as an overflow for every negative number.
        return (cast(long) src.readVarint()).to!BT();
    else
        return src.readVarint().to!BT();
}

/// Ditto
BuffType!T readProto(string T, R)(auto ref R src)
    if(isProtoInputRange!R &&
        (T.msgType == "double".msgType || T.msgType == "float".msgType))
{
    import std.bitmanip : read, Endian;
    return src.read!(BuffType!T, Endian.littleEndian)();
}

/// Ditto
BuffType!T readProto(string T, R)(auto ref R src)
    if(isProtoInputRange!R && T.msgType == "string".msgType)
{
    BuffType!T ret;
    auto len = src.readProto!"uint32"();

    // The length prefix comes from the data itself; when the remaining size
    // is known, reject an impossible length before reserving memory for it.
    static if(hasLength!R)
        enforce(len <= src.length,
            "Length-delimited field is longer than the remaining data");

    ret.reserve(len);
    foreach(i; 0..len) {
        enforce(!src.empty,
            "Unexpected end of data inside a length-delimited field");
        ret ~= src.front;
        src.popFront();
    }
    return ret;
}

unittest {
    // Negative int32/int64 values survive a round trip
    auto r = appender!(ubyte[])();
    r.writeProto!"int32"(-5);
    r.writeProto!"int64"(long.min);
    ubyte[] buf = r.data;
    assert(buf.readProto!"int32"() == -5);
    assert(buf.readProto!"int64"() == long.min);
    assert(buf.empty);

    // Truncated length-delimited payloads are rejected
    ubyte[] bad = [0x05, 0x61, 0x62];
    assertThrown(bad.readProto!"string"());

    ubyte[] good = [0x02, 0x61, 0x62, 0x7F];
    assert(good.readProto!"string"() == "ab");
    assert(equal(good, [cast(ubyte) 0x7F]));
}

/*******************************************************************************
 * Test a range for being a valid ProtoBuf output range
 *
 * Params:
 *     R = type to test
 * Returns: True if R is an output range accepting ubyte
 */

enum isProtoOutputRange(R) = isOutputRange!(R, ubyte);

/*******************************************************************************
 * Encode a value into a series of bytes
 *
 * Params:
 *     r = output range that receives the encoded bytes
 *     src = The raw data
 */
void writeProto(string T, R)(ref R r, const BuffType!T src)
    if(isProtoOutputRange!R && (T == "sint32" || T == "sint64"))
{
    toVarint(r, src.toZigZag);
}

/// Ditto
void writeProto(string T, R)(ref R r, BuffType!T src)
    if(isProtoOutputRange!R && T.msgType == "int32".msgType &&
        T != "sint32" && T != "sint64")
{
    // sint32/sint64 are excluded so that they can only ever be written by
    // the zig-zag overload above.
    toVarint(r, src);
}

/// Ditto
void writeProto(string T, R)(ref R r, const BuffType!T src)
    if(isProtoOutputRange!R &&
        (T.msgType == "double".msgType || T.msgType == "float".msgType))
{
    import std.bitmanip : nativeToLittleEndian;
    r.put(src.nativeToLittleEndian!(BuffType!T)[]);
}

/// Ditto
void writeProto(string T, R)(ref R r, const BuffType!T src)
    if(isProtoOutputRange!R && T.msgType == "string".msgType)
{
    // Length prefix first, then the raw bytes.
    toVarint(r, src.length);
    r.put(cast(ubyte[])src);
}

unittest {
    // Zig-zag is applied regardless of the constness of the argument
    auto r = appender!(ubyte[])();
    int neg = -3;
    r.writeProto!"sint32"(neg);
    assert(equal(r.data, [cast(ubyte) 0x05]));
}

/*******************************************************************************
 * Simple range that ignores data but counts the length
 */
struct CntRange
{
@nogc:
    size_t cnt;
    void put(in ubyte) @safe { ++cnt; }
    void put(in ubyte[] ary) @safe { cnt += ary.length; }
    alias cnt this;
}