/*******************************************************************************
 * Convert a .proto file into a string representing the class
 *
 * Author: Matthew Soucy, dproto@msoucy.me
 */
module dproto.parse;

import dproto.exception;
import dproto.intermediate;
import dproto.serialize : isBuiltinType;

import std.algorithm;
import std.array;
import std.ascii;
import std.conv;
import std.exception;
import std.format;
import std.stdio;
import std.string;
import std.traits;

/// Characters accepted inside a "word" (identifiers, numbers, dotted names).
enum wordPattern = std.ascii.letters ~ std.ascii.digits ~ `_.\-`;
/// Characters accepted inside a quoted import path: a word plus '/'.
enum pathPattern = wordPattern ~ `/`;

/**
 * Basic parser for .proto schema declarations.
 *
 * This parser throws away data that it doesn't care about. In particular,
 * unrecognized options and extensions are discarded.
 *
 * Params:
 *     name_ = path of the .proto file, used when reporting syntax errors
 *     data_ = the complete text of the .proto file
 *
 * Returns: the ProtoPackage built from the declarations found in data_.
 *
 * Throws: DProtoSyntaxException on malformed input, and
 *     DProtoReservedWordException when a D keyword is used as a name while
 *     the "dproto_reserved_fmt" option is set to "%s".
 */
ProtoPackage ParseProtoSchema(const string name_, string data_)
{

    struct ProtoSchemaParser {

        /** The path to the .proto file. */
        string fileName;

        /** The entire document. */
        const char[] data;

        /** Our cursor within the document. data[pos] is the next character to be read. */
        int pos;

        /** The number of newline characters encountered thus far. */
        int line;

        /** The index of the most recent newline character. */
        int lineStart;

        /** Are we parsing proto 3 syntax? */
        bool isProto3;

        /**
         * Parses the whole document: reads declarations until the cursor
         * reaches the end of data and returns the populated package.
         */
        ProtoPackage readProtoPackage() {
            auto ret = ProtoPackage(fileName);
            while (true) {
                readDocumentation();
                if (pos == data.length) {
                    return ret;
                }
                readDeclaration(ret);
            }
        }

        /** Creates a parser positioned at the start of _data. */
        this(string _fileName, string _data)
        {
            fileName = _fileName;
            data = _data;
        }

    private:

        /**
         * Reads one declaration and stores it into context.
         *
         * The leading word selects the kind of declaration. Which kinds are
         * legal depends on the static type of Context (package, message,
         * enum, service, rpc method); illegal ones raise
         * DProtoSyntaxException. A word that is not a known keyword is an
         * enum value name when Context is EnumType, or the type of an
         * implicitly optional field when Context has a `fields` member.
         */
        void readDeclaration(Context, string ContextName = Context.stringof)(ref Context context) {
            // Skip unnecessary semicolons, occasionally used after a nested message declaration.
            if (peekChar() == ';') {
                pos++;
                return;
            }

            string label = readWord();

            switch(label) {
                case "syntax": {
                    static if(is(Context==ProtoPackage)) {
                        unexpected(context.syntax == null, "Too many syntax statements");
                        unexpected(readChar() == '=', "Expected '=' after 'syntax'");
                        unexpected(peekChar() == '"', `Expected opening quote '"' after 'syntax ='`);
                        // readQuotedString keeps the surrounding quotes, hence the quoted literals below.
                        context.syntax = readQuotedString();
                        unexpected(context.syntax == `"proto2"` || context.syntax == `"proto3"`,
                                   "Unexpected syntax version: `" ~ context.syntax ~ "`");
                        isProto3 = context.syntax == `"proto3"`;
                        unexpected(readChar() == ';', "Expected ';' after syntax declaration");
                        return;
                    } else {
                        throw new DProtoSyntaxException("syntax in " ~ ContextName);
                    }
                }
                case "package": {
                    static if(is(Context==ProtoPackage)) {
                        unexpected(context.packageName == null, "too many package names");
                        context.packageName = readSymbolName(context);
                        unexpected(readChar() == ';', "Expected ';'");
                        return;
                    } else {
                        throw new DProtoSyntaxException("package in " ~ ContextName);
                    }
                }
                case "import": {
                    static if(is(Context==ProtoPackage)) {
                        bool isPublicImport = false;
                        bool isWeakImport = false;
                        if(peekChar() == 'p') {
                            unexpected(readWord() == "public", "Expected 'public'");
                            isPublicImport = true;
                        } else if(peekChar() == 'w') {
                            unexpected(readWord() == "weak", "Expected 'weak'");
                            isWeakImport = true;
                        }
                        if(peekChar() == '"') {
                            context.dependencies ~= Dependency(readQuotedPath (), isPublicImport, isWeakImport);
                        }
                        unexpected(readChar() == ';', "Expected ';'");
                        return;
                    } else {
                        throw new DProtoSyntaxException("import in " ~ ContextName);
                    }
                }
                case "option": {
                    Option result = readOption('=');
                    unexpected(readChar() == ';', "Expected ';'");
                    context.options[result.name] = result.value;
                    return;
                }
                case "message": {
                    static if(hasMember!(Context, "messageTypes")) {
                        context.messageTypes ~= readMessage(context);
                        return;
                    } else {
                        throw new DProtoSyntaxException("message in " ~ ContextName);
                    }
                }
                case "enum": {
                    static if(hasMember!(Context, "enumTypes")) {
                        context.enumTypes ~= readEnumType(context);
                        return;
                    } else {
                        throw new DProtoSyntaxException("enum in " ~ ContextName);
                    }
                }
                case "extend": {
                    readExtend();
                    return;
                }
                case "service": {
                    static if(hasMember!(Context, "rpcServices")) {
                        context.rpcServices ~= readService(context);
                        return;
                    } else {
                        throw new DProtoSyntaxException("service in " ~ ContextName);
                    }
                }
                case "rpc": {
                    static if( hasMember!(Context, "rpc")) {
                        context.rpc ~= readRpc(context);
                        return;
                    } else {
                        throw new DProtoSyntaxException("rpc in " ~ ContextName);
                    }
                }
                case "required":
                case "optional":
                    if( isProto3 ) {
                        throw new DProtoSyntaxException("Field label '" ~ label ~ "' not allowed");
                    }
                    // Explicitly labelled fields share the "repeated" handling below.
                    goto case;
                case "repeated": {
                    static if( hasMember!(Context, "fields") ) {
                        string type = readSymbolName(context);
                        auto newfield = readField(label, type, context);
                        unexpected(context.fields.all!(a => a.id != newfield.id)(),
                                   "Repeated field ID");
                        context.fields ~= newfield;
                        return;
                    } else {
                        throw new DProtoSyntaxException("Fields must be nested");
                    }
                }
                case "map":
                case "oneof": {
                    throw new DProtoSyntaxException("'" ~ label ~ "' not yet implemented");
                }
                case "extensions": {
                    static if(!is(Context==ProtoPackage)) {
                        // The parsed range is currently discarded.
                        readExtensions(context);
                        return;
                    } else {
                        throw new DProtoSyntaxException("Extensions must be nested");
                    }
                }
                default: {
                    static if (is(Context == EnumType))
                    {
                        // Inside an enum the label is the name of a value: `NAME = tag;`
                        unexpected(readChar() == '=', "Expected '='");
                        int tag = readInt();
                        if (context.options.get("allow_alias", "true") == "false"
                                && context.values.values.canFind(tag))
                        {
                            throw new DProtoSyntaxException("Enum values must not be duplicated");
                        }
                        unexpected(readChar() == ';', "Expected ';'");
                        context.values[label] = tag;
                        return;
                    }
                    else static if (hasMember!(Context, "fields"))
                    {
                        // A field without a label: the word just read is its type.
                        string type = reservedName(context, label);
                        auto newfield = readField("optional", type, context);
                        unexpected(context.fields.all!(a => a.id != newfield.id)(),
                                   "Repeated field ID");
                        context.fields ~= newfield;
                        return;
                    }
                    else
                    {
                        throw new DProtoSyntaxException("unexpected label: `" ~ label ~ '`');
                    }
                }
            }
        }

        /**
         * Reads a message declaration (name plus braced body) and returns it.
         * The new message starts out with the options of the enclosing context.
         */
        MessageType readMessage(Context)(Context context) {
            auto ret = MessageType(readSymbolName(context));
            ret.options = context.options;
            unexpected(readChar() == '{', "Expected '{'");
            while (true) {
                readDocumentation();
                if (peekChar() == '}') {
                    pos++;
                    break;
                }
                readDeclaration(ret);
            }
            return ret;
        }

        /**
         * Reads an extend declaration and ignores its content.
         *
         * The body is skipped up to the matching closing brace. Nested braces
         * are tracked and quoted strings are stepped over as a unit so that a
         * brace inside a string literal does not unbalance the count.
         * peekChar() skips whitespace and comments and raises a syntax error
         * at end of file, so an unterminated body cannot loop forever.
         * @todo
         */
        void readExtend() {
            readName(); // Ignore this for now
            unexpected(readChar() == '{', "Expected '{'");
            int depth = 1;
            while (depth > 0) {
                switch (peekChar()) {
                    case '"':
                        readQuotedString();
                        break;
                    case '{':
                        depth++;
                        pos++;
                        break;
                    case '}':
                        depth--;
                        pos++;
                        break;
                    default:
                        pos++;
                        break;
                }
            }
        }

        /** Reads a service declaration (name plus braced body) and returns it. */
        Service readService(Context)(Context context) {
            string name = readSymbolName(context);
            auto ret = Service(name);

            unexpected(readChar() == '{', "Expected '{'");
            while (true) {
                readDocumentation();
                if (peekChar() == '}') {
                    pos++;
                    break;
                }
                readDeclaration(ret);
            }
            return ret;
        }


        /**
         * Reads an rpc method of the form
         * `name (Request) returns (Response)` followed by either ';' or a
         * braced block of declarations, and returns it.
         */
        Service.Method readRpc(Context)(Context context) {
            string documentation = "";
            string name = readSymbolName(context);

            unexpected(readChar() == '(', "Expected '('");
            string requestType = readSymbolName(context);
            unexpected(readChar() == ')', "Expected ')'");

            unexpected(readWord() == "returns", "Expected 'returns'");

            unexpected(readChar() == '(', "Expected '('");
            string responseType = readSymbolName(context);
            // @todo check for option prefixes, responseType is the last in the white spaced list
            unexpected(readChar() == ')', "Expected ')'");

            auto ret = Service.Method(name, documentation, requestType, responseType);

            /* process service options and documentation */
            if (peekChar() == '{') {
                pos++;
                while (true) {
                    readDocumentation();
                    if (peekChar() == '}') {
                        pos++;
                        break;
                    }
                    readDeclaration(ret);
                }
            }
            else if (readChar() != ';') {
                throw new DProtoSyntaxException("Expected ';'");
            }
            return ret;
        }

        /** Reads an enumerated type declaration and returns it. */
        EnumType readEnumType(Context)(Context context) {
            auto ret = EnumType(readSymbolName(context));
            unexpected(readChar() == '{', "Expected '{'");
            while (true) {
                readDocumentation();
                if (peekChar() == '}') {
                    pos++;
                    break;
                }
                readDeclaration(ret);
            }
            return ret;
        }

        /**
         * Reads the remainder of a field declaration, `name = tag [options];`,
         * and returns the field.
         *
         * Params:
         *     label = "required", "optional" or "repeated"
         *     type = the already-read field type
         *     context = enclosing declaration, consulted for reserved-name formatting
         *
         * Throws: DProtoSyntaxException for a tag outside the accepted ranges
         *     (1 .. 18999 and 20000 .. 2^^29 - 1), for [packed] on a
         *     non-repeated field, or for a missing ';'.
         */
        Field readField(Context)(string label, string type, Context context) {
            Field.Requirement labelEnum = label.toUpper().to!(Field.Requirement)();
            string name = readSymbolName(context);
            unexpected(readChar() == '=', "Expected '='");
            int tag = readInt();
            enforce((0 < tag && tag < 19000) || (19999 < tag && tag < 2^^29),
                    new DProtoSyntaxException(
                        "Invalid tag number: "~tag.to!string()));
            char c = peekChar();
            Options options;
            if (c == '[') {
                options = readMap('[', ']', '=');
                c = peekChar();
            }
            if (c == ';') {
                pos++;
                if (labelEnum != Field.Requirement.REPEATED && options.get("packed", "false") != "false") {
                    throw new DProtoSyntaxException("[packed = true] can only be specified for repeated primitive fields");
                }
                return Field(labelEnum, type, name, tag, options);
            }
            throw new DProtoSyntaxException("Expected ';'");
        }

        /**
         * Reads extensions like "extensions 101;" or "extensions 101 to max;".
         *
         * The range start is always stored in minVal. For a single number
         * maxVal equals minVal. For "N to M" maxVal is M (decimal or
         * 0x-prefixed hexadecimal). For "N to max" maxVal is left at the
         * default of Extension.
         * NOTE(review): that default is presumably the largest allowed value;
         * confirm against dproto.intermediate.
         * @todo
         */
        Extension readExtensions(Context)(Context context) {
            Extension ret;
            int minVal = readInt(); // Range start.
            ret.minVal = minVal;
            if (peekChar() != ';') {
                unexpected(readWord() == "to", "Expected 'to'");
                string maxVal = readWord(); // Range end.
                if(maxVal != "max") {
                    // startsWith is safe for bounds shorter than two characters.
                    if(maxVal.startsWith("0x")) {
                        ret.maxVal = maxVal[2..$].to!uint(16);
                    } else {
                        ret.maxVal = maxVal.to!uint();
                    }
                }
            } else {
                ret.maxVal = minVal;
            }
            unexpected(readChar() == ';', "Expected ';'");
            return ret;
        }

        /**
         * Reads an option containing a name, the given separator character,
         * and a value. A value starting with '{' is read as a nested map and
         * converted to its string form.
         */
        Option readOption(char keyValueSeparator) {
            string name = readName(); // Option name.
            unexpected(readChar() == keyValueSeparator, "Expected '" ~ keyValueSeparator ~ "' in option");
            string value = (peekChar() == '{') ? readMap('{', '}', ':').to!string() : readString();
            return Option(name, value);
        }

        /**
         * Returns a map of string keys and values. This is similar to a JSON object,
         * with openBrace and closeBrace surrounding the map, keyValueSeparator
         * separating keys from values, and ',' separating entries.
         */
        Options readMap(char openBrace, char closeBrace, char keyValueSeparator) {
            unexpected(readChar() == openBrace, openBrace ~ " to begin map");
            Options result;
            while (peekChar() != closeBrace) {

                Option option = readOption(keyValueSeparator);
                result[option.name] = option.value;

                char c = peekChar();
                if (c == ',') {
                    pos++;
                } else if (c != closeBrace) {
                    throw new DProtoSyntaxException("Expected ',' or '" ~ closeBrace ~ "'");
                }
            }

            // If we see the close brace, finish immediately. This handles {}/[] and ,}/,] cases.
            pos++;
            return result;
        }

    private:

        /** Reads a non-whitespace character and returns it. */
        char readChar() {
            char result = peekChar();
            pos++;
            return result;
        }

        /**
         * Peeks a non-whitespace character and returns it. The only difference
         * between this and readChar is that this doesn't consume the char.
         * Whitespace and comments before the character are consumed.
         *
         * Throws: DProtoSyntaxException at end of file.
         */
        char peekChar() {
            skipWhitespace(true);
            unexpected(pos != data.length, "unexpected end of file");
            return data[pos];
        }

        /** Reads a quoted or unquoted string and returns it. */
        string readString() {
            skipWhitespace(true);
            return peekChar() == '"' ? readQuotedString() : readWord();
        }

        /**
         * Reads a double-quoted string. A backslash makes the following
         * character literal. The returned value keeps the surrounding quotes.
         *
         * Throws: DProtoSyntaxException if the string is not terminated.
         */
        string readQuotedString() {
            skipWhitespace(true);
            auto c = readChar();
            enforce(c == '"', new DProtoSyntaxException("Expected \" but got " ~ c));
            string result;
            while (pos < data.length) {
                c = data[pos++];
                if (c == '"') return '"'~result~'"';

                if (c == '\\') {
                    unexpected(pos != data.length, "unexpected end of file");
                    c = data[pos++];
                }

                result ~= c;
                if (c == '\n') newline();
            }
            throw new DProtoSyntaxException("unterminated string");
        }

        /** Reads a double-quoted import path and returns it without the quotes. */
        string readQuotedPath() {
            skipWhitespace(true);
            unexpected(readChar() == '"', "imports should be quoted");
            auto ret = readWord(pathPattern);
            unexpected(readChar() == '"', "imports should be quoted");
            return ret;
        }

        /** Reads a (paren-wrapped), [square-wrapped] or naked symbol name. */
        string readName() {
            string optionName;
            char c = peekChar();
            if (c == '(') {
                pos++;
                optionName = readWord();
                unexpected(readChar() == ')', "Expected ')'");
            } else if (c == '[') {
                pos++;
                optionName = readWord();
                unexpected(readChar() == ']', "Expected ']'");
            } else {
                optionName = readWord();
            }
            return optionName;
        }

        /** Reads a symbol name, rewriting it if it collides with a D keyword. */
        string readSymbolName(Context)(Context context) {
            string name = readWord();
            return reservedName(context, name);
        }

        /**
         * Format a reserved D name.
         *
         * Names that are not D keywords are returned unchanged. Keywords are
         * rewritten with the format from the "dproto_reserved_fmt" option
         * (default `"%s_"`, i.e. a trailing underscore).
         *
         * Throws: DProtoReservedWordException when that format is exactly
         *     "%s", which would leave the keyword unchanged.
         */
        string reservedName(Context)(Context context, string name) {
            if(isDKeyword(name))
            {
                // Wrapped in quotes to properly evaluate string
                string reservedFmtRaw = context.options.get("dproto_reserved_fmt", `"%s_"`);
                string reservedFmt;
                formattedRead(reservedFmtRaw, `"%s"`, &reservedFmt);
                if(reservedFmt != "%s")
                {
                    name = reservedFmt.format(name);
                }
                else
                {
                    throw new DProtoReservedWordException("Reserved word: "~name);
                }
            }
            return name;
        }

        /**
         * Reads a non-empty word made of characters from pattern and returns it.
         *
         * Throws: DProtoSyntaxException if no matching character is found.
         */
        string readWord(string pattern = wordPattern) {
            skipWhitespace(true);
            int start = pos;
            while (pos < data.length) {
                char c = data[pos];
                if(pattern.canFind(c)) {
                    pos++;
                } else {
                    break;
                }
            }
            unexpected(start != pos, "Expected a word");
            return data[start .. pos].idup;
        }

        /**
         * Reads an integer and returns it. A "0x" prefix selects base 16, any
         * other leading "0" selects base 8, everything else is base 10.
         *
         * Throws: DProtoSyntaxException if the word is not a valid integer.
         */
        int readInt() {
            string tag = readWord();
            try {
                int radix = 10;
                if (tag.startsWith("0x")) {
                    tag = tag["0x".length .. $];
                    radix = 16;
                }
                else if (tag.startsWith("0")) {
                    radix = 8;
                }
                return tag.to!int(radix);
            } catch (Exception e) {
                throw new DProtoSyntaxException(
                        "Expected an integer but was `" ~ tag ~ "`",
                        e.msg);
            }
        }

        /**
         * Like skipWhitespace, but this returns a string containing all
         * comment text. By convention, comments before a declaration document that
         * declaration.
         */
        string readDocumentation() {
            string result = null;
            while (true) {
                skipWhitespace(false);
                if (pos == data.length || data[pos] != '/') {
                    return result != null ? cleanUpDocumentation(result) : "";
                }
                string comment = readComment();
                result = (result == null) ? comment : (result ~ "\n" ~ comment);
            }
        }

        /**
         * Reads a block or line comment starting at the cursor and returns
         * its body without the comment delimiters. For a line comment the
         * terminating newline is consumed but not returned, and a line
         * comment that runs to the end of the file is returned in full.
         *
         * Throws: DProtoSyntaxException if the cursor is not on '/', if the
         *     '/' does not start a comment, or if a block comment is unterminated.
         */
        string readComment() {
            enforce(!(pos == data.length || data[pos] != '/'), new DProtoSyntaxException(""));
            pos++;
            int commentType = pos < data.length ? data[pos++] : -1;
            if (commentType == '*') {
                int start = pos;
                while (pos + 1 < data.length) {
                    if (data[pos] == '*' && data[pos + 1] == '/') {
                        pos += 2;
                        return data[start .. pos - 2].idup;
                    } else {
                        char c = data[pos++];
                        if (c == '\n') newline();
                    }
                }
                throw new DProtoSyntaxException("unterminated comment");
            } else if (commentType == '/') {
                int start = pos;
                // One past the last body character; stays at start for an empty comment.
                int end = pos;
                while (pos < data.length) {
                    char c = data[pos++];
                    if (c == '\n') {
                        newline();
                        break;
                    }
                    end = pos;
                }
                return data[start .. end].idup;
            } else {
                throw new DProtoSyntaxException("unexpected '/'");
            }
        }

        /**
         * Returns a string like comment, but without leading whitespace or
         * asterisks on each line, and stripped at both ends.
         */
        string cleanUpDocumentation(string comment) {
            string result;
            bool beginningOfLine = true;
            for (int i = 0; i < comment.length; i++) {
                char c = comment[i];
                if (!beginningOfLine || ! " \t*".canFind(c)) {
                    result ~= c;
                    beginningOfLine = false;
                }
                if (c == '\n') {
                    beginningOfLine = true;
                }
            }
            return result.strip();
        }

        /**
         * Skips whitespace characters and optionally comments. When this returns,
         * either pos == data.length or data[pos] is a non-whitespace character.
         */
        void skipWhitespace(bool skipComments) {
            while (pos < data.length) {
                char c = data[pos];
                if (" \t\r\n".canFind(c)) {
                    pos++;
                    if (c == '\n') newline();
                } else if (skipComments && c == '/') {
                    readComment();
                } else {
                    break;
                }
            }
        }

        /** Call this every time a '\n' is encountered. */
        void newline() {
            line++;
            lineStart = pos;
        }

        /**
         * Throws a DProtoSyntaxException carrying message, the file name and
         * the current 1-based line number when value is false.
         */
        void unexpected(bool value, string message)
        {
            if (!value)
            {
                throw new DProtoSyntaxException(message, fileName, line + 1);
            }
        }

        /** Returns true if the name is a reserved word in D
         *
         * This will cause problems trying to use them as variables
         * Note: Some keywords are specifically whitelisted,
         * in order to allow usage of the protobuf names
         */
        bool isDKeyword(string name)
        {
            // dfmt off
            enum KEYWORDS = [
                "abstract", "alias", "align", "asm", "assert", "auto",
                "body", /+ "bool", +/ "break", "byte",
                "case", "cast", "catch", "cdouble", "cent", "cfloat", "char", "class", "const", "continue", "creal",
                "dchar", "debug", "default", "delegate", "delete", "deprecated", "do", /+ "double", +/
                "else", "enum", "export", "extern",
                "false", "final", "finally", /+ "float", +/ "for", "foreach", "foreach_reverse", "function",
                "goto",
                "idouble", "if", "ifloat", "immutable", "import", "in", "inout", "int", "interface", "invariant", "ireal", "is",
                "lazy", "long",
                "macro", "mixin", "module",
                "new", "nothrow", "null",
                "out", "override",
                "package", "pragma", "private", "protected", "public", "pure",
                "real", "ref", "return",
                "scope", "shared", "short", "static", "struct", "super", "switch", "synchronized",
                "template", "this", "throw", "true", "try", "typedef", "typeid", "typeof",
                "ubyte", "ucent", "uint", "ulong", "union", "unittest", "ushort",
                "version", "void", "volatile",
                "wchar", "while", "with",
                "__FILE__", "__MODULE__", "__LINE__", "__FUNCTION__", "__PRETTY_FUNCTION__",
                "__gshared", "__traits", "__vector", "__parameters",
            ];
            // dfmt on
            return KEYWORDS.canFind(name);
        }

    }

    return ProtoSchemaParser(name_, data_).readProtoPackage();

}