/*******************************************************************************
 * Convert a .proto file into a string representing the class
 *
 * Author: Matthew Soucy, msoucy@csh.rit.edu
 * Date: Oct 5, 2013
 * Version: 0.0.2
 */
module dproto.parse;

import dproto.exception;
import dproto.intermediate;
import dproto.serialize : isBuiltinType;

import std.algorithm;
import std.array;
import std.conv;
import std.exception;
import std.stdio;
import std.string;
import std.format;
import std.traits;

/**
 * Basic parser for {@code .proto} schema declarations.
 *
 * <p>This parser throws away data that it doesn't care about. In particular,
 * unrecognized options, and extensions are discarded. It doesn't retain nesting
 * within types.
 *
 * Params:
 *   name_ = path of the {@code .proto} file; used for the package name passed
 *           to {@code ProtoPackage} and in syntax-error messages.
 *   data_ = the complete text of the schema.
 *
 * Returns: the parsed {@code ProtoPackage}.
 *
 * Throws: {@code DProtoSyntaxException} on malformed input, and
 *         {@code DProtoReservedWordException} when a symbol is a D keyword and
 *         the {@code dproto_reserved_fmt} option disables renaming.
 */
ProtoPackage ParseProtoSchema(const string name_, string data_)
{

    struct ProtoSchemaParser {

        /** The path to the {@code .proto} file. */
        string fileName;

        /** The entire document. */
        const char[] data;

        /** Our cursor within the document. {@code data[pos]} is the next character to be read. */
        int pos;

        /** The number of newline characters encountered thus far. */
        int line;

        /** The index of the most recent newline character. */
        int lineStart;


        /** Parses the whole document: repeatedly reads top-level declarations until the end of input. */
        ProtoPackage readProtoPackage() {
            auto ret = ProtoPackage(fileName);
            while (true) {
                readDocumentation();
                if (pos == data.length) {
                    return ret;
                }
                readDeclaration(ret);
            }
        }

        this(string _fileName, string _data)
        {
            fileName = _fileName;
            data = _data;
        }

    private:

        /**
         * Reads one declaration and stores it in {@code context}.
         *
         * Which declarations are legal depends on the compile-time type of
         * {@code context}; an illegal one raises {@code DProtoSyntaxException}.
         */
        void readDeclaration(Context, string ContextName = Context.stringof)(ref Context context) {
            // Skip unnecessary semicolons, occasionally used after a nested message declaration.
            if (peekChar() == ';') {
                pos++;
                return;
            }

            string label = readWord();

            switch(label) {
                case "syntax": {
                    static if(is(Context==ProtoPackage)) {
                        unexpected(context.syntax == null, "too many syntax statements");
                        unexpected(readChar() == '=', "Expected '=' after 'syntax'");
                        unexpected(peekChar() == '"', `Expected opening quote '"' after 'syntax ='`);
                        // readQuotedString keeps the surrounding quotes, hence the quoted literals below.
                        context.syntax = readQuotedString();
                        unexpected(context.syntax == `"proto2"` || context.syntax == `"proto3"`,
                                   "Unexpected syntax version: `" ~ context.syntax ~ "`");
                        unexpected(readChar() == ';', "Expected ';' after syntax declaration");
                        return;
                    } else {
                        throw new DProtoSyntaxException("syntax in " ~ ContextName);
                    }
                }
                case "package": {
                    static if(is(Context==ProtoPackage)) {
                        unexpected(context.packageName == null, "too many package names");
                        context.packageName = readSymbolName(context);
                        unexpected(readChar() == ';', "Expected ';'");
                        return;
                    } else {
                        throw new DProtoSyntaxException("package in " ~ ContextName);
                    }
                }
                case "import": {
                    static if(is(Context==ProtoPackage)) {
                        bool isPublicImport = false;
                        if(peekChar() == 'p') {
                            unexpected(readWord() == "public", "Expected 'public'");
                            isPublicImport = true;
                        }
                        if(peekChar() == '"') {
                            context.dependencies ~= Dependency(readQuotedPath (), isPublicImport);
                        }
                        unexpected(readChar() == ';', "Expected ';'");
                        return;
                    } else {
                        throw new DProtoSyntaxException("import in " ~ ContextName);
                    }
                }
                case "option": {
                    Option result = readOption('=');
                    unexpected(readChar() == ';', "Expected ';'");
                    context.options[result.name] = result.value;
                    return;
                }
                case "message": {
                    static if(hasMember!(Context, "messageTypes")) {
                        context.messageTypes ~= readMessage(context);
                        return;
                    } else {
                        throw new DProtoSyntaxException("message in " ~ ContextName);
                    }
                }
                case "enum": {
                    static if(hasMember!(Context, "enumTypes")) {
                        context.enumTypes ~= readEnumType(context);
                        return;
                    } else {
                        throw new DProtoSyntaxException("enum in " ~ ContextName);
                    }
                }
                case "extend": {
                    readExtend();
                    return;
                }
                case "service": {
                    static if(hasMember!(Context, "rpcServices")) {
                        context.rpcServices ~= readService(context);
                        return;
                    } else {
                        throw new DProtoSyntaxException("service in " ~ ContextName);
                    }
                }
                case "rpc": {
                    static if( hasMember!(Context, "rpc")) {
                        context.rpc ~= readRpc(context);
                        return;
                    } else {
                        throw new DProtoSyntaxException("rpc in " ~ ContextName);
                    }
                }
                case "required":
                case "optional":
                case "repeated": {
                    static if( hasMember!(Context, "fields") ) {
                        string type = readSymbolName(context);
                        context.fields ~= readField(label, type, context);
                        return;
                    } else {
                        throw new DProtoSyntaxException("Fields must be nested");
                    }
                }
                case "extensions": {
                    static if(!is(Context==ProtoPackage)) {
                        readExtensions(context);
                        return;
                    } else {
                        throw new DProtoSyntaxException("Extensions must be nested");
                    }
                }
                default: {
                    static if (is(Context == EnumType))
                    {
                        // Inside an enum, any other word is a value name: `NAME = tag;`
                        unexpected(readChar() == '=', "Expected '='");
                        int tag = readInt();
                        if (context.options.get("allow_alias", "true") == "false"
                                && context.values.values.canFind(tag))
                        {
                            throw new DProtoSyntaxException("Enum values must not be duplicated");
                        }
                        unexpected(readChar() == ';', "Expected ';'");
                        context.values[label] = tag;
                        return;
                    }
                    else
                    {
                        static if (hasMember!(Context, "fields"))
                        {
                            // A field without a label: the word just read is a builtin type name.
                            if (isBuiltinType(label))
                            {
                                context.fields ~= readField("optional", label, context);
                                return;
                            }
                        }
                        throw new DProtoSyntaxException("unexpected label: `" ~ label ~ '`');
                    }
                }
            }
        }

        /** Reads a message declaration; the new message starts with a copy of the enclosing context's options. */
        MessageType readMessage(Context)(Context context) {
            auto ret = MessageType(readSymbolName(context));
            ret.options = context.options;
            unexpected(readChar() == '{', "Expected '{'");
            while (true) {
                readDocumentation();
                if (peekChar() == '}') {
                    pos++;
                    break;
                }
                readDeclaration(ret);
            }
            return ret;
        }

        /** Reads an extend declaration (just ignores the content).
            @todo */
        void readExtend() {
            readName(); // Ignore this for now
            unexpected(readChar() == '{', "Expected '{'");
            // The body is not interpreted yet, so skip to the matching '}'.
            // Every iteration consumes input; the previous loop never advanced
            // the cursor and spun forever on a non-empty body.
            int depth = 1;
            while (depth > 0) {
                // peekChar skips whitespace and comments and raises at end of input.
                char c = peekChar();
                if (c == '"') {
                    // Braces inside string literals must not affect the depth.
                    readQuotedString();
                    continue;
                }
                pos++;
                if (c == '{') {
                    depth++;
                } else if (c == '}') {
                    depth--;
                }
            }
        }

        /** Reads a service declaration and returns it. */
        Service readService(Context)(Context context) {
            string name = readSymbolName(context);
            auto ret = Service(name);

            unexpected(readChar() == '{', "Expected '{'");
            while (true) {
                readDocumentation();
                if (peekChar() == '}') {
                    pos++;
                    break;
                }
                readDeclaration(ret);
            }
            return ret;
        }


        /** Reads an rpc method ({@code Name (Request) returns (Response)}) and returns it. */
        Service.Method readRpc(Context)(Context context) {
            string documentation = "";
            string name = readSymbolName(context);

            unexpected(readChar() == '(', "Expected '('");
            string requestType = readSymbolName(context);
            unexpected(readChar() == ')', "Expected ')'");

            unexpected(readWord() == "returns", "Expected 'returns'");

            unexpected(readChar() == '(', "Expected '('");
            string responseType = readSymbolName(context);
            // @todo check for option prefixes, responseType is the last in the white spaced list
            unexpected(readChar() == ')', "Expected ')'");

            auto ret = Service.Method(name, documentation, requestType, responseType);

            /* process service options and documentation */
            if (peekChar() == '{') {
                pos++;
                while (true) {
                    readDocumentation();
                    if (peekChar() == '}') {
                        pos++;
                        break;
                    }
                    readDeclaration(ret);
                }
            }
            else if (readChar() != ';') {
                throw new DProtoSyntaxException("Expected ';'");
            }
            return ret;
        }

        /** Reads an enumerated type declaration and returns it. */
        EnumType readEnumType(Context)(Context context) {
            auto ret = EnumType(readSymbolName(context));
            unexpected(readChar() == '{', "Expected '{'");
            while (true) {
                readDocumentation();
                if (peekChar() == '}') {
                    pos++;
                    break;
                }
                readDeclaration(ret);
            }
            return ret;
        }

        /**
         * Reads a field declaration and returns it. The label and type have
         * already been consumed by the caller; this reads
         * {@code name = tag [options];}.
         */
        Field readField(Context)(string label, string type, Context context) {
            Field.Requirement labelEnum = label.toUpper().to!(Field.Requirement)();
            string name = readSymbolName(context);
            unexpected(readChar() == '=', "Expected '='");
            int tag = readInt();
            // Accepted tags: 1 .. 18999 and 20000 .. 2^29 - 1.
            enforce((0 < tag && tag < 19000) || (19999 < tag && tag < 2^^29),
                    new DProtoSyntaxException(
                        "Invalid tag number: "~tag.to!string()));
            char c = peekChar();
            Options options;
            if (c == '[') {
                options = readMap('[', ']', '=');
                c = peekChar();
            }
            if (c == ';') {
                pos++;
                return Field(labelEnum, type, name, tag, options);
            }
            throw new DProtoSyntaxException("Expected ';'");
        }

        /** Reads extensions like "extensions 101;" or "extensions 101 to max;".
            @todo */
        Extension readExtensions(Context)(Context context) {
            Extension ret;
            int minVal = readInt(); // Range start.
            // Record the start for both forms; previously it was dropped for "N to M".
            ret.minVal = minVal;
            if (peekChar() != ';') {
                unexpected(readWord() == "to", "Expected 'to'");
                string maxVal = readWord(); // Range end.
                // "max" leaves ret.maxVal at whatever Extension initialises it to.
                if(maxVal != "max") {
                    // startsWith is safe for one-character bounds, unlike slicing [0..2].
                    if(maxVal.startsWith("0x")) {
                        ret.maxVal = maxVal["0x".length .. $].to!uint(16);
                    } else {
                        ret.maxVal = maxVal.to!uint();
                    }
                }
            } else {
                ret.maxVal = minVal;
            }
            unexpected(readChar() == ';', "Expected ';'");
            return ret;
        }

        /** Reads a option containing a name, an '=' or ':', and a value. */
        Option readOption(char keyValueSeparator) {
            string name = readName(); // Option name.
            unexpected(readChar() == keyValueSeparator, "Expected '" ~ keyValueSeparator ~ "' in option");
            string value = (peekChar() == '{') ? readMap('{', '}', ':').to!string() : readString();
            return Option(name, value);
        }

        /**
         * Returns a map of string keys and values. This is similar to a JSON object,
         * with '{' and '}' surrounding the map, ':' separating keys from values, and
         * ',' separating entries.
         */
        Options readMap(char openBrace, char closeBrace, char keyValueSeparator) {
            unexpected(readChar() == openBrace, openBrace ~ " to begin map");
            Options result;
            while (peekChar() != closeBrace) {

                Option option = readOption(keyValueSeparator);
                result[option.name] = option.value;

                char c = peekChar();
                if (c == ',') {
                    pos++;
                } else if (c != closeBrace) {
                    throw new DProtoSyntaxException("Expected ',' or '" ~ closeBrace ~ "'");
                }
            }

            // If we see the close brace, finish immediately. This handles {}/[] and ,}/,] cases.
            pos++;
            return result;
        }

        /** Reads a non-whitespace character and returns it. */
        char readChar() {
            char result = peekChar();
            pos++;
            return result;
        }

        /**
         * Peeks a non-whitespace character and returns it. The only difference
         * between this and {@code readChar} is that this doesn't consume the char.
         * Raises a syntax error at end of input.
         */
        char peekChar() {
            skipWhitespace(true);
            unexpected(pos != data.length, "unexpected end of file");
            return data[pos];
        }

        /** Reads a quoted or unquoted string and returns it. */
        string readString() {
            skipWhitespace(true);
            return peekChar() == '"' ? readQuotedString() : readWord();
        }

        /**
         * Reads a double-quoted string and returns it with the surrounding
         * quotes kept. A backslash makes the following character literal.
         */
        string readQuotedString() {
            skipWhitespace(true);
            auto c = readChar();
            enforce(c == '"', new DProtoSyntaxException("Expected \" but got " ~ c));
            string result;
            while (pos < data.length) {
                c = data[pos++];
                if (c == '"') return '"'~result~'"';

                if (c == '\\') {
                    unexpected(pos != data.length, "unexpected end of file");
                    c = data[pos++];
                }

                result ~= c;
                if (c == '\n') newline();
            }
            throw new DProtoSyntaxException("unterminated string");
        }

        /** Reads a double-quoted import path and returns it without the quotes. */
        string readQuotedPath() {
            skipWhitespace(true);
            unexpected(readChar() == '"', "imports should be quoted");
            auto ret = readWord(`a-zA-Z0-9_.\-/`);
            unexpected(readChar() == '"', "imports should be quoted");
            return ret;
        }

        /** Reads a (paren-wrapped), [square-wrapped] or naked symbol name. */
        string readName() {
            string optionName;
            char c = peekChar();
            if (c == '(') {
                pos++;
                optionName = readWord();
                unexpected(readChar() == ')', "Expected ')'");
            } else if (c == '[') {
                pos++;
                optionName = readWord();
                unexpected(readChar() == ']', "Expected ']'");
            } else {
                optionName = readWord();
            }
            return optionName;
        }

        /**
         * Reads a symbol name. A name that is a D keyword is rewritten with the
         * {@code dproto_reserved_fmt} option (default {@code "%s_"}); a format of
         * exactly {@code "%s"} raises {@code DProtoReservedWordException} instead.
         */
        string readSymbolName(Context)(Context context) {
            string name = readWord();
            if(isDKeyword(name))
            {
                // Wrapped in quotes to properly evaluate string
                string reservedFmtRaw = context.options.get("dproto_reserved_fmt", `"%s_"`);
                string reservedFmt;
                formattedRead(reservedFmtRaw, `"%s"`, &reservedFmt);
                if(reservedFmt != "%s")
                {
                    name = reservedFmt.format(name);
                }
                else
                {
                    throw new DProtoReservedWordException(name);
                }
            }
            return name;
        }

        /** Reads a non-empty word made of characters matching {@code pattern} and returns it. */
        string readWord(string pattern = `a-zA-Z0-9_.\-`) {
            skipWhitespace(true);
            int start = pos;
            while (pos < data.length) {
                char c = data[pos];
                if(c.inPattern(pattern)) {
                    pos++;
                } else {
                    break;
                }
            }
            unexpected(start != pos, "Expected a word");
            return data[start .. pos].idup;
        }

        /** Reads an integer (decimal, or hexadecimal with a {@code 0x} prefix) and returns it. */
        int readInt() {
            string tag = readWord();
            try {
                int radix = 10;
                if (tag.startsWith("0x")) {
                    tag = tag["0x".length .. $];
                    radix = 16;
                }
                return tag.to!int(radix);
            } catch (Exception e) {
                throw new DProtoSyntaxException(
                        "Expected an integer but was `" ~ tag ~ "`",
                        e.msg);
            }
        }

        /**
         * Like {@link #skipWhitespace}, but this returns a string containing all
         * comment text. By convention, comments before a declaration document that
         * declaration.
         */
        string readDocumentation() {
            string result = null;
            while (true) {
                skipWhitespace(false);
                if (pos == data.length || data[pos] != '/') {
                    return result != null ? cleanUpDocumentation(result) : "";
                }
                string comment = readComment();
                result = (result == null) ? comment : (result ~ "\n" ~ comment);
            }
        }

        /** Reads a comment and returns its body. */
        string readComment() {
            enforce(!(pos == data.length || data[pos] != '/'), new DProtoSyntaxException(""));
            pos++;
            int commentType = pos < data.length ? data[pos++] : -1;
            if (commentType == '*') {
                int start = pos;
                while (pos + 1 < data.length) {
                    if (data[pos] == '*' && data[pos + 1] == '/') {
                        pos += 2;
                        return data[start .. pos - 2].idup;
                    } else {
                        char c = data[pos++];
                        if (c == '\n') newline();
                    }
                }
                throw new DProtoSyntaxException("unterminated comment");
            } else if (commentType == '/') {
                int start = pos;
                int end = -1;
                while (pos < data.length) {
                    if (data[pos++] == '\n') {
                        end = pos - 1; // exclude the newline itself
                        newline();
                        break;
                    }
                }
                // A line comment that runs to end of input has no newline to
                // strip; keep all of it. The old `pos - 1` dropped its last
                // character and produced an invalid slice for an empty one.
                if (end < 0) {
                    end = pos;
                }
                return data[start .. end].idup;
            } else {
                throw new DProtoSyntaxException("unexpected '/'");
            }
        }

        /**
         * Returns a string like {@code comment}, but without leading whitespace or
         * asterisks.
         */
        string cleanUpDocumentation(string comment) {
            string result;
            bool beginningOfLine = true;
            for (int i = 0; i < comment.length; i++) {
                char c = comment[i];
                if (!beginningOfLine || ! " \t*".canFind(c)) {
                    result ~= c;
                    beginningOfLine = false;
                }
                if (c == '\n') {
                    beginningOfLine = true;
                }
            }
            return result.strip();
        }

        /**
         * Skips whitespace characters and optionally comments. When this returns,
         * either {@code pos == data.length} or a non-whitespace character.
         */
        void skipWhitespace(bool skipComments) {
            while (pos < data.length) {
                char c = data[pos];
                if (" \t\r\n".canFind(c)) {
                    pos++;
                    if (c == '\n') newline();
                } else if (skipComments && c == '/') {
                    readComment();
                } else {
                    break;
                }
            }
        }

        /** Call this everytime a '\n' is encountered. */
        void newline() {
            line++;
            lineStart = pos;
        }

        /**
         * Raises a {@code DProtoSyntaxException} carrying the file name and the
         * current line/column when {@code value} is false.
         */
        void unexpected(bool value, string message)
        {
            if (!value)
            {
                // The exception must be thrown; merely constructing it let
                // every failed check pass silently.
                throw new DProtoSyntaxException(
                        "Syntax error in %s at %d:%d: %s".format(fileName, line + 1,
                            (pos - lineStart + 1), message));
            }
        }

        /** Returns true if the name is a reserved word in D
         *
         * This will cause problems trying to use them as variables
         * Note: Some keywords are specifically whitelisted,
         * in order to allow usage of the protobuf names
         */
        bool isDKeyword(string name)
        {
            // dfmt off
            enum KEYWORDS = [
                "abstract", "alias", "align", "asm", "assert", "auto",
                "body", /+ "bool", +/ "break", "byte",
                "case", "cast", "catch", "cdouble", "cent", "cfloat", "char", "class", "const", "continue", "creal",
                "dchar", "debug", "default", "delegate", "delete", "deprecated", "do", /+ "double", +/
                "else", "enum", "export", "extern",
                "false", "final", "finally", /+ "float", +/ "for", "foreach", "foreach_reverse", "function",
                "goto",
                "idouble", "if", "ifloat", "immutable", "import", "in", "inout", "int", "interface", "invariant", "ireal", "is",
                "lazy", "long",
                "macro", "mixin", "module",
                "new", "nothrow", "null",
                "out", "override",
                "package", "pragma", "private", "protected", "public", "pure",
                "real", "ref", "return",
                "scope", "shared", "short", "static", "struct", "super", "switch", "synchronized",
                "template", "this", "throw", "true", "try", "typedef", "typeid", "typeof",
                "ubyte", "ucent", "uint", "ulong", "union", "unittest", "ushort",
                "version", "void", "volatile",
                "wchar", "while", "with",
                "__FILE__", "__MODULE__", "__LINE__", "__FUNCTION__", "__PRETTY_FUNCTION__",
                "__gshared", "__traits", "__vector", "__parameters",
            ];
            // dfmt on
            return KEYWORDS.canFind(name);
        }

    }

    return ProtoSchemaParser(name_, data_).readProtoPackage();

}