/*******************************************************************************
 * Convert a .proto file into a string representing the class
 *
 * Author: Matthew Soucy, msoucy@csh.rit.edu
 * Date: Oct 5, 2013
 * Version: 0.0.2
 */
module dproto.parse;

import dproto.exception;
import dproto.intermediate;

import std.algorithm;
import std.array;
import std.conv;
import std.exception;
import std.stdio;
import std.string;
import std.traits;

/**
 * Basic parser for {@code .proto} schema declarations.
 *
 * <p>This parser throws away data that it doesn't care about. In particular,
 * unrecognized options, and extensions are discarded. It doesn't retain nesting
 * within types.
 *
 * Params:
 *     name_ = path of the {@code .proto} file; only used in error messages and
 *             to construct the resulting package.
 *     data_ = the complete text of the schema.
 *
 * Returns: the parsed package.
 *
 * Throws: DProtoException when the schema is syntactically invalid.
 */
ProtoPackage ParseProtoSchema(const string name_, string data_) {

	struct ProtoSchemaParser {

		/** The path to the {@code .proto} file. */
		string fileName;

		/** The entire document. */
		const char[] data;

		/** Our cursor within the document. {@code data[pos]} is the next character to be read. */
		int pos;

		/** The number of newline characters encountered thus far. */
		int line;

		/**
		 * The index of the first character after the most recent newline, i.e.
		 * the start of the current line. Used to compute the error column.
		 */
		int lineStart;


		/** Parses the whole document, one top-level declaration at a time. */
		ProtoPackage readProtoPackage() {
			auto ret = ProtoPackage(fileName);
			while (true) {
				// Leading comments are consumed (and currently discarded).
				readDocumentation();
				if (pos == data.length) {
					return ret;
				}
				readDeclaration(ret);
			}
		}

		this(string _fileName, string _data)
		{
			fileName = _fileName;
			data = _data;
		}

	private:

		/**
		 * Reads a single declaration and stores it into {@code context}.
		 *
		 * Which declarations are legal depends on the static type of
		 * {@code Context} (package, message or enum); illegal ones are
		 * reported as syntax errors.
		 */
		void readDeclaration(Context, string ContextName = Context.stringof)(ref Context context) {
			// Skip unnecessary semicolons, occasionally used after a nested message declaration.
			if (peekChar() == ';') {
				pos++;
				return;
			}

			string label = readWord();

			switch(label) {
				case "package": {
					static if(is(Context==ProtoPackage)) {
						enforce(context.packageName == null, unexpected("too many package names"));
						context.packageName = readName();
						enforce(readChar() == ';', unexpected("expected ';'"));
						return;
					} else {
						throw unexpected("package in " ~ ContextName);
					}
				}
				case "import": {
					static if(is(Context==ProtoPackage)) {
						context.dependencies ~= readString();
						enforce(readChar() == ';', unexpected("expected ';'"));
						return;
					} else {
						throw unexpected("import in " ~ ContextName);
					}
				}
				case "option": {
					Option result = readOption('=');
					enforce(readChar() == ';', unexpected("expected ';'"));
					context.options[result.name] = result.value;
					return;
				}
				case "message": {
					static if(hasMember!(Context, "messageTypes")) {
						context.messageTypes ~= readMessage();
						return;
					} else {
						throw unexpected("message in " ~ ContextName);
					}
				}
				case "enum": {
					static if(hasMember!(Context, "enumTypes")) {
						context.enumTypes ~= readEnumType();
						return;
					} else {
						throw unexpected("enum in " ~ ContextName);
					}
				}
				/+
				case "service": {
					readService();
					return;
				}
				+/
				case "extend": {
					readExtend();
					return;
				}
				/+
				case "rpc": {
					static if( hasMember!(Context, "rpc")) {
						readRpc();
						return;
					} else {
						throw unexpected("rpc in " ~ context)
					}
				}
				+/
				case "required":
				case "optional":
				case "repeated": {
					static if( hasMember!(Context, "fields") ) {
						context.fields ~= readField(label);
						return;
					} else {
						throw unexpected("fields must be nested");
					}
				}
				case "extensions": {
					static if(!is(Context==ProtoPackage)) {
						// The parsed range is validated but not stored.
						readExtensions();
						return;
					} else {
						throw unexpected("extensions must be nested");
					}
				}
				default: {
					// Inside an enum every other word is a value name: `NAME = tag;`.
					static if(is(Context==EnumType)) {
						enforce(readChar() == '=', unexpected("expected '='"));
						int tag = readInt();
						enforce(readChar() == ';', unexpected("expected ';'"));
						context.values[label] = tag;
						return;
					} else {
						throw unexpected("unexpected label: " ~ label);
					}
				}
			}
		}

		/** Reads a message declaration. */
		MessageType readMessage() {
			auto ret = MessageType(readName());
			enforce(readChar() == '{', unexpected("expected '{'"));
			while (true) {
				readDocumentation();
				if (peekChar() == '}') {
					pos++;
					break;
				}
				readDeclaration(ret);
			}
			return ret;
		}

		/**
		 * Reads an extend declaration (just ignores the content).
		 *
		 * The body is skipped up to the matching closing brace. Nested braces
		 * and double-quoted strings are tracked so that a '}' inside them does
		 * not end the block early. Reaching the end of the file before the
		 * block is closed is reported as a syntax error by {@code peekChar}.
		 * @todo
		 */
		void readExtend() {
			readName(); // Ignore this for now
			enforce(readChar() == '{', unexpected("expected '{'"));
			int depth = 1;
			while (depth > 0) {
				// peekChar skips whitespace and comments and throws at end of file,
				// so this loop always either advances or terminates.
				char c = peekChar();
				if (c == '"') {
					readQuotedString();
					continue;
				}
				pos++;
				if (c == '{') {
					depth++;
				} else if (c == '}') {
					depth--;
				}
			}
		}

		static if(0)
		/** Reads a service declaration and returns it.
		    @todo */
		Service readService() {
			string name = readName();
			Service.Method[] methods = [];
			enforce(readChar() == '{', unexpected("expected '{'"));
			while (true) {
				string methodDocumentation = readDocumentation();
				if (peekChar() == '}') {
					pos++;
					break;
				}
				Object declared = readDeclaration(Context.SERVICE);
				if (cast(Service.Method)declared) {
					methods.add(cast(Service.Method) declared);
				}
			}
			return new Service(name, methods);
		}

		/** Reads an enumerated type declaration and returns it. */
		EnumType readEnumType() {
			auto ret = EnumType(readName());
			enforce(readChar() == '{', unexpected("expected '{'"));
			while (true) {
				readDocumentation();
				if (peekChar() == '}') {
					pos++;
					break;
				}
				readDeclaration(ret);
			}
			return ret;
		}

		/**
		 * Reads a field declaration and returns it.
		 *
		 * {@code label} is the already-consumed requirement keyword
		 * ("required", "optional" or "repeated").
		 */
		Field readField(string label) {
			Field.Requirement labelEnum = label.toUpper().to!(Field.Requirement)();
			string type = readName();
			string name = readName();
			enforce(readChar() == '=', unexpected("expected '='"));
			int tag = readInt();
			// Valid tags are 1 .. 2^29-1, excluding the range 19000 .. 19999.
			enforce((0 < tag && tag < 19000) || (19999 < tag && tag < 2^^29), new DProtoException("Invalid tag number: "~tag.to!string()));
			char c = peekChar();
			Options options;
			if (c == '[') {
				options = readMap('[', ']', '=');
				c = peekChar();
			}
			if (c == ';') {
				pos++;
				return Field(labelEnum, type, name, tag, options);
			}
			throw unexpected("expected ';'");
		}

		/**
		 * Reads extensions like "extensions 101;" or "extensions 101 to max;".
		 *
		 * The range start is always stored in {@code minVal}. For an upper
		 * bound of "max" {@code maxVal} is left at whatever value a
		 * default-initialised {@code Extension} carries.
		 * @todo
		 */
		Extension readExtensions() {
			Extension ret;
			int minVal = readInt(); // Range start.
			ret.minVal = minVal;
			if (peekChar() != ';') {
				enforce(readWord() == "to", unexpected("expected 'to'"));
				string maxVal = readWord(); // Range end.
				if(maxVal != "max") {
					// startsWith is safe for bounds shorter than two characters.
					if(maxVal.startsWith("0x")) {
						ret.maxVal = maxVal[2..$].to!uint(16);
					} else {
						ret.maxVal = maxVal.to!uint();
					}
				}
			} else {
				// Single value: the range covers exactly that tag.
				ret.maxVal = minVal;
			}
			enforce(readChar() == ';', unexpected("expected ';'"));
			return ret;
		}

		/** Reads a option containing a name, an '=' or ':', and a value. */
		Option readOption(char keyValueSeparator) {
			string name = readName(); // Option name.
			enforce(readChar() == keyValueSeparator, unexpected("expected '" ~ keyValueSeparator ~ "' in option"));
			string value = (peekChar() == '{') ? readMap('{', '}', ':').to!string() : readString();
			return Option(name, value);
		}

		/**
		 * Returns a map of string keys and values. This is similar to a JSON object,
		 * with '{' and '}' surrounding the map, ':' separating keys from values, and
		 * ',' separating entries.
		 */
		Options readMap(char openBrace, char closeBrace, char keyValueSeparator) {
			enforce(readChar() == openBrace, unexpected("expected '" ~ openBrace ~ "' to begin map"));
			Options result;
			while (peekChar() != closeBrace) {

				Option option = readOption(keyValueSeparator);
				result[option.name] = option.value;

				char c = peekChar();
				if (c == ',') {
					pos++;
				} else if (c != closeBrace) {
					throw unexpected("expected ',' or '" ~ closeBrace ~ "'");
				}
			}

			// If we see the close brace, finish immediately. This handles {}/[] and ,}/,] cases.
			pos++;
			return result;
		}

		static if(0)
		/** Reads an rpc method and returns it.
		    @todo */
		Service.Method readRpc(string documentation) {
			string name = readName();

			enforce(readChar() == '(', unexpected("expected '('"));
			string requestType = readName();
			enforce(readChar() == ')', unexpected("expected ')'"));

			enforce(readWord() == "returns", unexpected("expected 'returns'"));

			enforce(readChar() == '(', unexpected("expected '('"));
			string responseType = readName();
			enforce(readChar() == ')', unexpected("expected ')'"));

			Option[] options = [];
			if (peekChar() == '{') {
				pos++;
				while (true) {
					string methodDocumentation = readDocumentation();
					if (peekChar() == '}') {
						pos++;
						break;
					}
					Object declared = readDeclaration(methodDocumentation, Context.RPC);
					if (cast(Option)declared) {
						Option option = cast(Option) declared;
						options.put(option.getName(), option.getValue());
					}
				}
			} else if (readChar() != ';') throw unexpected("expected ';'");

			return new Service.Method(name, documentation, requestType, responseType, options);
		}

	private:

		/** Reads a non-whitespace character and returns it. */
		char readChar() {
			char result = peekChar();
			pos++;
			return result;
		}

		/**
		 * Peeks a non-whitespace character and returns it. The only difference
		 * between this and {@code readChar} is that this doesn't consume the char.
		 * Whitespace and comments in front of the character are consumed.
		 */
		char peekChar() {
			skipWhitespace(true);
			enforce(pos != data.length, unexpected("unexpected end of file"));
			return data[pos];
		}

		/** Reads a quoted or unquoted string and returns it. */
		string readString() {
			skipWhitespace(true);
			return peekChar() == '"' ? readQuotedString() : readWord();
		}

		/**
		 * Reads a double-quoted string. The returned value is wrapped in double
		 * quotes again; a backslash makes the following character be taken
		 * literally.
		 */
		string readQuotedString() {
			enforce(readChar() == '"', new DProtoException(""));
			string result;
			while (pos < data.length) {
				char c = data[pos++];
				if (c == '"') return '"'~result~'"';

				if (c == '\\') {
					enforce(pos != data.length, unexpected("unexpected end of file"));
					c = data[pos++];
				}

				result ~= c;
				if (c == '\n') newline();
			}
			throw unexpected("unterminated string");
		}

		/** Reads a (paren-wrapped), [square-wrapped] or naked symbol name. */
		string readName() {
			string optionName;
			char c = peekChar();
			if (c == '(') {
				pos++;
				optionName = readWord();
				enforce(readChar() == ')', unexpected("expected ')'"));
			} else if (c == '[') {
				pos++;
				optionName = readWord();
				enforce(readChar() == ']', unexpected("expected ']'"));
			} else {
				optionName = readWord();
			}
			return optionName;
		}

		/** Reads a non-empty word and returns it. */
		string readWord() {
			skipWhitespace(true);
			int start = pos;
			while (pos < data.length) {
				char c = data[pos];
				if(c.inPattern(`a-zA-Z0-9_.\-`)) {
					pos++;
				} else {
					break;
				}
			}
			enforce(start != pos, unexpected("expected a word"));
			return data[start .. pos].idup;
		}

		/** Reads an integer (decimal, or hexadecimal with a "0x" prefix) and returns it. */
		int readInt() {
			string tag = readWord();
			try {
				int radix = 10;
				if (tag.startsWith("0x")) {
					tag = tag["0x".length .. $];
					radix = 16;
				}
				return tag.to!int(radix);
			} catch (Exception e) {
				throw unexpected("expected an integer but was " ~ tag);
			}
		}

		/**
		 * Like {@link #skipWhitespace}, but this returns a string containing all
		 * comment text. By convention, comments before a declaration document that
		 * declaration.
		 */
		string readDocumentation() {
			string result = null;
			while (true) {
				skipWhitespace(false);
				if (pos == data.length || data[pos] != '/') {
					return result != null ? cleanUpDocumentation(result) : "";
				}
				string comment = readComment();
				result = (result == null) ? comment : (result ~ "\n" ~ comment);
			}
		}

		/**
		 * Reads a comment and returns its body (without the comment markers and,
		 * for line comments, without the terminating newline).
		 */
		string readComment() {
			enforce(!(pos == data.length || data[pos] != '/'), new DProtoException(""));
			pos++;
			int commentType = pos < data.length ? data[pos++] : -1;
			if (commentType == '*') {
				int start = pos;
				while (pos + 1 < data.length) {
					if (data[pos] == '*' && data[pos + 1] == '/') {
						pos += 2;
						return data[start .. pos - 2].idup;
					} else {
						char c = data[pos++];
						if (c == '\n') newline();
					}
				}
				throw unexpected("unterminated comment");
			} else if (commentType == '/') {
				int start = pos;
				// `end` trails the cursor so the slice is right both when the
				// comment is closed by a newline and when it runs to end of file.
				int end = pos;
				while (pos < data.length) {
					char c = data[pos++];
					if (c == '\n') {
						newline();
						break;
					}
					end = pos;
				}
				return data[start .. end].idup;
			} else {
				throw unexpected("unexpected '/'");
			}
		}

		/**
		 * Returns a string like {@code comment}, but without leading whitespace or
		 * asterisks.
		 */
		string cleanUpDocumentation(string comment) {
			string result;
			bool beginningOfLine = true;
			for (int i = 0; i < comment.length; i++) {
				char c = comment[i];
				if (!beginningOfLine || !" \t*".canFind(c)) {
					result ~= c;
					beginningOfLine = false;
				}
				if (c == '\n') {
					beginningOfLine = true;
				}
			}
			return result.strip();
		}

		/**
		 * Skips whitespace characters and optionally comments. When this returns,
		 * either {@code pos == data.length} or a non-whitespace character.
		 */
		void skipWhitespace(bool skipComments) {
			while (pos < data.length) {
				char c = data[pos];
				if (" \t\r\n".canFind(c)) {
					pos++;
					if (c == '\n') newline();
				} else if (skipComments && c == '/') {
					readComment();
				} else {
					break;
				}
			}
		}

		/**
		 * Call this everytime a '\n' is encountered, after the cursor has been
		 * moved past it.
		 */
		void newline() {
			line++;
			lineStart = pos;
		}

		/**
		 * Builds a syntax-error exception carrying the file name and the current
		 * 1-based line and column. The exception is returned, not thrown, so it
		 * can be used both with {@code throw} and as the lazy argument of
		 * {@code enforce}.
		 */
		Exception unexpected(string message) {
			return new DProtoException("Syntax error in %s at %d:%d: %s"
				.format(fileName, line+1, (pos - lineStart + 1), message));
		}

	}

	return ProtoSchemaParser(name_, data_).readProtoPackage();

}