1 /*******************************************************************************
2  * Convert a .proto file into a string representing the class
3  *
4  * Author: Matthew Soucy, dproto@msoucy.me
5  */
6 module dproto.parse;
7 
8 import dproto.exception;
9 import dproto.intermediate;
10 import dproto.serialize : isBuiltinType;
11 
12 import std.algorithm;
13 import std.array;
14 import std.ascii;
15 import std.conv;
16 import std.exception;
17 import std.format;
18 import std.stdio;
19 import std..string;
20 import std.traits;
21 
/**
 * Characters accepted inside a bare word (identifiers, dotted names, numbers).
 *
 * The set is matched character-by-character with `canFind`, not as a regular
 * expression, so no escaping is needed: a backslash in the literal would make
 * `\` itself a legal word character.
 */
enum wordPattern = std.ascii.letters ~ std.ascii.digits ~ `_.-`;

/** Characters accepted inside a quoted import path: word characters plus `/`. */
enum pathPattern = wordPattern ~ `/`;
24 
/**
 * Basic parser for `.proto` schema declarations.
 *
 * The text is scanned once and every recognised declaration is stored in the
 * returned `ProtoPackage`: syntax version, package name, imports, options,
 * messages (with nested messages, enums and fields), enums and services.
 *
 * Limitations visible in this parser:
 * $(UL
 *   $(LI `extend` blocks are skipped without being interpreted.)
 *   $(LI `extensions` ranges are parsed and validated but not stored.)
 *   $(LI `map` and `oneof` are rejected as not yet implemented.)
 *   $(LI Comments are consumed; their text is not attached to declarations.)
 * )
 *
 * Params:
 *   name_ = name of the schema file; passed to the `ProtoPackage` constructor
 *           and reported as the file of every syntax error
 *   data_ = complete text of the schema
 *
 * Returns: the package description built from `data_`.
 *
 * Throws:
 *   `DProtoSyntaxException` on malformed input,
 *   `DProtoReservedWordException` when a name is a D keyword and the
 *   `dproto_reserved_fmt` option disables renaming.
 */
ProtoPackage ParseProtoSchema(const string name_, string data_)
{

	struct ProtoSchemaParser {

		/** The path to the `.proto` file. */
		string fileName;

		/** The entire document. */
		const char[] data;

		/** Our cursor within the document. `data[pos]` is the next character to be read. */
		int pos;

		/** The number of newline characters encountered thus far. */
		int line;

		/** The value of `pos` recorded right after the most recent newline was consumed. */
		int lineStart;

		/** Are we parsing proto 3 syntax? */
		bool isProto3;

		/**
		 * Parses the whole document.
		 *
		 * Returns: the package populated with every top-level declaration.
		 */
		ProtoPackage readProtoPackage() {
			auto ret = ProtoPackage(fileName);
			while (true) {
				readDocumentation();
				if (pos == data.length) {
					return ret;
				}
				readDeclaration(ret);
			}
		}

		/**
		 * Params:
		 *   _fileName = name used for the package and in error reports
		 *   _data     = schema text to parse
		 */
		this(string _fileName, string _data)
		{
			fileName = _fileName;
			data = _data;
		}

	private:

		/**
		 * Reads one declaration and stores it in `context`.
		 *
		 * The first word selects the kind of declaration. Which kinds are legal
		 * is decided at compile time from `Context`: either by its exact type
		 * (`ProtoPackage`, `EnumType`) or by the members it exposes
		 * (`messageTypes`, `enumTypes`, `rpcServices`, `rpc`, `fields`).
		 * A declaration that is not legal for the context raises a syntax error.
		 *
		 * Params:
		 *   context = the enclosing package, message, enum, service or rpc method
		 *
		 * Throws: `DProtoSyntaxException` on any malformed or misplaced declaration.
		 */
		void readDeclaration(Context, string ContextName = Context.stringof)(ref Context context) {
			// Skip unnecessary semicolons, occasionally used after a nested message declaration.
			if (peekChar() == ';') {
				pos++;
				return;
			}

			string label = readWord();

			switch(label) {
				case "syntax": {
					static if(is(Context==ProtoPackage)) {
						unexpected(context.syntax == null, "Too many syntax statements");
						unexpected(readChar() == '=', "Expected '=' after 'syntax'");
						unexpected(peekChar() == '"', `Expected opening quote '"' after 'syntax ='`);
						// readQuotedString keeps the surrounding quotes, hence the quoted comparisons.
						context.syntax = readQuotedString();
						unexpected(context.syntax == `"proto2"` || context.syntax == `"proto3"`,
						           "Unexpected syntax version: `" ~ context.syntax ~ "`");
						isProto3 = context.syntax == `"proto3"`;
						unexpected(readChar() == ';', "Expected ';' after syntax declaration");
						return;
					} else {
						throw syntaxError("syntax in " ~ ContextName);
					}
				}
				case "package": {
					static if(is(Context==ProtoPackage)) {
						unexpected(context.packageName == null, "too many package names");
						context.packageName = readSymbolName(context);
						unexpected(readChar() == ';', "Expected ';'");
						return;
					} else {
						throw syntaxError("package in " ~ ContextName);
					}
				}
				case "import": {
					static if(is(Context==ProtoPackage)) {
						bool isPublicImport = false;
						bool isWeakImport = false;
						if(peekChar() == 'p') {
							unexpected(readWord() == "public", "Expected 'public'");
							isPublicImport = true;
						} else if(peekChar() == 'w') {
							unexpected(readWord() == "weak", "Expected 'weak'");
							isWeakImport = true;
						}
						// The path is mandatory: readQuotedPath reports a missing quote itself,
						// so `import;` or `import public;` is no longer silently accepted.
						context.dependencies ~= Dependency(readQuotedPath(), isPublicImport, isWeakImport);
						unexpected(readChar() == ';', "Expected ';'");
						return;
					} else {
						throw syntaxError("import in " ~ ContextName);
					}
				}
				case "option": {
					Option result = readOption('=');
					unexpected(readChar() == ';', "Expected ';'");
					context.options[result.name] = result.value;
					return;
				}
				case "message": {
					static if(hasMember!(Context, "messageTypes")) {
						context.messageTypes ~= readMessage(context);
						return;
					} else {
						throw syntaxError("message in " ~ ContextName);
					}
				}
				case "enum": {
					static if(hasMember!(Context, "enumTypes")) {
						context.enumTypes ~= readEnumType(context);
						return;
					} else {
						throw syntaxError("enum in " ~ ContextName);
					}
				}
				case "extend": {
					readExtend();
					return;
				}
				case "service": {
					static if(hasMember!(Context, "rpcServices")) {
						context.rpcServices ~= readService(context);
						return;
					} else {
						throw syntaxError("service in " ~ ContextName);
					}
				}
				case "rpc": {
					static if( hasMember!(Context, "rpc")) {
						context.rpc ~= readRpc(context);
						return;
					} else {
						throw syntaxError("rpc in " ~ ContextName);
					}
				}
				case "required":
				case "optional":
					if( isProto3 ) {
						throw syntaxError("Field label '" ~ label ~ "' not allowed");
					}
					goto case;
				case "repeated": {
					static if( hasMember!(Context, "fields") ) {
						string type = readSymbolName(context);
						auto newfield = readField(label, type, context);
						// Field numbers must be unique within the enclosing type.
						unexpected(context.fields.all!(a => a.id != newfield.id)(),
									"Repeated field ID");
						context.fields ~= newfield;
						return;
					} else {
						throw syntaxError("Fields must be nested");
					}
				}
				case "map":
				case "oneof": {
					throw syntaxError("'" ~ label ~ "' not yet implemented");
				}
				case "extensions": {
					static if(!is(Context==ProtoPackage)) {
						// The range is validated but the result is currently not stored.
						readExtensions(context);
						return;
					} else {
						throw syntaxError("Extensions must be nested");
					}
				}
				default: {
					static if (is(Context == EnumType))
					{
						// Inside an enum every other word is a value name: `NAME = number;`
						unexpected(readChar() == '=', "Expected '='");
						int tag = readInt();
						if (context.options.get("allow_alias", "true") == "false"
								&& context.values.values.canFind(tag))
						{
							throw syntaxError("Enum values must not be duplicated");
						}
						unexpected(readChar() == ';', "Expected ';'");
						context.values[label] = tag;
						return;
					}
					else static if (hasMember!(Context, "fields"))
					{
						// A field without a label: the word just read is its type,
						// and the field is treated as "optional".
						string type = reservedName(context, label);
						auto newfield = readField("optional", type, context);
						unexpected(context.fields.all!(a => a.id != newfield.id)(),
									"Repeated field ID");
						context.fields ~= newfield;
						return;
					}
					else
					{
						throw syntaxError("unexpected label: `" ~ label ~ '`');
					}
				}
			}
		}

		/**
		 * Reads a message declaration, starting at its name.
		 *
		 * Params:
		 *   context = the enclosing declaration; its options seed the message's options
		 *
		 * Returns: the message with everything declared between its braces.
		 */
		MessageType readMessage(Context)(Context context) {
			auto ret = MessageType(readSymbolName(context));
			// NOTE(review): if Options is an associative array this shares storage with
			// the parent, so options set inside the message would also show up in the
			// parent's options - confirm whether a copy is intended.
			ret.options = context.options;
			unexpected(readChar() == '{', "Expected '{'");
			while (true) {
				readDocumentation();
				if (peekChar() == '}') {
					pos++;
					break;
				}
				readDeclaration(ret);
			}
			return ret;
		}

		/**
		 * Reads an extend declaration and discards its content.
		 *
		 * The body is skipped by matching braces; quoted strings are consumed as a
		 * unit so that braces inside them are not counted. Comments are skipped by
		 * `peekChar`. Reaching the end of the input before the closing brace raises
		 * a syntax error.
		 *
		 * @todo interpret the extension fields instead of skipping them
		 */
		void readExtend() {
			readName(); // Ignore this for now
			unexpected(readChar() == '{', "Expected '{'");
			int depth = 1;
			while (depth > 0) {
				char c = peekChar();
				if (c == '"') {
					readQuotedString();
					continue;
				}
				// Always consume the character so the loop is guaranteed to make progress.
				pos++;
				if (c == '{') {
					depth++;
				} else if (c == '}') {
					depth--;
				}
			}
		}

		/**
		 * Reads a service declaration, starting at its name.
		 *
		 * Params:
		 *   context = the enclosing declaration, consulted when renaming reserved words
		 *
		 * Returns: the service with everything declared between its braces.
		 */
		Service readService(Context)(Context context) {
			string name = readSymbolName(context);
			auto ret = Service(name);

			unexpected(readChar() == '{', "Expected '{'");
			while (true) {
				readDocumentation();
				if (peekChar() == '}') {
					pos++;
					break;
				}
				readDeclaration(ret);
			}
			return ret;
		}


		/**
		 * Reads an rpc method of the form
		 * `name (RequestType) returns (ResponseType)` followed by either `;`
		 * or a brace-enclosed body of declarations.
		 *
		 * Params:
		 *   context = the enclosing declaration, consulted when renaming reserved words
		 *
		 * Returns: the method; its documentation string is always empty.
		 */
		Service.Method readRpc(Context)(Context context) {
			string documentation = "";
			string name = readSymbolName(context);

			unexpected(readChar() == '(', "Expected '('");
			string requestType = readSymbolName(context);
			unexpected(readChar() == ')', "Expected ')'");

			unexpected(readWord() == "returns", "Expected 'returns'");

			unexpected(readChar() == '(', "Expected '('");
			string responseType = readSymbolName(context);
			// @todo check for option prefixes, responseType is the last in the white spaced list
			unexpected(readChar() == ')', "Expected ')'");

			auto ret = Service.Method(name, documentation, requestType, responseType);

			/* process service options and documentation */
			if (peekChar() == '{') {
				pos++;
				while (true) {
					readDocumentation();
					if (peekChar() == '}') {
						pos++;
						break;
					}
					readDeclaration(ret);
				}
			}
			else {
				unexpected(readChar() == ';', "Expected ';'");
			}
			return ret;
		}

		/**
		 * Reads an enumerated type declaration, starting at its name.
		 *
		 * Params:
		 *   context = the enclosing declaration, consulted when renaming reserved words
		 *
		 * Returns: the enum with everything declared between its braces.
		 */
		EnumType readEnumType(Context)(Context context) {
			auto ret = EnumType(readSymbolName(context));
			unexpected(readChar() == '{', "Expected '{'");
			while (true) {
				readDocumentation();
				if (peekChar() == '}') {
					pos++;
					break;
				}
				readDeclaration(ret);
			}
			return ret;
		}

		/**
		 * Reads the remainder of a field declaration: `name = tag [options];`.
		 *
		 * Params:
		 *   label   = "required", "optional" or "repeated"
		 *   type    = the field's type name, already read by the caller
		 *   context = the enclosing declaration, consulted when renaming reserved words
		 *
		 * Returns: the field.
		 *
		 * Throws: `DProtoSyntaxException` when the tag is outside the accepted
		 *         ranges, when `packed` is set on a non-repeated field, or when
		 *         the terminating `;` is missing.
		 */
		Field readField(Context)(string label, string type, Context context) {
			Field.Requirement labelEnum = label.toUpper().to!(Field.Requirement)();
			string name = readSymbolName(context);
			unexpected(readChar() == '=', "Expected '='");
			int tag = readInt();
			// Accepted tags are 1 .. 2^29 - 1, excluding 19000 .. 19999.
			unexpected((0 < tag && tag < 19000) || (19999 < tag && tag < 2^^29),
					"Invalid tag number: " ~ tag.to!string());
			char c = peekChar();
			Options options;
			if (c == '[') {
				options = readMap('[', ']', '=');
				c = peekChar();
			}
			unexpected(c == ';', "Expected ';'");
			pos++;
			if (labelEnum != Field.Requirement.REPEATED && options.get("packed", "false") != "false") {
				throw syntaxError("[packed = true] can only be specified for repeated primitive fields");
			}
			return Field(labelEnum, type, name, tag, options);
		}

		/**
		 * Reads extensions like "extensions 101;" or "extensions 101 to max;".
		 *
		 * For a single number both bounds are set to it. For a range the lower
		 * bound is set to the first number and the upper bound to the second;
		 * with `max` the upper bound keeps the default value of `Extension`.
		 *
		 * Params:
		 *   context = the enclosing declaration (currently unused)
		 *
		 * Returns: the parsed range.
		 *
		 * @todo store the result in the enclosing declaration
		 */
		Extension readExtensions(Context)(Context context) {
			Extension ret;
			int minVal = readInt(); // Range start.
			ret.minVal = minVal;
			if (peekChar() != ';') {
				unexpected(readWord() == "to", "Expected 'to'");
				string maxVal = readWord(); // Range end.
				if(maxVal != "max") {
					try {
						// startsWith is safe for a bound shorter than two characters.
						ret.maxVal = maxVal.startsWith("0x")
							? maxVal["0x".length .. $].to!uint(16)
							: maxVal.to!uint();
					} catch (ConvException) {
						throw syntaxError("Expected an integer but was `" ~ maxVal ~ "`");
					}
				}
			} else {
				ret.maxVal = minVal;
			}
			unexpected(readChar() == ';', "Expected ';'");
			return ret;
		}

		/**
		 * Reads an option containing a name, a separator and a value.
		 *
		 * A value starting with `{` is read as a nested map and stored in its
		 * string form; anything else is read as a quoted or bare string.
		 *
		 * Params:
		 *   keyValueSeparator = the character expected between name and value
		 */
		Option readOption(char keyValueSeparator) {
			string name = readName(); // Option name.
			unexpected(readChar() == keyValueSeparator, "Expected '" ~ keyValueSeparator ~ "' in option");
			string value = (peekChar() == '{') ? readMap('{', '}', ':').to!string() : readString();
			return Option(name, value);
		}

		/**
		 * Returns a map of string keys and values. This is similar to a JSON object,
		 * with `openBrace` and `closeBrace` surrounding the map, `keyValueSeparator`
		 * separating keys from values, and ',' separating entries. A trailing
		 * comma before the closing brace is accepted.
		 */
		Options readMap(char openBrace, char closeBrace, char keyValueSeparator) {
			unexpected(readChar() == openBrace, openBrace ~ " to begin map");
			Options result;
			while (peekChar() != closeBrace) {

				Option option = readOption(keyValueSeparator);
				result[option.name] = option.value;

				char c = peekChar();
				if (c == ',') {
					pos++;
				} else if (c != closeBrace) {
					throw syntaxError("Expected ',' or '" ~ closeBrace ~ "'");
				}
			}

			// The loop only ends on the close brace; consume it.
			// This handles {}/[] and ,}/,] cases.
			pos++;
			return result;
		}

		/** Reads a non-whitespace character and returns it. */
		char readChar() {
			char result = peekChar();
			pos++;
			return result;
		}

		/**
		 * Peeks a non-whitespace character and returns it. The only difference
		 * between this and `readChar` is that this doesn't consume the char.
		 * Whitespace and comments in front of the character are skipped.
		 *
		 * Throws: `DProtoSyntaxException` at the end of the input.
		 */
		char peekChar() {
			skipWhitespace(true);
			unexpected(pos != data.length, "unexpected end of file");
			return data[pos];
		}

		/** Reads a quoted or unquoted string and returns it. */
		string readString() {
			skipWhitespace(true);
			return peekChar() == '"' ? readQuotedString() : readWord();
		}

		/**
		 * Reads a double-quoted string.
		 *
		 * A backslash causes the following character to be taken literally
		 * (the backslash itself is dropped).
		 *
		 * Returns: the string content, wrapped in double quotes again.
		 *
		 * Throws: `DProtoSyntaxException` when the opening quote is missing or
		 *         the input ends before the closing quote.
		 */
		string readQuotedString() {
			skipWhitespace(true);
			auto c = readChar();
			unexpected(c == '"', "Expected \" but got " ~ c);
			string result;
			while (pos < data.length) {
				c = data[pos++];
				if (c == '"') return '"'~result~'"';

				if (c == '\\') {
					unexpected(pos != data.length, "unexpected end of file");
					c = data[pos++];
				}

				result ~= c;
				if (c == '\n') newline();
			}
			throw syntaxError("unterminated string");
		}

		/**
		 * Reads a double-quoted import path.
		 *
		 * Returns: the path without the surrounding quotes.
		 */
		string readQuotedPath() {
			skipWhitespace(true);
			unexpected(readChar() == '"', "imports should be quoted");
			auto ret = readWord(pathPattern);
			unexpected(readChar() == '"', "imports should be quoted");
			return ret;
		}

		/** Reads a (paren-wrapped), [square-wrapped] or naked symbol name. */
		string readName() {
			string optionName;
			char c = peekChar();
			if (c == '(') {
				pos++;
				optionName = readWord();
				unexpected(readChar() == ')', "Expected ')'");
			} else if (c == '[') {
				pos++;
				optionName = readWord();
				unexpected(readChar() == ']', "Expected ']'");
			} else {
				optionName = readWord();
			}
			return optionName;
		}

		/** Reads a word and renames it if it collides with a D keyword (see `reservedName`). */
		string readSymbolName(Context)(Context context) {
			string name = readWord();
			return reservedName(context, name);
		}

		/**
		 * Makes `name` usable as a D identifier.
		 *
		 * Names that are not D keywords are returned unchanged. Keywords are
		 * rewritten with the format found in the `dproto_reserved_fmt` option of
		 * `context` (default `"%s_"`, i.e. an underscore is appended).
		 *
		 * Throws: `DProtoReservedWordException` when the format is exactly `%s`,
		 *         which would leave the keyword unchanged.
		 */
		string reservedName(Context)(Context context, string name) {
			if(isDKeyword(name))
			{
				// The option value is stored with its quotes; strip them to get the format.
				string reservedFmtRaw = context.options.get("dproto_reserved_fmt", `"%s_"`);
				string reservedFmt;
				formattedRead(reservedFmtRaw, `"%s"`, &reservedFmt);
				if(reservedFmt != "%s")
				{
					name = reservedFmt.format(name);
				}
				else
				{
					throw new DProtoReservedWordException("Reserved word: "~name);
				}
			}
			return name;
		}

		/**
		 * Reads a non-empty word and returns it.
		 *
		 * Params:
		 *   pattern = the set of characters that may appear in the word
		 */
		string readWord(string pattern = wordPattern) {
			skipWhitespace(true);
			int start = pos;
			while (pos < data.length) {
				char c = data[pos];
				if(pattern.canFind(c)) {
					pos++;
				} else {
					break;
				}
			}
			unexpected(start != pos, "Expected a word");
			return data[start .. pos].idup;
		}

		/**
		 * Reads an integer and returns it.
		 *
		 * A `0x` prefix selects hexadecimal, any other leading `0` selects octal,
		 * everything else is decimal.
		 *
		 * Throws: `DProtoSyntaxException` when the word is not a valid integer.
		 */
		int readInt() {
			string tag = readWord();
			// Keep the untouched token so the error message shows what was actually written.
			string digits = tag;
			try {
				int radix = 10;
				if (digits.startsWith("0x")) {
					digits = digits["0x".length .. $];
					radix = 16;
				}
				else if (digits.startsWith("0")) {
					radix = 8;
				}
				return digits.to!int(radix);
			} catch (Exception) {
				throw syntaxError("Expected an integer but was `" ~ tag ~ "`");
			}
		}

		/**
		 * Like `skipWhitespace`, but this returns a string containing all
		 * comment text. By convention, comments before a declaration document that
		 * declaration.
		 */
		string readDocumentation() {
			string result = null;
			while (true) {
				skipWhitespace(false);
				if (pos == data.length || data[pos] != '/') {
					return result != null ? cleanUpDocumentation(result) : "";
				}
				string comment = readComment();
				result = (result == null) ? comment : (result ~ "\n" ~ comment);
			}
		}

		/**
		 * Reads a comment and returns its body.
		 *
		 * Handles both block comments and `//` line comments. A line comment ends
		 * at the next newline or at the end of the input, whichever comes first.
		 *
		 * Throws: `DProtoSyntaxException` when the cursor is not on a comment or a
		 *         block comment is not terminated.
		 */
		string readComment() {
			unexpected(pos < data.length && data[pos] == '/', "Expected '/' to begin a comment");
			pos++;
			int commentType = pos < data.length ? data[pos++] : -1;
			if (commentType == '*') {
				int start = pos;
				while (pos + 1 < data.length) {
					if (data[pos] == '*' && data[pos + 1] == '/') {
						pos += 2;
						return data[start .. pos - 2].idup;
					} else {
						char c = data[pos++];
						if (c == '\n') newline();
					}
				}
				throw syntaxError("unterminated comment");
			} else if (commentType == '/') {
				int start = pos;
				while (pos < data.length) {
					char c = data[pos++];
					if (c == '\n') {
						newline();
						// Exclude the newline itself from the comment body.
						return data[start .. pos - 1].idup;
					}
				}
				// The comment runs to the end of the input: there is no newline to strip.
				return data[start .. pos].idup;
			} else {
				throw syntaxError("unexpected '/'");
			}
		}

		/**
		 * Returns a string like `comment`, but without leading whitespace or
		 * asterisks on each line, and stripped at both ends.
		 */
		string cleanUpDocumentation(string comment) {
			string result;
			bool beginningOfLine = true;
			for (int i = 0; i < comment.length; i++) {
				char c = comment[i];
				if (!beginningOfLine || ! " \t*".canFind(c)) {
					result ~= c;
					beginningOfLine = false;
				}
				if (c == '\n') {
					beginningOfLine = true;
				}
			}
			return result.strip();
		}

		/**
		 * Skips whitespace characters and optionally comments. When this returns,
		 * either `pos == data.length` or `data[pos]` is a non-whitespace character.
		 */
		void skipWhitespace(bool skipComments) {
			while (pos < data.length) {
				char c = data[pos];
				if (" \t\r\n".canFind(c)) {
					pos++;
					if (c == '\n') newline();
				} else if (skipComments && c == '/') {
					readComment();
				} else {
					break;
				}
			}
		}

		/** Call this every time a '\n' has been consumed. */
		void newline() {
			line++;
			lineStart = pos;
		}

		/** Builds a syntax error carrying the schema file name and the current 1-based line. */
		DProtoSyntaxException syntaxError(string message)
		{
			return new DProtoSyntaxException(message, fileName, line + 1);
		}

		/**
		 * Raises a syntax error with `message` unless `value` is true.
		 *
		 * Despite the name, `value` is the condition that is expected to hold.
		 */
		void unexpected(bool value, string message)
		{
			if (!value)
			{
				throw syntaxError(message);
			}
		}

		/** Returns true if the name is a reserved word in D
		 *
		 * This will cause problems trying to use them as variables
		 * Note: Some keywords are specifically whitelisted,
		 * in order to allow usage of the protobuf names
		 */
		bool isDKeyword(string name)
		{
			// A static immutable table is built once; an enum array literal
			// would be re-created on every call.
			// dfmt off
			static immutable string[] KEYWORDS = [
				"abstract", "alias", "align", "asm", "assert", "auto",
				"body", /+ "bool", +/ "break", "byte",
				"case", "cast", "catch", "cdouble", "cent", "cfloat", "char", "class", "const", "continue", "creal",
				"dchar", "debug", "default", "delegate", "delete", "deprecated", "do", /+ "double", +/
				"else", "enum", "export", "extern",
				"false", "final", "finally", /+ "float", +/ "for", "foreach", "foreach_reverse", "function",
				"goto",
				"idouble", "if", "ifloat", "immutable", "import", "in", "inout", "int", "interface", "invariant", "ireal", "is",
				"lazy", "long",
				"macro", "mixin", "module",
				"new", "nothrow", "null",
				"out", "override",
				"package", "pragma", "private", "protected", "public", "pure",
				"real", "ref", "return",
				"scope", "shared", "short", "static", "struct", "super", "switch", "synchronized",
				"template", "this", "throw", "true", "try", "typedef", "typeid", "typeof",
				"ubyte", "ucent", "uint", "ulong", "union", "unittest", "ushort",
				"version", "void", "volatile",
				"wchar", "while", "with",
				"__FILE__", "__MODULE__", "__LINE__", "__FUNCTION__", "__PRETTY_FUNCTION__",
				"__gshared", "__traits", "__vector", "__parameters",
			];
			// dfmt on
			return KEYWORDS.canFind(name);
		}

	}

	return ProtoSchemaParser(name_, data_).readProtoPackage();

}
691