1 /*******************************************************************************
2  * Convert a .proto file into a string representing the class
3  *
4  * Author: Matthew Soucy, msoucy@csh.rit.edu
5  * Date: Oct 5, 2013
6  * Version: 0.0.2
7  */
8 module dproto.parse;
9 
10 import dproto.exception;
11 import dproto.intermediate;
12 import dproto.serialize : isBuiltinType;
13 
14 import std.algorithm;
15 import std.array;
16 import std.conv;
17 import std.exception;
18 import std.stdio;
19 import std.string;
20 import std.format;
21 import std.traits;
22 
23 /**
24  * Basic parser for {@code .proto} schema declarations.
25  *
 * <p>This parser throws away data that it doesn't care about. In particular,
 * unrecognized options and extensions are discarded. It doesn't retain nesting
 * within types.
29  */
30 ProtoPackage ParseProtoSchema(const string name_, string data_)
31 {
32 
33 	struct ProtoSchemaParser {
34 
		/** The path to the `.proto` file; handed to ProtoPackage and quoted in syntax error messages. */
		string fileName;

		/** The entire document. Assigned once by the constructor. */
		const char[] data;

		/** Our cursor within the document. `data[pos]` is the next character to be read. */
		int pos;

		/** The number of newline characters encountered thus far (reported as `line + 1` in errors). */
		int line;

		/**
		 * The value of `pos` recorded by newline() for the most recent newline.
		 * Every caller in this struct advances past the '\n' before calling
		 * newline(), so this is the index of the first character of the current line.
		 */
		int lineStart;
49 
50 
51 		ProtoPackage readProtoPackage() {
52 			auto ret = ProtoPackage(fileName);
53 			while (true) {
54 				readDocumentation();
55 				if (pos == data.length) {
56 					return ret;
57 				}
58 				readDeclaration(ret);
59 			}
60 		}
61 
62 		this(string _fileName, string _data)
63 		{
64 			fileName = _fileName;
65 			data = _data;
66 		}
67 
68 	private:
69 
70 		void readDeclaration(Context, string ContextName = Context.stringof)(ref Context context) {
71 			// Skip unnecessary semicolons, occasionally used after a nested message declaration.
72 			if (peekChar() == ';') {
73 				pos++;
74 				return;
75 			}
76 
77 			string label = readWord();
78 
79 			switch(label) {
80 				case "syntax": {
81 					static if(is(Context==ProtoPackage)) {
82 						unexpected(context.syntax == null, "too many syntax statements");
83 						unexpected(readChar() == '=', "Expected '=' after 'syntax'");
84 						unexpected(peekChar() == '"', `Expected opening quote '"' after 'syntax ='`);
85 						context.syntax = readQuotedString();
86 						unexpected(context.syntax == `"proto2"` || context.syntax == `"proto3"`,
87 						           "Unexpected syntax version: `" ~ context.syntax ~ "`");
88 						unexpected(readChar() == ';', "Expected ';' after syntax declaration");
89 						return;
90 					} else {
91 						throw new DProtoSyntaxException("syntax in " ~ ContextName);
92 					}
93 				}
94 				case "package": {
95 					static if(is(Context==ProtoPackage)) {
96 						unexpected(context.packageName == null, "too many package names");
97 						context.packageName = readSymbolName(context);
98 						unexpected(readChar() == ';', "Expected ';'");
99 						return;
100 					} else {
101 						throw new DProtoSyntaxException("package in " ~ ContextName);
102 					}
103 				}
104 				case "import": {
105 					static if(is(Context==ProtoPackage)) {
106 						bool isPublicImport = false;
107 						if(peekChar() == 'p') {
108 							unexpected(readWord() == "public", "Expected 'public'");
109 							isPublicImport = true;
110 						}
111 						if(peekChar() == '"') {
112 							context.dependencies ~= Dependency(readQuotedPath (), isPublicImport);
113 						}
114 						unexpected(readChar() == ';', "Expected ';'");
115 						return;
116 					} else {
117 						throw new DProtoSyntaxException("import in " ~ ContextName);
118 					}
119 				}
120 				case "option": {
121 					Option result = readOption('=');
122 					unexpected(readChar() == ';', "Expected ';'");
123 					context.options[result.name] = result.value;
124 					return;
125 				}
126 				case "message": {
127 					static if(hasMember!(Context, "messageTypes")) {
128 						context.messageTypes ~= readMessage(context);
129 						return;
130 					} else {
131 						throw new DProtoSyntaxException("message in " ~ ContextName);
132 					}
133 				}
134 				case "enum": {
135 					static if(hasMember!(Context, "enumTypes")) {
136 						context.enumTypes ~= readEnumType(context);
137 						return;
138 					} else {
139 						throw new DProtoSyntaxException("enum in " ~ ContextName);
140 					}
141 				}
142 				case "extend": {
143 					readExtend();
144 					return;
145 				}
146 				case "service": {
147 					static if(hasMember!(Context, "rpcServices")) {
148 						context.rpcServices ~= readService(context);
149 						return;
150 					} else {
151 						throw new DProtoSyntaxException("service in " ~ ContextName);
152 					}
153 				}
154 				case "rpc": {
155 					static if( hasMember!(Context, "rpc")) {
156 						context.rpc ~= readRpc(context);
157 						return;
158 					} else {
159 						throw new DProtoSyntaxException("rpc in " ~ ContextName);
160 					}
161 				}
162 				case "required":
163 				case "optional":
164 				case "repeated": {
165 					static if( hasMember!(Context, "fields") ) {
166 						string type = readSymbolName(context);
167 						context.fields ~= readField(label, type, context);
168 						return;
169 					} else {
170 						throw new DProtoSyntaxException("Fields must be nested");
171 					}
172 				}
173 				case "extensions": {
174 					static if(!is(Context==ProtoPackage)) {
175 						readExtensions(context);
176 						return;
177 					} else {
178 						throw new DProtoSyntaxException("Extensions must be nested");
179 					}
180 				}
181 				default: {
182 					static if (is(Context == EnumType))
183 					{
184 						unexpected(readChar() == '=', "Expected '='");
185 						int tag = readInt();
186 						if (context.options.get("allow_alias", "true") == "false"
187 								&& context.values.values.canFind(tag))
188 						{
189 							throw new DProtoSyntaxException("Enum values must not be duplicated");
190 						}
191 						unexpected(readChar() == ';', "Expected ';'");
192 						context.values[label] = tag;
193 						return;
194 					}
195 					else
196 					{
197 						static if (hasMember!(Context, "fields"))
198 						{
199 							if (isBuiltinType(label))
200 							{
201 								context.fields ~= readField("optional", label, context);
202 								return;
203 							}
204 						}
205 						throw new DProtoSyntaxException("unexpected label: `" ~ label ~ '`');
206 					}
207 				}
208 			}
209 		}
210 
211 		/** Reads a message declaration. */
212 		MessageType readMessage(Context)(Context context) {
213 			auto ret = MessageType(readSymbolName(context));
214 			ret.options = context.options;
215 			unexpected(readChar() == '{', "Expected '{'");
216 			while (true) {
217 				readDocumentation();
218 				if (peekChar() == '}') {
219 					pos++;
220 					break;
221 				}
222 				readDeclaration(ret);
223 			}
224 			return ret;
225 		}
226 
227 		/** Reads an extend declaration (just ignores the content).
228 			@todo */
229 		void readExtend() {
230 			readName(); // Ignore this for now
231 			unexpected(readChar() == '{', "Expected '{'");
232 			while (true) {
233 				readDocumentation();
234 				if (peekChar() == '}') {
235 					pos++;
236 					break;
237 				}
238 				//readDeclaration();
239 			}
240 			return;
241 		}
242 
243 		/** Reads a service declaration and returns it. */
244 		Service readService(Context)(Context context) {
245 			string name = readSymbolName(context);
246 			auto ret = Service(name);
247 
248 			Service.Method[] methods = [];
249 			unexpected(readChar() == '{', "Expected '{'");
250 			while (true) {
251 				readDocumentation();
252 				if (peekChar() == '}') {
253 					pos++;
254 					break;
255 				}
256 				readDeclaration(ret);
257 			}
258 			return ret;
259 		}
260 
261 
262 		/** Reads an rpc method and returns it. */
263 		Service.Method readRpc(Context)(Context context) {
264 			string documentation = "";
265 			string name = readSymbolName(context);
266 
267 			unexpected(readChar() == '(', "Expected '('");
268 			string requestType = readSymbolName(context);
269 			unexpected(readChar() == ')', "Expected ')'");
270 
271 			unexpected(readWord() == "returns", "Expected 'returns'");
272 
273 			unexpected(readChar() == '(', "Expected '('");
274 			string responseType = readSymbolName(context);
275 			// @todo check for option prefixes, responseType is the last in the white spaced list
276 			unexpected(readChar() == ')', "Expected ')'");
277 
278 			auto ret = Service.Method(name, documentation, requestType, responseType);
279 
280 			/* process service options and documentation */
281 			if (peekChar() == '{') {
282 				pos++;
283 				while (true) {
284 					readDocumentation();
285 					if (peekChar() == '}') {
286 						pos++;
287 						break;
288 					}
289 					readDeclaration(ret);
290 				}
291 			}
292 			else if (readChar() != ';') {
293 				throw new DProtoSyntaxException("Expected ';'");
294 			}
295 			return ret;
296 		}
297 
298 		/** Reads an enumerated type declaration and returns it. */
299 		EnumType readEnumType(Context)(Context context) {
300 			auto ret = EnumType(readSymbolName(context));
301 			unexpected(readChar() == '{', "Expected '{'");
302 			while (true) {
303 				readDocumentation();
304 				if (peekChar() == '}') {
305 					pos++;
306 					break;
307 				}
308 				readDeclaration(ret);
309 			}
310 			return ret;
311 		}
312 
313 		/** Reads a field declaration and returns it. */
314 		Field readField(Context)(string label, string type, Context context) {
315 			Field.Requirement labelEnum = label.toUpper().to!(Field.Requirement)();
316 			string name = readSymbolName(context);
317 			unexpected(readChar() == '=', "Expected '='");
318 			int tag = readInt();
319 			enforce((0 < tag && tag < 19000) || (19999 < tag && tag < 2^^29),
320 					new DProtoSyntaxException(
321 						"Invalid tag number: "~tag.to!string()));
322 			char c = peekChar();
323 			Options options;
324 			if (c == '[') {
325 				options = readMap('[', ']', '=');
326 				c = peekChar();
327 			}
328 			if (c == ';') {
329 				pos++;
330 				return Field(labelEnum, type, name, tag, options);
331 			}
332 			throw new DProtoSyntaxException("Expected ';'");
333 		}
334 
335 		/** Reads extensions like "extensions 101;" or "extensions 101 to max;".
336 			@todo */
337 		Extension readExtensions(Context)(Context context) {
338 			Extension ret;
339 			int minVal = readInt(); // Range start.
340 			if (peekChar() != ';') {
341 				unexpected(readWord() == "to", "Expected 'to'");
342 				string maxVal = readWord(); // Range end.
343 				if(maxVal != "max") {
344 					if(maxVal[0..2] == "0x") {
345 						ret.maxVal = maxVal[2..$].to!uint(16);
346 					} else {
347 						ret.maxVal = maxVal.to!uint();
348 					}
349 				}
350 			} else {
351 				ret.minVal = minVal;
352 				ret.maxVal = minVal;
353 			}
354 			unexpected(readChar() == ';', "Expected ';'");
355 			return ret;
356 		}
357 
358 		/** Reads a option containing a name, an '=' or ':', and a value. */
359 		Option readOption(char keyValueSeparator) {
360 			string name = readName(); // Option name.
361 			unexpected(readChar() == keyValueSeparator, "Expected '" ~ keyValueSeparator ~ "' in option");
362 			string value = (peekChar() == '{') ? readMap('{', '}', ':').to!string() : readString();
363 			return Option(name, value);
364 		}
365 
366 		/**
367 		 * Returns a map of string keys and values. This is similar to a JSON object,
368 		 * with '{' and '}' surrounding the map, ':' separating keys from values, and
369 		 * ',' separating entries.
370 		 */
371 		Options readMap(char openBrace, char closeBrace, char keyValueSeparator) {
372 			unexpected(readChar() == openBrace, openBrace ~ " to begin map");
373 			Options result;
374 			while (peekChar() != closeBrace) {
375 
376 				Option option = readOption(keyValueSeparator);
377 				result[option.name] = option.value;
378 
379 				char c = peekChar();
380 				if (c == ',') {
381 					pos++;
382 				} else if (c != closeBrace) {
383 					throw new DProtoSyntaxException("Expected ',' or '" ~ closeBrace ~ "'");
384 				}
385 			}
386 
387 			// If we see the close brace, finish immediately. This handles {}/[] and ,}/,] cases.
388 			pos++;
389 			return result;
390 		}
391 
392 	private:
393 
394 		/** Reads a non-whitespace character and returns it. */
395 		char readChar() {
396 			char result = peekChar();
397 			pos++;
398 			return result;
399 		}
400 
401 		/**
402 		 * Peeks a non-whitespace character and returns it. The only difference
403 		 * between this and {@code readChar} is that this doesn't consume the char.
404 		 */
405 		char peekChar() {
406 			skipWhitespace(true);
407 			unexpected(pos != data.length, "unexpected end of file");
408 			return data[pos];
409 		}
410 
411 		/** Reads a quoted or unquoted string and returns it. */
412 		string readString() {
413 			skipWhitespace(true);
414 			return peekChar() == '"' ? readQuotedString() : readWord();
415 		}
416 
417 		string readQuotedString() {
418 			skipWhitespace(true);
419 			auto c = readChar();
420 			enforce(c == '"', new DProtoSyntaxException("Expected \" but got " ~ c));
421 			string result;
422 			while (pos < data.length) {
423 				c = data[pos++];
424 				if (c == '"') return '"'~result~'"';
425 
426 				if (c == '\\') {
427 					unexpected(pos != data.length, "unexpected end of file");
428 					c = data[pos++];
429 				}
430 
431 				result ~= c;
432 				if (c == '\n') newline();
433 			}
434 			throw new DProtoSyntaxException("unterminated string");
435 		}
436 
437 		string readQuotedPath() {
438 			skipWhitespace(true);
439 			unexpected(readChar() == '"', "imports should be quoted");
440 			auto ret = readWord(`a-zA-Z0-9_.\-/`);
441 			unexpected(readChar() == '"', "imports should be quoted");
442 			return ret;
443 		}
444 
445 		/** Reads a (paren-wrapped), [square-wrapped] or naked symbol name. */
446 		string readName() {
447 			string optionName;
448 			char c = peekChar();
449 			if (c == '(') {
450 				pos++;
451 				optionName = readWord();
452 				unexpected(readChar() == ')', "Expected ')'");
453 			} else if (c == '[') {
454 				pos++;
455 				optionName = readWord();
456 				unexpected(readChar() == ']', "Expected ']'");
457 			} else {
458 				optionName = readWord();
459 			}
460 			return optionName;
461 		}
462 
463 		/** Reads a symbol name */
464 		string readSymbolName(Context)(Context context) {
465 			string name = readWord();
466 			if(isDKeyword(name))
467 			{
468 				// Wrapped in quotes to properly evaluate string
469 				string reservedFmtRaw = context.options.get("dproto_reserved_fmt", `"%s_"`);
470 				string reservedFmt;
471 				formattedRead(reservedFmtRaw, `"%s"`, &reservedFmt);
472 				if(reservedFmt != "%s")
473 				{
474 					name = reservedFmt.format(name);
475 				}
476 				else
477 				{
478 					throw new DProtoReservedWordException(name);
479 				}
480 			}
481 			return name;
482 		}
483 
484 		/** Reads a non-empty word and returns it. */
485 		string readWord(string pattern = `a-zA-Z0-9_.\-`) {
486 			skipWhitespace(true);
487 			int start = pos;
488 			while (pos < data.length) {
489 				char c = data[pos];
490 				if(c.inPattern(pattern)) {
491 					pos++;
492 				} else {
493 					break;
494 				}
495 			}
496 			unexpected(start != pos, "Expected a word");
497 			return data[start .. pos].idup;
498 		}
499 
500 		/** Reads an integer and returns it. */
501 		int readInt() {
502 			string tag = readWord();
503 			try {
504 				int radix = 10;
505 				if (tag.startsWith("0x")) {
506 					tag = tag["0x".length .. $];
507 					radix = 16;
508 				}
509 				return tag.to!int(radix);
510 			} catch (Exception e) {
511 				throw new DProtoSyntaxException(
512 						"Expected an integer but was `" ~ tag ~ "`",
513 						e.msg);
514 			}
515 		}
516 
517 		/**
518 		 * Like {@link #skipWhitespace}, but this returns a string containing all
519 		 * comment text. By convention, comments before a declaration document that
520 		 * declaration.
521 		 */
522 		string readDocumentation() {
523 			string result = null;
524 			while (true) {
525 				skipWhitespace(false);
526 				if (pos == data.length || data[pos] != '/') {
527 					return result != null ? cleanUpDocumentation(result) : "";
528 				}
529 				string comment = readComment();
530 				result = (result == null) ? comment : (result ~ "\n" ~ comment);
531 			}
532 		}
533 
534 		/** Reads a comment and returns its body. */
535 		string readComment() {
536 			enforce(!(pos == data.length || data[pos] != '/'), new DProtoSyntaxException(""));
537 			pos++;
538 			int commentType = pos < data.length ? data[pos++] : -1;
539 			if (commentType == '*') {
540 				int start = pos;
541 				while (pos + 1 < data.length) {
542 					if (data[pos] == '*' && data[pos + 1] == '/') {
543 						pos += 2;
544 						return data[start .. pos - 2].idup;
545 					} else {
546 						char c = data[pos++];
547 						if (c == '\n') newline();
548 					}
549 				}
550 				throw new DProtoSyntaxException("unterminated comment");
551 			} else if (commentType == '/') {
552 				int start = pos;
553 				while (pos < data.length) {
554 					char c = data[pos++];
555 					if (c == '\n') {
556 						newline();
557 						break;
558 					}
559 				}
560 				return data[start .. pos - 1].idup;
561 			} else {
562 				throw new DProtoSyntaxException("unexpected '/'");
563 			}
564 		}
565 
566 		/**
567 		 * Returns a string like {@code comment}, but without leading whitespace or
568 		 * asterisks.
569 		 */
570 		string cleanUpDocumentation(string comment) {
571 			string result;
572 			bool beginningOfLine = true;
573 			for (int i = 0; i < comment.length; i++) {
574 				char c = comment[i];
575 				if (!beginningOfLine || ! " \t*".canFind(c)) {
576 					result ~= c;
577 					beginningOfLine = false;
578 				}
579 				if (c == '\n') {
580 					beginningOfLine = true;
581 				}
582 			}
583 			return result.strip();
584 		}
585 
586 		/**
587 		 * Skips whitespace characters and optionally comments. When this returns,
588 		 * either {@code pos == data.length} or a non-whitespace character.
589 		 */
590 		void skipWhitespace(bool skipComments) {
591 			while (pos < data.length) {
592 				char c = data[pos];
593 				if (" \t\r\n".canFind(c)) {
594 					pos++;
595 					if (c == '\n') newline();
596 				} else if (skipComments && c == '/') {
597 					readComment();
598 				} else {
599 					break;
600 				}
601 			}
602 		}
603 
604 		/** Call this everytime a '\n' is encountered. */
605 		void newline() {
606 			line++;
607 			lineStart = pos;
608 		}
609 
610 		void unexpected(bool value, string message)
611 		{
612 			if (!value)
613 			{
614 				new DProtoSyntaxException(
615 					"Syntax error in %s at %d:%d: %s".format(fileName, line + 1,
616 					(pos - lineStart + 1), message));
617 			}
618 		}
619 
620 		/** Returns true if the name is a reserved word in D
621 		 *
622 		 * This will cause problems trying to use them as variables
623 		 * Note: Some keywords are specifically whitelisted,
624 		 * in order to allow usage of the protobuf names
625 		 */
626 		bool isDKeyword(string name)
627 		{
628 			// dfmt off
629 			enum KEYWORDS = [
630 				"abstract", "alias", "align", "asm", "assert", "auto",
631 				"body", /+ "bool", +/ "break", "byte",
632 				"case", "cast", "catch", "cdouble", "cent", "cfloat", "char", "class", "const", "continue", "creal",
633 				"dchar", "debug", "default", "delegate", "delete", "deprecated", "do", /+ "double", +/
634 				"else", "enum", "export", "extern",
635 				"false", "final", "finally", /+ "float", +/ "for", "foreach", "foreach_reverse", "function",
636 				"goto",
637 				"idouble", "if", "ifloat", "immutable", "import", "in", "inout", "int", "interface", "invariant", "ireal", "is",
638 				"lazy", "long",
639 				"macro", "mixin", "module",
640 				"new", "nothrow", "null",
641 				"out", "override",
642 				"package", "pragma", "private", "protected", "public", "pure",
643 				"real", "ref", "return",
644 				"scope", "shared", "short", "static", "struct", "super", "switch", "synchronized",
645 				"template", "this", "throw", "true", "try", "typedef", "typeid", "typeof",
646 				"ubyte", "ucent", "uint", "ulong", "union", "unittest", "ushort",
647 				"version", "void", "volatile",
648 				"wchar", "while", "with",
649 				"__FILE__", "__MODULE__", "__LINE__", "__FUNCTION__", "__PRETTY_FUNCTION__",
650 				"__gshared", "__traits", "__vector", "__parameters",
651 			];
652 			// dfmt on
653 			return KEYWORDS.canFind(name);
654 		}
655 
656 	}
657 
658 	return ProtoSchemaParser(name_, data_).readProtoPackage();
659 
660 }
661