1 /*******************************************************************************
2  * Convert a .proto file into a string representing the class
3  *
4  * Author: Matthew Soucy, msoucy@csh.rit.edu
5  * Date: Oct 5, 2013
6  * Version: 0.0.2
7  */
8 module dproto.parse;
9 
10 import dproto.exception;
11 import dproto.intermediate;
12 
13 import std.algorithm;
14 import std.array;
15 import std.conv;
16 import std.exception;
17 import std.stdio;
18 import std.string;
19 import std.traits;
20 
21 /**
22  * Basic parser for {@code .proto} schema declarations.
23  *
24  * <p>This parser throws away data that it doesn't care about. In particular,
25  * unrecognized options, and extensions are discarded. It doesn't retain nesting
26  * within types.
27  */
28 ProtoPackage ParseProtoSchema(const string name_, string data_) {
29 
30 	struct ProtoSchemaParser {
31 
32 		/** The path to the {@code .proto} file. */
33 		string fileName;
34 
35 		/** The entire document. */
36 		const char[] data;
37 
38 		/** Our cursor within the document. {@code data[pos]} is the next character to be read. */
39 		int pos;
40 
41 		/** The number of newline characters encountered thus far. */
42 		int line;
43 
44 		/** The index of the most recent newline character. */
45 		int lineStart;
46 
47 
48 		ProtoPackage readProtoPackage() {
49 			auto ret = ProtoPackage(fileName);
50 			while (true) {
51 				readDocumentation();
52 				if (pos == data.length) {
53 					return ret;
54 				}
55 				readDeclaration(ret);
56 			}
57 		}
58 
59 		this(string _fileName, string _data)
60 		{
61 			fileName = _fileName;
62 			data = _data;
63 		}
64 
65 	private:
66 
67 		void readDeclaration(Context, string ContextName = Context.stringof)(ref Context context) {
68 			// Skip unnecessary semicolons, occasionally used after a nested message declaration.
69 			if (peekChar() == ';') {
70 				pos++;
71 				return;
72 			}
73 
74 			string label = readWord();
75 
76 			switch(label) {
77 				case "package": {
78 					static if(is(Context==ProtoPackage)) {
79 						enforce(context.packageName == null, unexpected("too many package names"));
80 						context.packageName = readName();
81 						enforce(readChar() == ';', unexpected("expected ';'"));
82 						return;
83 					} else {
84 						throw unexpected("package in " ~ ContextName);
85 					}
86 				}
87 				case "import": {
88 					static if(is(Context==ProtoPackage)) {
89 						context.dependencies ~= readString();
90 						enforce(readChar() == ';', unexpected("expected ';'"));
91 						return;
92 					} else {
93 						throw unexpected("import in " ~ ContextName);
94 					}
95 				}
96 				case "option": {
97 					Option result = readOption('=');
98 					enforce(readChar() == ';', unexpected("expected ';'"));
99 					context.options[result.name] = result.value;
100 					return;
101 				}
102 				case "message": {
103 					static if(hasMember!(Context, "messageTypes")) {
104 						context.messageTypes ~= readMessage();
105 						return;
106 					} else {
107 						throw unexpected("message in " ~ ContextName);
108 					}
109 				}
110 				case "enum": {
111 					static if(hasMember!(Context, "enumTypes")) {
112 						context.enumTypes ~= readEnumType();
113 						return;
114 					} else {
115 						throw unexpected("enum in " ~ ContextName);
116 					}
117 				}
118 				/+
119 				case "service": {
120 					readService();
121 					return;
122 				}
123 				+/
124 				case "extend": {
125 					readExtend();
126 					return;
127 				}
128 				/+
129 				case "rpc": {
130 					static if( hasMember!(Context, "rpc")) {
131 						readRpc();
132 						return;
133 					} else {
134 						throw unexpected("rpc in " ~ context)
135 					}
136 				}
137 				+/
138 				case "required":
139 				case "optional":
140 				case "repeated": {
141 					static if( hasMember!(Context, "fields") ) {
142 						context.fields ~= readField(label);
143 						return;
144 					} else {
145 						throw unexpected("fields must be nested");
146 					}
147 				}
148 				case "extensions": {
149 					static if(!is(Context==ProtoPackage)) {
150 						readExtensions();
151 						return;
152 					} else {
153 						throw unexpected("extensions must be nested");
154 					}
155 				}
156 				default: {
157 					static if(is(Context==EnumType)) {
158 						enforce(readChar() == '=', unexpected("expected '='"));
159 						int tag = readInt();
160 						enforce(readChar() == ';', unexpected("expected ';'"));
161 						context.values[label] = tag;
162 						return;
163 					} else {
164 						throw unexpected("unexpected label: " ~ label);
165 					}
166 				}
167 			}
168 		}
169 
170 		/** Reads a message declaration. */
171 		MessageType readMessage() {
172 			auto ret = MessageType(readName());
173 			enforce(readChar() == '{', unexpected("expected '{'"));
174 			while (true) {
175 				readDocumentation();
176 				if (peekChar() == '}') {
177 					pos++;
178 					break;
179 				}
180 				readDeclaration(ret);
181 			}
182 			return ret;
183 		}
184 
185 		/** Reads an extend declaration (just ignores the content).
186 			@todo */
187 		void readExtend() {
188 			readName(); // Ignore this for now
189 			enforce(readChar() == '{', unexpected("expected '{'"));
190 			while (true) {
191 				readDocumentation();
192 				if (peekChar() == '}') {
193 					pos++;
194 					break;
195 				}
196 				//readDeclaration();
197 			}
198 			return;
199 		}
200 
		// Disabled with static if(0): the body below is never compiled in.
		// NOTE(review): it refers to names that are not defined in this file as
		// shown (Service, Context.SERVICE, a readDeclaration that returns an
		// Object, methods.add) — it looks like an unfinished port; confirm before
		// enabling.
		static if(0)
		/**
		 * Reads a service declaration and returns it.
		 *
		 * Intended flow: read the service name, require '{', then read
		 * declarations until '}', keeping those that are service methods.
		 *
		 * @todo
		 */
		Service readService() {
			string name = readName();
			Service.Method[] methods = [];
			enforce(readChar() == '{', unexpected("expected '{'"));
			while (true) {
				string methodDocumentation = readDocumentation();
				if (peekChar() == '}') {
					pos++;
					break;
				}
				Object declared = readDeclaration(Context.SERVICE);
				if (cast(Service.Method)declared) {
					methods.add(cast(Service.Method) declared);
				}
			}
			return new Service(name, methods);
		}
221 
222 		/** Reads an enumerated type declaration and returns it. */
223 		EnumType readEnumType() {
224 			auto ret = EnumType(readName());
225 			enforce(readChar() == '{', unexpected("expected '{'"));
226 			while (true) {
227 				readDocumentation();
228 				if (peekChar() == '}') {
229 					pos++;
230 					break;
231 				}
232 				readDeclaration(ret);
233 			}
234 			return ret;
235 		}
236 
237 		/** Reads an field declaration and returns it. */
238 		Field readField(string label) {
239 			Field.Requirement labelEnum = label.toUpper().to!(Field.Requirement)();
240 			string type = readName();
241 			string name = readName();
242 			enforce(readChar() == '=', unexpected("expected '='"));
243 			int tag = readInt();
244 			enforce((0 < tag && tag < 19000) || (19999 < tag && tag < 2^^29), new DProtoException("Invalid tag number: "~tag.to!string()));
245 			char c = peekChar();
246 			Options options;
247 			if (c == '[') {
248 				options = readMap('[', ']', '=');
249 				c = peekChar();
250 			}
251 			if (c == ';') {
252 				pos++;
253 				return Field(labelEnum, type, name, tag, options);
254 			}
255 			throw unexpected("expected ';'");
256 		}
257 
258 		/** Reads extensions like "extensions 101;" or "extensions 101 to max;".
259 			@todo */
260 		Extension readExtensions() {
261 			Extension ret;
262 			int minVal = readInt(); // Range start.
263 			if (peekChar() != ';') {
264 				readWord(); // Literal 'to'
265 				string maxVal = readWord(); // Range end.
266 				if(maxVal != "max") {
267 					if(maxVal[0..2] == "0x") {
268 						ret.maxVal = maxVal[2..$].to!uint(16);
269 					} else {
270 						ret.maxVal = maxVal.to!uint();
271 					}
272 				}
273 			} else {
274 				ret.minVal = minVal;
275 				ret.maxVal = minVal;
276 			}
277 			enforce(readChar() == ';', unexpected("expected ';'"));
278 			return ret;
279 		}
280 
281 		/** Reads a option containing a name, an '=' or ':', and a value. */
282 		Option readOption(char keyValueSeparator) {
283 			string name = readName(); // Option name.
284 			enforce(readChar() == keyValueSeparator, unexpected("expected '" ~ keyValueSeparator ~ "' in option"));
285 			string value = (peekChar() == '{') ? readMap('{', '}', ':').to!string() : readString();
286 			return Option(name, value);
287 		}
288 
289 		/**
290 		 * Returns a map of string keys and values. This is similar to a JSON object,
291 		 * with '{' and '}' surrounding the map, ':' separating keys from values, and
292 		 * ',' separating entries.
293 		 */
294 		Options readMap(char openBrace, char closeBrace, char keyValueSeparator) {
295 			enforce(readChar() == openBrace, unexpected(openBrace ~ " to begin map"));
296 			Options result;
297 			while (peekChar() != closeBrace) {
298 
299 				Option option = readOption(keyValueSeparator);
300 				result[option.name] = option.value;
301 
302 				char c = peekChar();
303 				if (c == ',') {
304 					pos++;
305 				} else if (c != closeBrace) {
306 					throw unexpected("expected ',' or '" ~ closeBrace ~ "'");
307 				}
308 			}
309 
310 			// If we see the close brace, finish immediately. This handles {}/[] and ,}/,] cases.
311 			pos++;
312 			return result;
313 		}
314 
315 		static if(0)
316 		/** Reads an rpc method and returns it.
317 			@todo */
318 		Service.Method readRpc(string documentation) {
319 			string name = readName();
320 
321 			enforce(readChar() == '(', unexpected("expected '('"));
322 			string requestType = readName();
323 			enforce(readChar() == ')', unexpected("expected ')'"));
324 
325 			enforce(readWord() != "returns", unexpected("expected 'returns'"));
326 
327 			enforce(readChar() == '(', unexpected("expected '('"));
328 			string responseType = readName();
329 			enforce(readChar() == ')', unexpected("expected ')'"));
330 
331 			Option[] options = [];
332 			if (peekChar() == '{') {
333 				pos++;
334 				while (true) {
335 					string methodDocumentation = readDocumentation();
336 					if (peekChar() == '}') {
337 						pos++;
338 						break;
339 					}
340 					Object declared = readDeclaration(methodDocumentation, Context.RPC);
341 					if (cast(Option)declared) {
342 						Option option = cast(Option) declared;
343 						options.put(option.getName(), option.getValue());
344 					}
345 				}
346 			} else if (readChar() != ';') throw unexpected("expected ';'");
347 
348 			return new Service.Method(name, documentation, requestType, responseType, options);
349 		}
350 
351 	private:
352 
353 		/** Reads a non-whitespace character and returns it. */
354 		char readChar() {
355 			char result = peekChar();
356 			pos++;
357 			return result;
358 		}
359 
360 		/**
361 		 * Peeks a non-whitespace character and returns it. The only difference
362 		 * between this and {@code readChar} is that this doesn't consume the char.
363 		 */
364 		char peekChar() {
365 			skipWhitespace(true);
366 			enforce(pos != data.length, unexpected("unexpected end of file"));
367 			return data[pos];
368 		}
369 
370 		/** Reads a quoted or unquoted string and returns it. */
371 		string readString() {
372 			skipWhitespace(true);
373 			return peekChar() == '"' ? readQuotedString() : readWord();
374 		}
375 
376 		string readQuotedString() {
377 			enforce(readChar() == '"', new DProtoException(""));
378 			string result;
379 			while (pos < data.length) {
380 				char c = data[pos++];
381 				if (c == '"') return '"'~result~'"';
382 
383 				if (c == '\\') {
384 					enforce(pos != data.length, unexpected("unexpected end of file"));
385 					c = data[pos++];
386 				}
387 
388 				result ~= c;
389 				if (c == '\n') newline();
390 			}
391 			throw unexpected("unterminated string");
392 		}
393 
394 		/** Reads a (paren-wrapped), [square-wrapped] or naked symbol name. */
395 		string readName() {
396 			string optionName;
397 			char c = peekChar();
398 			if (c == '(') {
399 				pos++;
400 				optionName = readWord();
401 				enforce(readChar() == ')', unexpected("expected ')'"));
402 			} else if (c == '[') {
403 				pos++;
404 				optionName = readWord();
405 				enforce(readChar() == ']', unexpected("expected ']'"));
406 			} else {
407 				optionName = readWord();
408 			}
409 			return optionName;
410 		}
411 
412 		/** Reads a non-empty word and returns it. */
413 		string readWord() {
414 			skipWhitespace(true);
415 			int start = pos;
416 			while (pos < data.length) {
417 				char c = data[pos];
418 				if(c.inPattern(`a-zA-Z0-9_.\-`)) {
419 					pos++;
420 				} else {
421 					break;
422 				}
423 			}
424 			enforce(start != pos, unexpected("expected a word"));
425 			return data[start .. pos].idup;
426 		}
427 
428 		/** Reads an integer and returns it. */
429 		int readInt() {
430 			string tag = readWord();
431 			try {
432 				int radix = 10;
433 				if (tag.startsWith("0x")) {
434 					tag = tag["0x".length .. $];
435 					radix = 16;
436 				}
437 				return tag.to!int(radix);
438 			} catch (Exception e) {
439 				throw unexpected("expected an integer but was " ~ tag);
440 			}
441 		}
442 
443 		/**
444 		 * Like {@link #skipWhitespace}, but this returns a string containing all
445 		 * comment text. By convention, comments before a declaration document that
446 		 * declaration.
447 		 */
448 		string readDocumentation() {
449 			string result = null;
450 			while (true) {
451 				skipWhitespace(false);
452 				if (pos == data.length || data[pos] != '/') {
453 					return result != null ? cleanUpDocumentation(result) : "";
454 				}
455 				string comment = readComment();
456 				result = (result == null) ? comment : (result ~ "\n" ~ comment);
457 			}
458 		}
459 
460 		/** Reads a comment and returns its body. */
461 		string readComment() {
462 			enforce(!(pos == data.length || data[pos] != '/'), new DProtoException(""));
463 			pos++;
464 			int commentType = pos < data.length ? data[pos++] : -1;
465 			if (commentType == '*') {
466 				int start = pos;
467 				while (pos + 1 < data.length) {
468 					if (data[pos] == '*' && data[pos + 1] == '/') {
469 						pos += 2;
470 						return data[start .. pos - 2].idup;
471 					} else {
472 						char c = data[pos++];
473 						if (c == '\n') newline();
474 					}
475 				}
476 				throw unexpected("unterminated comment");
477 			} else if (commentType == '/') {
478 				int start = pos;
479 				while (pos < data.length) {
480 					char c = data[pos++];
481 					if (c == '\n') {
482 						newline();
483 						break;
484 					}
485 				}
486 				return data[start .. pos - 1].idup;
487 			} else {
488 				throw unexpected("unexpected '/'");
489 			}
490 		}
491 
492 		/**
493 		 * Returns a string like {@code comment}, but without leading whitespace or
494 		 * asterisks.
495 		 */
496 		string cleanUpDocumentation(string comment) {
497 			string result;
498 			bool beginningOfLine = true;
499 			for (int i = 0; i < comment.length; i++) {
500 				char c = comment[i];
501 				if (!beginningOfLine || ! " \t*".canFind(c)) {
502 					result ~= c;
503 					beginningOfLine = false;
504 				}
505 				if (c == '\n') {
506 					beginningOfLine = true;
507 				}
508 			}
509 			return result.strip();
510 		}
511 
512 		/**
513 		 * Skips whitespace characters and optionally comments. When this returns,
514 		 * either {@code pos == data.length} or a non-whitespace character.
515 		 */
516 		void skipWhitespace(bool skipComments) {
517 			while (pos < data.length) {
518 				char c = data[pos];
519 				if (" \t\r\n".canFind(c)) {
520 					pos++;
521 					if (c == '\n') newline();
522 				} else if (skipComments && c == '/') {
523 					readComment();
524 				} else {
525 					break;
526 				}
527 			}
528 		}
529 
530 		/** Call this everytime a '\n' is encountered. */
531 		void newline() {
532 			line++;
533 			lineStart = pos;
534 		}
535 
536 		Exception unexpected(string message) {
537 			throw new DProtoException("Syntax error in %s at %d:%d: %s"
538 					.format(fileName, line+1, (pos - lineStart + 1), message));
539 		}
540 
541 	}
542 
543 	return ProtoSchemaParser(name_, data_).readProtoPackage();
544 
545 }