From 7d12fb3ec4e7470f8db9b41ea0e8843127c370cc Mon Sep 17 00:00:00 2001 From: Pedro Date: Fri, 7 Nov 2014 16:29:46 +0100 Subject: [PATCH] added keyword list generator. Now understanding names in double quotes --- src/build_and_run_tests.sh | 30 ++++++------- src/lib/Expr.cpp | 3 +- src/lib/Expr.h | 5 +++ src/parser/.gitignore | 0 src/parser/bison_parser.y | 25 ++++++----- src/parser/flex_lexer.l | 70 ++++++++++++++++++++--------- src/parser/keywordlist_generator.py | 47 +++++++++++++++++++ src/parser/sql_keywords.txt | 66 +++++++++++++++++++++++++++ 8 files changed, 196 insertions(+), 50 deletions(-) create mode 100644 src/parser/.gitignore create mode 100644 src/parser/keywordlist_generator.py create mode 100644 src/parser/sql_keywords.txt diff --git a/src/build_and_run_tests.sh b/src/build_and_run_tests.sh index f4f3432..d5ed9cc 100644 --- a/src/build_and_run_tests.sh +++ b/src/build_and_run_tests.sh @@ -10,30 +10,28 @@ make grammar_test echo "\n\n" ./bin/grammar_test "SELECT a FROM foo WHERE a > 12 OR b > 3 AND NOT c LIMIT 10" -# ./bin/grammar_test "SELECT col1, col2, 'test' FROM table, foo AS t WHERE age > 12 AND zipcode = 12345 GROUP BY col1;" -./bin/grammar_test "SELECT age FROM table AS t1, (SELECT * FROM table2) AS t2 ORDER BY age DESC LIMIT 10; SELECT * AS table;" -# ./bin/grammar_test "SELECT * from table JOIN table2 ON a = b WHERE (b OR NOT a) AND a = 12.5" -# ./bin/grammar_test "(SELECT a FROM foo WHERE a > 12 OR b > 3 AND c LIKE 's%' LIMIT 10);" -# ./bin/grammar_test "(SELECT a FROM foo WHERE a > 12 OR b > 3 AND c NOT LIKE 's%' LIMIT 10);" -# ./bin/grammar_test "SELECT t1.a, t1.b, t2.c FROM table AS t1 JOIN (SELECT * FROM foo JOIN bar ON foo.id = bar.id) t2 ON t1.a = t2.b WHERE (t1.b OR NOT t1.a) AND t2.c = 12.5" +./bin/grammar_test "SELECT col1, col2, 'test' FROM \"table\", foo AS t WHERE age > 12 AND zipcode = 12345 GROUP BY col1;" +./bin/grammar_test "SELECT * from \"table\" JOIN table2 ON a = b WHERE (b OR NOT a) AND a = 12.5" +./bin/grammar_test "(SELECT a FROM foo WHERE a > 12 OR b > 3 AND c NOT LIKE 's%' LIMIT 10);" +./bin/grammar_test "SELECT t1.a, t1.b, t2.c FROM \"table\" AS t1 JOIN (SELECT * FROM foo JOIN bar ON foo.id = bar.id) t2 ON t1.a = t2.b WHERE (t1.b OR NOT t1.a) AND t2.c = 12.5" -# ./bin/grammar_test "IMPORT FROM TBL FILE 'students.tbl' INTO table" +./bin/grammar_test "IMPORT FROM TBL FILE 'students.tbl' INTO \"table\"" # Error: Where clause in between join statement -# ./bin/grammar_test -f "SELECT * from table WHERE (b OR NOT a) AND a = 12.5 AS t1 JOIN table2 ON a = b" -# ./bin/grammar_test -f "SELECT * table WHERE (b OR NOT a) AND a = 12.5 AS t1 JOIN table2 ON a = b" +./bin/grammar_test -f "SELECT * from \"table\" WHERE (b OR NOT a) AND a = 12.5 AS t1 JOIN table2 ON a = b" +./bin/grammar_test -f "SELECT * \"table\" WHERE (b OR NOT a) AND a = 12.5 AS t1 JOIN table2 ON a = b" echo "\n\n" -# ./bin/analysis "SELECT a FROM foo WHERE a > 12 OR b > 3 AND c = 3" -# ./bin/analysis "SELECT col1, col2, 'test' FROM table t1, foo WHERE age > 12 AND zipcode = 12345 GROUP BY col1 ORDER BY col2 DESC LIMIT 100;" -# ./bin/analysis "SELECT * from table AS t1 JOIN table2 AS t2 ON t1.a = t2.b WHERE (b OR NOT a) AND a = 12.5" -# ./bin/analysis "SELECT t1.a, t1.b, t2.c FROM table AS t1 JOIN (SELECT * FROM foo JOIN bar ON foo.id = bar.id) t2 ON t1.a = t2.b WHERE (t1.b OR NOT t1.a) AND t2.c = 12.5" +./bin/analysis "SELECT a FROM foo WHERE a > 12 OR b > 3 AND c = 3" +# ./bin/analysis "SELECT col1, col2, 'test' FROM tbl t1, foo WHERE age > 12 AND zipcode = 12345 GROUP BY col1 ORDER BY col2 DESC LIMIT 100;" +# ./bin/analysis "SELECT * from tbl AS t1 JOIN table2 AS t2 ON t1.a = t2.b WHERE (b OR NOT a) AND a = 12.5" +# ./bin/analysis "SELECT t1.a, t1.b, t2.c FROM tbl AS t1 JOIN (SELECT * FROM foo JOIN bar ON foo.id = bar.id) t2 ON t1.a = t2.b WHERE (t1.b OR NOT t1.a) AND t2.c = 12.5" # ./bin/analysis "-- test -# SELECT * FROM table WHERE a NOT LIKE '%s' -- inline comment +# SELECT * FROM \"table\" WHERE a NOT LIKE '%s' -- inline comment # --my comment" # ./bin/analysis " -# IMPORT FROM TBL FILE 'students.tbl' INTO table; -# SELECT * FROM table;" +# IMPORT FROM TBL FILE 'students.tbl' INTO tbl; +# SELECT * FROM tbl;" echo "\n\n" \ No newline at end of file diff --git a/src/lib/Expr.cpp b/src/lib/Expr.cpp index f415599..1aceca9 100644 --- a/src/lib/Expr.cpp +++ b/src/lib/Expr.cpp @@ -60,8 +60,7 @@ Expr* Expr::makeLiteral(double value) { Expr* Expr::makeLiteral(char* string) { ALLOC_EXPR(e, kExprLiteralString); - e->name = substr(string, 1, strlen(string)-1); - delete string; + e->name = string; return e; } diff --git a/src/lib/Expr.h b/src/lib/Expr.h index 42f5749..a2b07d8 100644 --- a/src/lib/Expr.h +++ b/src/lib/Expr.h @@ -6,6 +6,11 @@ namespace hsql { +// Helper function +char* substr(const char* source, int from, int to); + + + typedef enum { kExprLiteralFloat, kExprLiteralString, diff --git a/src/parser/.gitignore b/src/parser/.gitignore new file mode 100644 index 0000000..e69de29 diff --git a/src/parser/bison_parser.y b/src/parser/bison_parser.y index 4a8c288..12e456a 100644 --- a/src/parser/bison_parser.y +++ b/src/parser/bison_parser.y @@ -98,16 +98,19 @@ typedef void* yyscan_t; /********************************* ** Token Definition *********************************/ -%token SELECT FROM WHERE GROUP BY HAVING ORDER ASC DESC LIMIT DISTINCT OFFSET -%token JOIN ON INNER OUTER LEFT RIGHT CROSS USING NATURAL -%token CREATE TABLE DATABASE INDEX -%token IMPORT CSV FILE TBL CONTROL INTO -%token DELETE INSERT -%token AS NOT AND OR NULL LIKE %token NAME STRING COMPARISON %token FLOAT %token INT -%token EQUALS NOTEQUALS LESS GREATER LESSEQ GREATEREQ +%token NOTEQUALS LESSEQ GREATEREQ + +/* SQL Keywords */ +%token DISTINCT DATABASE NATURAL CONTROL BETWEEN SELECT +%token HAVING OFFSET CREATE IMPORT RENAME DELETE INSERT +%token UPDATE UNLOAD COLUMN ISNULL WHERE GROUP ORDER LIMIT +%token INNER OUTER RIGHT CROSS USING TABLE INDEX ALTER FROM +%token DESC JOIN LEFT FILE DROP LOAD INTO NULL LIKE TOP ASC +%token CSV TBL NOT AND BY ON AS OR IN IS + /********************************* ** Non-Terminal types (http://www.gnu.org/software/bison/manual/html_node/Type-Decl.html) @@ -295,6 +298,8 @@ binary_expr: | expr '+' expr { $$ = Expr::makeOpBinary($1, '+', $3); } | expr '/' expr { $$ = Expr::makeOpBinary($1, '/', $3); } | expr '*' expr { $$ = Expr::makeOpBinary($1, '*', $3); } + | expr '%' expr { $$ = Expr::makeOpBinary($1, '%', $3); } + | expr '^' expr { $$ = Expr::makeOpBinary($1, '^', $3); } | expr AND expr { $$ = Expr::makeOpBinary($1, Expr::AND, $3); } | expr OR expr { $$ = Expr::makeOpBinary($1, Expr::OR, $3); } | expr LIKE expr { $$ = Expr::makeOpBinary($1, Expr::LIKE, $3); } @@ -303,10 +308,10 @@ binary_expr: comp_expr: - expr EQUALS expr { $$ = Expr::makeOpBinary($1, '=', $3); } + expr '=' expr { $$ = Expr::makeOpBinary($1, '=', $3); } | expr NOTEQUALS expr { $$ = Expr::makeOpBinary($1, Expr::NOT_EQUALS, $3); } - | expr LESS expr { $$ = Expr::makeOpBinary($1, '<', $3); } - | expr GREATER expr { $$ = Expr::makeOpBinary($1, '>', $3); } + | expr '<' expr { $$ = Expr::makeOpBinary($1, '<', $3); } + | expr '>' expr { $$ = Expr::makeOpBinary($1, '>', $3); } | expr LESSEQ expr { $$ = Expr::makeOpBinary($1, Expr::LESS_EQ, $3); } | expr GREATEREQ expr { $$ = Expr::makeOpBinary($1, Expr::GREATER_EQ, $3); } ; diff --git a/src/parser/flex_lexer.l b/src/parser/flex_lexer.l index 71d623b..fc37e6d 100644 --- a/src/parser/flex_lexer.l +++ b/src/parser/flex_lexer.l @@ -57,45 +57,64 @@ DISTINCT TOKEN(DISTINCT) -OFFSET TOKEN(OFFSET) +DATABASE TOKEN(DATABASE) +NATURAL TOKEN(NATURAL) +CONTROL TOKEN(CONTROL) +BETWEEN TOKEN(BETWEEN) SELECT TOKEN(SELECT) -INSERT TOKEN(INSERT) -IMPORT TOKEN(IMPORT) -CREATE TOKEN(CREATE) -DELETE TOKEN(DELETE) HAVING TOKEN(HAVING) -GROUP TOKEN(GROUP) +OFFSET TOKEN(OFFSET) +CREATE TOKEN(CREATE) +IMPORT TOKEN(IMPORT) +RENAME TOKEN(RENAME) +DELETE TOKEN(DELETE) +INSERT TOKEN(INSERT) +UPDATE TOKEN(UPDATE) +UNLOAD TOKEN(UNLOAD) +COLUMN TOKEN(COLUMN) +ISNULL TOKEN(ISNULL) WHERE TOKEN(WHERE) -LIMIT TOKEN(LIMIT) +GROUP TOKEN(GROUP) ORDER TOKEN(ORDER) +LIMIT TOKEN(LIMIT) INNER TOKEN(INNER) OUTER TOKEN(OUTER) +RIGHT TOKEN(RIGHT) CROSS TOKEN(CROSS) +USING TOKEN(USING) +TABLE TOKEN(TABLE) +INDEX TOKEN(INDEX) +ALTER TOKEN(ALTER) FROM TOKEN(FROM) -INTO TOKEN(INTO) -LIKE TOKEN(LIKE) -JOIN TOKEN(JOIN) -FILE TOKEN(FILE) DESC TOKEN(DESC) +JOIN TOKEN(JOIN) +LEFT TOKEN(LEFT) +FILE TOKEN(FILE) +DROP TOKEN(DROP) +LOAD TOKEN(LOAD) +INTO TOKEN(INTO) +NULL TOKEN(NULL) +LIKE TOKEN(LIKE) +TOP TOKEN(TOP) ASC TOKEN(ASC) -NOT TOKEN(NOT) +CSV TOKEN(CSV) TBL TOKEN(TBL) +NOT TOKEN(NOT) AND TOKEN(AND) BY TOKEN(BY) -OR TOKEN(OR) -AS TOKEN(AS) ON TOKEN(ON) +AS TOKEN(AS) +OR TOKEN(OR) +IN TOKEN(IN) +IS TOKEN(IS) -"=" TOKEN(EQUALS) -"<>" TOKEN(NOTEQUALS) -"<" TOKEN(LESS) -">" TOKEN(GREATER) -"<=" TOKEN(LESSEQ) -">=" TOKEN(GREATEREQ) +"<>" TOKEN(NOTEQUALS) +"<=" TOKEN(LESSEQ) +">=" TOKEN(GREATEREQ) -[-+*/(),.;] { return yytext[0]; } +[-+*/(),.;<>=^%] { return yytext[0]; } [0-9]+"."[0-9]* | @@ -109,6 +128,12 @@ ON TOKEN(ON) return SQL_INT; } +\"[A-Za-z][A-Za-z0-9_]*\" { + // Crop the leading and trailing quote char + yylval->sval = hsql::substr(yytext, 1, strlen(yytext)-1); + return SQL_NAME; +} + [A-Za-z][A-Za-z0-9_]* { yylval->sval = strdup(yytext); return SQL_NAME; @@ -116,7 +141,8 @@ ON TOKEN(ON) '[^'\n]*' { - yylval->sval = strdup(yytext); + // Crop the leading and trailing quote char + yylval->sval = hsql::substr(yytext, 1, strlen(yytext)-1); return SQL_STRING; } diff --git a/src/parser/keywordlist_generator.py b/src/parser/keywordlist_generator.py new file mode 100644 index 0000000..e4fc5bc --- /dev/null +++ b/src/parser/keywordlist_generator.py @@ -0,0 +1,47 @@ + +import math + + +with open("sql_keywords.txt", 'r') as fh: + keywords = [line.strip() for line in fh.readlines() if not line.strip().startswith("//") and len(line.strip()) > 0] + + keywords = sorted(keywords, key=lambda x: len(x), reverse=True) + + ################# + # Flex + + max_len = len(max(keywords, key=lambda x: len(x))) + 1 + max_len = 4 * int(math.ceil(max_len / 4.0)) + + for keyword in keywords: + len_diff = (max_len) - len(keyword) + num_tabs = int(math.floor(len_diff / 4.0)) + + if len_diff % 4 != 0: num_tabs += 1 + + tabs = ''.join(['\t' for _ in range(num_tabs)]) + print "%s%sTOKEN(%s)" % (keyword, tabs, keyword) + + # + ################# + + + ################# + # Bison + + + line = "%token" + max_len = 60 + + print "/* SQL Keywords */" + for keyword in keywords: + + if len(line + " " + keyword) > max_len: + print line + line = "%token " + keyword + else: + line = line + " " + keyword + print line + + # + ################# \ No newline at end of file diff --git a/src/parser/sql_keywords.txt b/src/parser/sql_keywords.txt new file mode 100644 index 0000000..d60c525 --- /dev/null +++ b/src/parser/sql_keywords.txt @@ -0,0 +1,66 @@ +// Possible source for more tokens https://www.sqlite.org/lang_keywords.html + +// Select statement +SELECT +TOP +FROM +WHERE +GROUP +BY +HAVING +ORDER +ASC +DESC +LIMIT +DISTINCT +OFFSET + +// Join clause +JOIN +ON +INNER +OUTER +LEFT +RIGHT +CROSS +USING +NATURAL + +// Create statement +CREATE +TABLE +DATABASE +INDEX + +// Import statement +IMPORT +CSV +FILE +TBL +CONTROL + +// other statements +ALTER +RENAME +DROP +DELETE +INSERT +LOAD +UPDATE +UNLOAD + +// misc. +COLUMN +INTO +AS + +// Expressions +NOT +AND +OR +NULL +LIKE +IN +IS +ISNULL +BETWEEN \ No newline at end of file