HyriseSQLParser/src/parser/bison_parser.y

529 lines
11 KiB
Plaintext
Raw Normal View History

2014-10-09 01:30:22 +02:00
%{
/**
* bison_parser.y
* defines bison_parser.h
* outputs bison_parser.cpp
*
* Grammar File Spec: http://dinosaur.compilertools.net/bison/bison_6.html
*
2014-10-09 01:30:22 +02:00
*/
/*********************************
** Section 1: C Declarations
*********************************/
2014-10-09 01:30:22 +02:00
2014-11-07 01:09:06 +01:00
#include "sqllib.h"
2014-10-16 15:35:38 +02:00
#include "bison_parser.h"
#include "flex_lexer.h"
2014-10-09 01:30:22 +02:00
#include <stdio.h>
2014-10-09 01:30:22 +02:00
using namespace hsql;
/**
 * Parser error callback.
 *
 * Allocates a new StatementList, marks it invalid, stores a heap copy
 * (strdup) of the error message in parser_msg and publishes the list
 * through *result. The scanner argument is not used here.
 *
 * @param result  out-parameter that receives the invalid StatementList
 * @param scanner scanner handle (unused)
 * @param msg     error message; copied, not retained
 * @return always 0
 */
int yyerror(StatementList** result, yyscan_t scanner, const char *msg) {
    StatementList* list = new StatementList();
    list->isValid = false;
    list->parser_msg = strdup(msg);
    *result = list;
    return 0;
}
%}
/*********************************
** Section 2: Bison Parser Declarations
*********************************/
// Define the names of the created files
%output  "bison_parser.cpp"
%defines "bison_parser.h"

// Tell bison to create a reentrant parser
%define api.pure full

// Prefix the parser
%define api.prefix {hsql_}
%define api.token.prefix {SQL_}

// Produce verbose syntax error messages
%define parse.error verbose
// Specify code that is included in the generated .h and .cpp files
%code requires {

// Declare yyscan_t only once. YY_TYPEDEF_YY_SCANNER_T is the guard macro
// that flex-generated reentrant scanner headers use for the same typedef,
// so the declaration is skipped by whichever header comes second.
#ifndef YY_TYPEDEF_YY_SCANNER_T
#define YY_TYPEDEF_YY_SCANNER_T
typedef void* yyscan_t;
#endif

// Let code that refers to the unprefixed YYSTYPE use the prefixed type.
#define YYSTYPE HSQL_STYPE

}
// Define additional parameters for yylex (http://www.gnu.org/software/bison/manual/html_node/Pure-Calling.html)
%lex-param   { yyscan_t scanner }

// Define additional parameters for yyparse
%parse-param { hsql::StatementList** result }
%parse-param { yyscan_t scanner }
/*********************************
** Define all data-types (http://www.gnu.org/software/bison/manual/html_node/Union-Decl.html)
*********************************/
2014-10-09 01:30:22 +02:00
// Semantic value type: one member per kind of value a token or
// non-terminal can carry.
%union {
    // Scalar values
    double fval;
    int64_t ival;
    char* sval;
    unsigned int uval;  // spelled out; `uint` is not a standard type name
    bool bval;

    // Statements
    hsql::Statement* statement;
    hsql::SelectStatement* select_stmt;
    hsql::ImportStatement* import_stmt;
    hsql::CreateStatement* create_stmt;

    // Statement components
    hsql::TableRef* table;
    hsql::Expr* expr;
    hsql::OrderDescription* order;
    hsql::OrderType order_type;
    hsql::LimitDescription* limit;

    // Lists
    hsql::StatementList* stmt_list;
    hsql::List<char*>* slist;
    hsql::List<hsql::Expr*>* expr_list;
    hsql::List<hsql::TableRef*>* table_list;
}
2014-10-09 04:46:25 +02:00
/*********************************
** Token Definition
*********************************/
/* Tokens that carry a semantic value */
%token <sval> IDENTIFIER STRING
%token <fval> FLOAT
%token <ival> INT
%token <uval> NOTEQUALS LESSEQ GREATEREQ

/* SQL Keywords */
%token DATABASE DISTINCT BETWEEN CONTROL NATURAL COLUMN
%token CREATE DELETE EXISTS HAVING IMPORT INSERT ISNULL
%token OFFSET RENAME SELECT UNLOAD UPDATE ALTER CROSS GROUP
%token INDEX INNER LIMIT ORDER OUTER RADIX RIGHT TABLE UNION
%token USING WHERE DESC DROP FILE FROM HASH INTO JOIN LEFT
%token LIKE LOAD NULL SCAN ALL AND ASC CSV NOT TBL TOP AS BY
%token IF IN IS ON OR
/*********************************
** Non-Terminal types (http://www.gnu.org/software/bison/manual/html_node/Type-Decl.html)
*********************************/
%type <stmt_list>   statement_list
%type <statement>   statement
%type <select_stmt> select_statement select_ref select_with_paren select_no_paren select_clause
%type <import_stmt> import_statement
%type <create_stmt> create_statement
%type <sval>        table_name opt_alias alias file_path
%type <bval>        opt_not_exists
%type <table>       from_clause table_ref table_ref_atomic table_ref_name
%type <table>       join_clause join_table
%type <expr>        expr scalar_expr unary_expr binary_expr function_expr star_expr expr_alias
%type <expr>        column_name literal int_literal num_literal string_literal
%type <expr>        comp_expr opt_where join_condition
%type <expr_list>   expr_list opt_group select_list
%type <table_list>  table_ref_commalist
%type <order>       opt_order
%type <limit>       opt_limit
%type <order_type>  opt_order_type
%type <uval>        import_file_type opt_join_type opt_join_algorithm
2014-10-24 16:10:38 +02:00
/******************************
** Token Precedence and Associativity
** Precedence: lowest to highest
******************************/
%left       OR
%left       AND
%right      NOT
%right      '=' EQUALS NOTEQUALS LIKE
%nonassoc   '<' '>' LESS GREATER LESSEQ GREATEREQ

%nonassoc   NOTNULL
%nonassoc   ISNULL
%nonassoc   IS      /* sets precedence for IS NULL, etc */
%left       '+' '-'
%left       '*' '/' '%'
%left       '^'

/* Unary Operators */
%right      UMINUS
%left       '[' ']'
%left       '(' ')'
%left       '.'
2014-10-09 01:30:22 +02:00
%%
/*********************************
** Section 3: Grammar Definition
*********************************/
2014-10-09 01:30:22 +02:00
// Defines our general input: a list of statements separated by ';',
// optionally terminated by a trailing ';'.
2014-10-20 22:33:36 +02:00
// Top-level rule: passes the parsed statement list to the caller through
// the `result` parse parameter.
input:
        statement_list opt_semicolon {
            *result = $1;
        }
    ;
// Left-recursive list of statements separated by ';'.
statement_list:
        statement {
            $$ = new StatementList($1);
        }
    |   statement_list ';' statement {
            $$ = $1;
            $$->push_back($3);
        }
    ;
2014-10-09 01:30:22 +02:00
// All types of statements
2014-11-07 01:09:06 +01:00
// TODO: insert, delete, etc...
// Every statement kind is passed on through its common Statement pointer.
statement:
        select_statement { $$ = $1; }
    |   import_statement { $$ = $1; }
    |   create_statement { $$ = $1; }
    ;
2014-10-09 01:30:22 +02:00
2014-11-07 01:09:06 +01:00
/******************************
** Import Statement
******************************/
// IMPORT FROM <file type> FILE <path> INTO <table>
import_statement:
        IMPORT FROM import_file_type FILE file_path INTO table_name {
            ImportStatement* stmt = new ImportStatement();
            stmt->table_name = $7;
            stmt->file_path = $5;
            stmt->file_type = static_cast<ImportFileType>($3);
            $$ = stmt;
        }
    ;
// File formats accepted by IMPORT; CSV is the only alternative.
import_file_type:
        CSV {
            $$ = kImportCSV;
        }
    ;
// A file path is written as a string literal; only its `name` string is
// passed on.
// NOTE(review): the Expr built for the literal is not released here —
// confirm whether this leaks and whether deleting it would free `name`.
file_path:
        string_literal { $$ = $1->name; }
    ;
/******************************
** Create Statement
******************************/
// CREATE TABLE [IF NOT EXISTS] <table> FROM TBL FILE <path>
create_statement:
        CREATE TABLE opt_not_exists table_name FROM TBL FILE file_path {
            CreateStatement* stmt = new CreateStatement();
            stmt->create_type = kTableFromTbl;
            stmt->table_name = $4;
            stmt->file_path = $8;
            stmt->if_not_exists = $3;
            $$ = stmt;
        }
    ;
// True when the optional IF NOT EXISTS clause is present.
opt_not_exists:
        IF NOT EXISTS {
            $$ = true;
        }
    |   /* empty */ {
            $$ = false;
        }
    ;
/******************************
2014-11-07 01:09:06 +01:00
** Select Statement
******************************/
2014-10-09 01:30:22 +02:00
// A select statement, with or without enclosing parentheses.
select_statement:
        select_with_paren
    |   select_no_paren
    ;
// A select wrapped in one or more pairs of parentheses; the parentheses are
// dropped and the inner statement is passed on.
select_with_paren:
        '(' select_no_paren ')' {
            $$ = $2;
        }
    |   '(' select_with_paren ')' {
            $$ = $2;
        }
    ;
// A select without enclosing parentheses: either a single select clause or
// a UNION of two selects, followed by optional ORDER BY and LIMIT.
select_no_paren:
        select_clause opt_order opt_limit {
            $$ = $1;
            $$->order = $2;
            $$->limit = $3;
        }
    |   select_ref UNION select_ref opt_order opt_limit {
            $$ = $1;
            $$->union_select = $3;
            // TODO: might overwrite order and limit of first select here
            $$->order = $4;
            $$->limit = $5;
        }
    ;
// Operand of a UNION: a bare select clause or a parenthesized select.
select_ref:
        select_clause {
            $$ = $1;
        }
    |   select_with_paren {
            $$ = $1;
        }
    ;
// SELECT <list> FROM <table> [WHERE ...] [GROUP BY ...]
select_clause:
        SELECT select_list from_clause opt_where opt_group {
            $$ = new SelectStatement();
            $$->select_list = $2;
            $$->from_table = $3;
            $$->where_clause = $4;
            $$->group_by = $5;
        }
    ;
2014-10-09 01:30:22 +02:00
2014-10-22 17:18:43 +02:00
// The projection list is an ordinary expression list.
select_list:
        expr_list
    ;
2014-10-22 17:18:43 +02:00
2014-10-09 01:30:22 +02:00
// FROM <table reference>
from_clause:
        FROM table_ref {
            $$ = $2;
        }
    ;
2014-10-09 01:30:22 +02:00
2014-11-13 01:27:47 +01:00
// Optional WHERE clause; NULL when absent.
opt_where:
        WHERE expr { $$ = $2; }
    |   /* empty */ { $$ = NULL; }
    ;
// TODO: having
2014-11-13 01:27:47 +01:00
// Optional GROUP BY clause; NULL when absent.
opt_group:
        GROUP BY expr_list {
            $$ = $3;
        }
    |   /* empty */ {
            $$ = NULL;
        }
    ;
2014-10-09 01:30:22 +02:00
2014-11-13 01:27:47 +01:00
// Optional ORDER BY on a single expression; NULL when absent.
opt_order:
        ORDER BY expr opt_order_type { $$ = new OrderDescription($4, $3); }
    |   /* empty */ { $$ = NULL; }
    ;
2014-10-27 14:54:15 +01:00
2014-11-13 01:27:47 +01:00
// Sort direction; ascending when not specified.
opt_order_type:
        ASC { $$ = kOrderAsc; }
    |   DESC { $$ = kOrderDesc; }
    |   /* empty */ { $$ = kOrderAsc; }
    ;
2014-10-27 14:54:15 +01:00
2014-11-13 01:27:47 +01:00
// Optional LIMIT with optional OFFSET; NULL when absent. The integer literal
// expressions are only needed for their values and are deleted afterwards.
opt_limit:
        LIMIT int_literal { $$ = new LimitDescription($2->ival, kNoOffset); delete $2; }
    |   LIMIT int_literal OFFSET int_literal { $$ = new LimitDescription($2->ival, $4->ival); delete $2; delete $4; }
    |   /* empty */ { $$ = NULL; }
    ;
2014-10-27 14:54:15 +01:00
2014-10-24 16:10:38 +02:00
/******************************
** Expressions
******************************/
// Comma-separated list of (optionally aliased) expressions.
expr_list:
        expr_alias {
            $$ = new List<Expr*>($1);
        }
    |   expr_list ',' expr_alias {
            $$ = $1;
            $$->push_back($3);
        }
    ;
// An expression followed by an optional alias (NULL when none is given).
expr_alias:
        expr opt_alias {
            $1->alias = $2;
            $$ = $1;
        }
    ;
2014-10-24 16:10:38 +02:00
// Any expression; parentheses only group and leave no node of their own.
expr:
        '(' expr ')'    { $$ = $2; }
    |   scalar_expr     { $$ = $1; }
    |   unary_expr      { $$ = $1; }
    |   binary_expr     { $$ = $1; }
    |   function_expr   { $$ = $1; }
    ;
2014-10-09 01:30:22 +02:00
2014-10-24 16:10:38 +02:00
// Leaf expressions: column references, '*' and literals.
scalar_expr:
        column_name     { $$ = $1; }
    |   star_expr       { $$ = $1; }
    |   literal         { $$ = $1; }
    ;
2014-10-24 16:10:38 +02:00
// Prefix operators.
// %prec UMINUS gives unary minus the precedence declared for UMINUS rather
// than the precedence of binary '-', so it binds tighter than the binary
// arithmetic operators.
unary_expr:
        '-' expr %prec UMINUS { $$ = Expr::makeOpUnary(Expr::UMINUS, $2); }
    |   NOT expr { $$ = Expr::makeOpUnary(Expr::NOT, $2); }
    ;
2014-10-09 01:30:22 +02:00
2014-10-24 16:10:38 +02:00
// Infix operators. Single-character operators are passed to makeOpBinary as
// their character, the others as Expr operator constants.
binary_expr:
        comp_expr               { $$ = $1; }
    |   expr '-' expr           { $$ = Expr::makeOpBinary($1, '-', $3); }
    |   expr '+' expr           { $$ = Expr::makeOpBinary($1, '+', $3); }
    |   expr '/' expr           { $$ = Expr::makeOpBinary($1, '/', $3); }
    |   expr '*' expr           { $$ = Expr::makeOpBinary($1, '*', $3); }
    |   expr '%' expr           { $$ = Expr::makeOpBinary($1, '%', $3); }
    |   expr '^' expr           { $$ = Expr::makeOpBinary($1, '^', $3); }
    |   expr AND expr           { $$ = Expr::makeOpBinary($1, Expr::AND, $3); }
    |   expr OR expr            { $$ = Expr::makeOpBinary($1, Expr::OR, $3); }
    |   expr LIKE expr          { $$ = Expr::makeOpBinary($1, Expr::LIKE, $3); }
    |   expr NOT LIKE expr      { $$ = Expr::makeOpBinary($1, Expr::NOT_LIKE, $4); }
    ;
2014-10-24 16:10:38 +02:00
// Comparison operators.
comp_expr:
        expr '=' expr           { $$ = Expr::makeOpBinary($1, '=', $3); }
    |   expr NOTEQUALS expr     { $$ = Expr::makeOpBinary($1, Expr::NOT_EQUALS, $3); }
    |   expr '<' expr           { $$ = Expr::makeOpBinary($1, '<', $3); }
    |   expr '>' expr           { $$ = Expr::makeOpBinary($1, '>', $3); }
    |   expr LESSEQ expr        { $$ = Expr::makeOpBinary($1, Expr::LESS_EQ, $3); }
    |   expr GREATEREQ expr     { $$ = Expr::makeOpBinary($1, Expr::GREATER_EQ, $3); }
    ;
// Function call with exactly one argument expression.
function_expr:
        IDENTIFIER '(' expr ')' {
            $$ = Expr::makeFunctionRef($1, $3);
        }
    ;
// Column reference, either a single identifier or two identifiers joined
// by '.'.
column_name:
        IDENTIFIER {
            $$ = Expr::makeColumnRef($1);
        }
    |   IDENTIFIER '.' IDENTIFIER {
            $$ = Expr::makeColumnRef($1, $3);
        }
    ;
// String or numeric literal.
literal:
        string_literal
    |   num_literal
    ;
// STRING token wrapped in a literal expression.
string_literal:
        STRING {
            $$ = Expr::makeLiteral($1);
        }
    ;
2014-10-27 14:54:15 +01:00
// Floating point or integer literal.
num_literal:
        FLOAT {
            $$ = Expr::makeLiteral($1);
        }
    |   int_literal {
            $$ = $1;
        }
    ;
// INT token wrapped in a literal expression.
int_literal:
        INT {
            $$ = Expr::makeLiteral($1);
        }
    ;
2014-10-24 16:10:38 +02:00
// The '*' selector.
star_expr:
        '*' { $$ = new Expr(kExprStar); }
    ;
/******************************
** Table
******************************/
// Either a single table reference or a comma-separated list of two or more,
// which is represented as a cross product. The list is built left to right
// so the tables keep the order in which they were written (the first table
// used to be appended after all the others).
table_ref:
        table_ref_atomic
    |   table_ref_commalist ',' table_ref_atomic {
            $1->push_back($3);
            auto tbl = new TableRef(kTableCrossProduct);
            tbl->list = $1;
            $$ = tbl;
        }
    ;
2014-11-04 01:42:09 +01:00
// A single table source: a named table, a parenthesized select with a
// mandatory alias, or a join.
table_ref_atomic:
        table_ref_name
    |   '(' select_statement ')' alias {
            auto tbl = new TableRef(kTableSelect);
            tbl->select = $2;
            tbl->alias = $4;
            $$ = tbl;
        }
    |   join_clause
    ;
2014-11-03 23:57:42 +01:00
// Left-recursive, comma-separated list of table sources.
table_ref_commalist:
        table_ref_atomic {
            $$ = new List<TableRef*>($1);
        }
    |   table_ref_commalist ',' table_ref_atomic {
            $$ = $1;
            $$->push_back($3);
        }
    ;
2014-11-03 23:57:42 +01:00
2014-11-04 01:42:09 +01:00
// Named table with an optional alias (NULL when none is given).
table_ref_name:
        table_name opt_alias {
            auto tbl = new TableRef(kTableName);
            tbl->name = $1;
            tbl->alias = $2;
            $$ = tbl;
        }
    ;
2014-11-03 23:57:42 +01:00
// Table name, optionally written as two identifiers joined by '.'.
// NOTE(review): in the dotted form only the first identifier is passed on;
// the second one is dropped and never released — confirm this is intended.
table_name:
        IDENTIFIER {
            $$ = $1;
        }
    |   IDENTIFIER '.' IDENTIFIER {
            $$ = $1;
        }
    ;
2014-11-04 01:42:09 +01:00
// Alias, with or without the AS keyword.
alias:
        AS IDENTIFIER { $$ = $2; }
    |   IDENTIFIER
    ;
// Optional alias; NULL when absent.
opt_alias:
        alias
    |   /* empty */ { $$ = NULL; }
    ;
2014-10-24 16:10:38 +02:00
/******************************
** Join Statements
******************************/
2014-11-07 01:09:06 +01:00
// <table> [algorithm] [type] JOIN <table> ON <condition>
// $2 is the join algorithm and $3 the join type; each is stored in the
// matching field of the JoinDefinition (they used to be swapped).
join_clause:
        join_table opt_join_algorithm opt_join_type JOIN join_table ON join_condition
        {
            $$ = new TableRef(kTableJoin);
            $$->join = new JoinDefinition();
            $$->join->algorithm = (JoinAlgorithm) $2;
            $$->join->type = (JoinType) $3;
            $$->join->left = $1;
            $$->join->right = $5;
            $$->join->condition = $7;
        }
    ;
2014-11-17 22:13:11 +01:00
// Join type keyword; an inner join when none is given.
opt_join_type:
        INNER {
            $$ = kJoinInner;
        }
    |   OUTER {
            $$ = kJoinOuter;
        }
    |   LEFT {
            $$ = kJoinLeft;
        }
    |   RIGHT {
            $$ = kJoinRight;
        }
    |   /* empty, default */ {
            $$ = kJoinInner;
        }
    ;
// Join algorithm keyword; scan when none is given.
opt_join_algorithm:
        SCAN { $$ = kJoinAlgoScan; }
    |   HASH { $$ = kJoinAlgoHash; }
    |   RADIX { $$ = kJoinAlgoRadix; }
    |   /* empty, default */ { $$ = kJoinAlgoScan; }
    ;
// Operand of a join: a parenthesized select with a mandatory alias, or a
// named table.
join_table:
        '(' select_statement ')' alias {
            TableRef* sub = new TableRef(kTableSelect);
            sub->alias = $4;
            sub->select = $2;
            $$ = sub;
        }
    |   table_ref_name {
            $$ = $1;
        }
    ;
// The ON condition of a join is an ordinary expression.
join_condition:
        expr {
            $$ = $1;
        }
    ;
/******************************
** Misc
******************************/
2014-10-24 16:10:38 +02:00
// Optional trailing ';' after the last statement; carries no value.
opt_semicolon:
        ';'
    |   /* empty */
    ;
2014-10-20 22:33:36 +02:00
%%
/*********************************
** Section 4: Additional C code
*********************************/
/* empty */