Add tokenize method to SQLParser to output the list of tokens (#54)
Added tokenize benchmark. Restructured Makefile
This commit is contained in:
parent
12e35dcd63
commit
69d96061b2
|
@ -41,4 +41,4 @@ cmake-build-debug/
|
|||
*.cpp.orig
|
||||
*.h.orig
|
||||
|
||||
benchmark/parser_benchmark
|
||||
*.csv
|
|
@ -8,7 +8,7 @@ install:
|
|||
- sudo apt-get install -y flex valgrind
|
||||
- sudo update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-4.8 90
|
||||
- sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-4.8 90
|
||||
|
||||
|
||||
# Install bison 3.0.4.
|
||||
- wget http://ftp.gnu.org/gnu/bison/bison-3.0.4.tar.gz
|
||||
- tar -xvzf bison-3.0.4.tar.gz
|
||||
|
@ -32,8 +32,7 @@ script:
|
|||
- make -j4
|
||||
|
||||
- make test
|
||||
- make test_format
|
||||
- make test_example
|
||||
|
||||
# Test if benchmark can be built.
|
||||
# - make build_benchmark
|
||||
# - make benchmark
|
||||
|
|
143
Makefile
143
Makefile
|
@ -1,53 +1,59 @@
|
|||
# Directories.
|
||||
all: library
|
||||
|
||||
#######################################
|
||||
############# Directories #############
|
||||
#######################################
|
||||
BIN = bin
|
||||
SRC = src
|
||||
SRCPARSER = src/parser
|
||||
|
||||
# Files.
|
||||
PARSERCPP = $(SRCPARSER)/bison_parser.cpp $(SRCPARSER)/flex_lexer.cpp
|
||||
LIBCPP = $(shell find $(SRC) -name '*.cpp' -not -path "$(SRCPARSER)/*") $(PARSERCPP)
|
||||
LIBOBJ = $(LIBCPP:%.cpp=%.o)
|
||||
TESTCPP = $(shell find test/ -name '*.cpp')
|
||||
|
||||
ALLLIB = $(shell find $(SRC) -name '*.cpp' -not -path "$(SRCPARSER)/*") $(shell find $(SRC) -name '*.h' -not -path "$(SRCPARSER)/*")
|
||||
ALLTEST = $(shell find test/ -name '*.cpp') $(shell find test/ -name '*.h')
|
||||
EXAMPLESRC = $(shell find example/ -name '*.cpp') $(shell find example/ -name '*.h')
|
||||
|
||||
# Compiler & linker flags.
|
||||
CFLAGS = -std=c++11 -Wall -Werror -fPIC
|
||||
LIBFLAGS = -shared
|
||||
TARGET = libsqlparser.so
|
||||
INSTALL = /usr/local
|
||||
|
||||
CTESTFLAGS = -Wall -Werror -Isrc/ -Itest/ -L./ -std=c++11 -lstdc++
|
||||
INSTALL = /usr/local
|
||||
|
||||
######################################
|
||||
############ Compile Mode ############
|
||||
######################################
|
||||
# Set compile mode to -g or -O3.
|
||||
MODE_LOG = ""
|
||||
# Debug mode: make mode=debug
|
||||
|
||||
mode ?= release
|
||||
MODE_LOG = ""
|
||||
OPT_FLAG =
|
||||
ifeq ($(mode), debug)
|
||||
CFLAGS += -g
|
||||
CTESTFLAGS += -g
|
||||
OPT_FLAG = -g
|
||||
MODE_LOG = "Building in \033[1;31mdebug\033[0m mode"
|
||||
else
|
||||
CFLAGS += -O3
|
||||
CTESTFLAGS += -O3
|
||||
OPT_FLAG = -O3
|
||||
MODE_LOG = "Building in \033[0;32mrelease\033[0m mode ('make mode=debug' for debug mode)"
|
||||
endif
|
||||
|
||||
GMAKE = make mode=$(mode)
|
||||
|
||||
all: library
|
||||
|
||||
library: $(TARGET)
|
||||
|
||||
$(TARGET): $(LIBOBJ)
|
||||
$(CXX) $(LIBFLAGS) -o $(TARGET) $(LIBOBJ)
|
||||
#######################################
|
||||
############### Library ###############
|
||||
#######################################
|
||||
PARSER_CPP = $(SRCPARSER)/bison_parser.cpp $(SRCPARSER)/flex_lexer.cpp
|
||||
PARSER_H = $(SRCPARSER)/bison_parser.h $(SRCPARSER)/flex_lexer.h
|
||||
|
||||
LIB_BUILD = libsqlparser.so
|
||||
LIB_CFLAGS = -std=c++11 -Wall -Werror -fPIC $(OPT_FLAG)
|
||||
LIB_LFLAGS = -shared $(OPT_FLAG)
|
||||
LIB_CPP = $(shell find $(SRC) -name '*.cpp' -not -path "$(SRCPARSER)/*") $(PARSER_CPP)
|
||||
LIB_H = $(shell find $(SRC) -name '*.h' -not -path "$(SRCPARSER)/*") $(PARSER_H)
|
||||
LIB_ALL = $(shell find $(SRC) -name '*.cpp' -not -path "$(SRCPARSER)/*") $(shell find $(SRC) -name '*.h' -not -path "$(SRCPARSER)/*")
|
||||
LIB_OBJ = $(LIB_CPP:%.cpp=%.o)
|
||||
|
||||
library: $(LIB_BUILD)
|
||||
|
||||
$(LIB_BUILD): $(LIB_OBJ)
|
||||
$(CXX) $(LIB_LFLAGS) -o $(LIB_BUILD) $(LIB_OBJ)
|
||||
|
||||
$(SRCPARSER)/flex_lexer.o: $(SRCPARSER)/flex_lexer.cpp $(SRCPARSER)/bison_parser.cpp
|
||||
$(CXX) $(CFLAGS) -c -o $@ $< -Wno-sign-compare -Wno-unneeded-internal-declaration -Wno-deprecated-register
|
||||
$(CXX) $(LIB_CFLAGS) -c -o $@ $< -Wno-sign-compare -Wno-unneeded-internal-declaration -Wno-deprecated-register
|
||||
|
||||
%.o: %.cpp $(PARSERCPP)
|
||||
$(CXX) $(CFLAGS) -c -o $@ $<
|
||||
%.o: %.cpp $(PARSER_CPP) $(LIB_H)
|
||||
$(CXX) $(LIB_CFLAGS) -c -o $@ $<
|
||||
|
||||
$(SRCPARSER)/bison_parser.cpp: $(SRCPARSER)/bison_parser.y
|
||||
$(GMAKE) -C $(SRCPARSER)/ bison_parser.cpp
|
||||
|
@ -55,11 +61,13 @@ $(SRCPARSER)/bison_parser.cpp: $(SRCPARSER)/bison_parser.y
|
|||
$(SRCPARSER)/flex_lexer.cpp: $(SRCPARSER)/flex_lexer.l
|
||||
$(GMAKE) -C $(SRCPARSER)/ flex_lexer.cpp
|
||||
|
||||
$(SRCPARSER)/bison_parser.h: $(SRCPARSER)/bison_parser.cpp
|
||||
$(SRCPARSER)/flex_lexer.h: $(SRCPARSER)/flex_lexer.cpp
|
||||
|
||||
clean:
|
||||
rm -f $(TARGET)
|
||||
rm -f $(LIB_BUILD)
|
||||
rm -rf $(BIN)
|
||||
find $(SRC) -type f -name '*.o' -delete
|
||||
$(GMAKE) -C benchmark/ clean
|
||||
|
||||
cleanparser:
|
||||
$(GMAKE) -C $(SRCPARSER)/ clean
|
||||
|
@ -67,50 +75,73 @@ cleanparser:
|
|||
cleanall: clean cleanparser
|
||||
|
||||
install:
|
||||
cp $(TARGET) $(INSTALL)/lib/$(TARGET)
|
||||
cp $(LIB_BUILD) $(INSTALL)/lib/$(LIB_BUILD)
|
||||
rm -rf $(INSTALL)/include/hsql
|
||||
cp -r src $(INSTALL)/include/hsql
|
||||
find $(INSTALL)/include/hsql -not -name '*.h' -type f | xargs rm
|
||||
|
||||
#################
|
||||
### Benchmark ###
|
||||
#################
|
||||
|
||||
benchmark: library
|
||||
$(GMAKE) -C benchmark/ clean run
|
||||
|
||||
build_benchmark: library
|
||||
$(GMAKE) -C benchmark/ parser_benchmark
|
||||
#######################################
|
||||
############## Benchmark ##############
|
||||
#######################################
|
||||
BM_BUILD = $(BIN)/benchmark
|
||||
BM_CFLAGS = -std=c++17 -Wall -Isrc/ -L./ $(OPT_FLAG)
|
||||
BM_PATH = benchmark
|
||||
BM_CPP = $(shell find $(BM_PATH)/ -name '*.cpp')
|
||||
BM_ALL = $(shell find $(BM_PATH)/ -name '*.cpp' -or -name '*.h')
|
||||
|
||||
############
|
||||
### Test ###
|
||||
############
|
||||
benchmark: $(BM_BUILD)
|
||||
|
||||
test: $(BIN)/sql_tests
|
||||
run_benchmarks: benchmark
|
||||
./$(BM_BUILD) --benchmark_counters_tabular=true
|
||||
# --benchmark_filter="abc
|
||||
|
||||
save_benchmarks: benchmark
|
||||
./$(BM_BUILD) --benchmark_format=csv > benchmarks.csv
|
||||
|
||||
$(BM_BUILD): $(BM_ALL) $(LIB_BUILD)
|
||||
@mkdir -p $(BIN)/
|
||||
$(CXX) $(BM_CFLAGS) $(BM_CPP) -o $(BM_BUILD) -lbenchmark -lpthread -lsqlparser -lstdc++ -lstdc++fs
|
||||
|
||||
|
||||
|
||||
########################################
|
||||
############ Test & Example ############
|
||||
########################################
|
||||
TEST_BUILD = $(BIN)/tests
|
||||
TEST_CFLAGS = -std=c++11 -Wall -Werror -Isrc/ -Itest/ -L./ $(OPT_FLAG)
|
||||
TEST_CPP = $(shell find test/ -name '*.cpp')
|
||||
TEST_ALL = $(shell find test/ -name '*.cpp') $(shell find test/ -name '*.h')
|
||||
EXAMPLE_SRC = $(shell find example/ -name '*.cpp') $(shell find example/ -name '*.h')
|
||||
|
||||
test: $(TEST_BUILD)
|
||||
bash test/test.sh
|
||||
|
||||
$(TEST_BUILD): $(TEST_ALL) $(LIB_BUILD)
|
||||
@mkdir -p $(BIN)/
|
||||
$(CXX) $(TEST_CFLAGS) $(TEST_CPP) -o $(TEST_BUILD) -lsqlparser -lstdc++
|
||||
|
||||
test_example:
|
||||
$(GMAKE) -C example/
|
||||
LD_LIBRARY_PATH=./ \
|
||||
./example/example "SELECT * FROM students WHERE name = 'Max Mustermann';"
|
||||
|
||||
test_format:
|
||||
@! astyle --options=astyle.options $(ALLLIB) | grep -q "Formatted"
|
||||
@! astyle --options=astyle.options $(ALLTEST) | grep -q "Formatted"
|
||||
|
||||
$(BIN)/sql_tests: library
|
||||
@mkdir -p $(BIN)/
|
||||
$(CXX) $(CTESTFLAGS) $(TESTCPP) -o $(BIN)/sql_tests -lsqlparser
|
||||
@! astyle --options=astyle.options $(LIB_ALL) | grep -q "Formatted"
|
||||
@! astyle --options=astyle.options $(TEST_ALL) | grep -q "Formatted"
|
||||
|
||||
|
||||
############
|
||||
### Misc ###
|
||||
############
|
||||
|
||||
########################################
|
||||
################# Misc #################
|
||||
########################################
|
||||
|
||||
format:
|
||||
astyle --options=astyle.options $(ALLLIB)
|
||||
astyle --options=astyle.options $(ALLTEST)
|
||||
astyle --options=astyle.options $(EXAMPLESRC)
|
||||
astyle --options=astyle.options $(LIB_ALL)
|
||||
astyle --options=astyle.options $(TEST_ALL)
|
||||
astyle --options=astyle.options $(EXAMPLE_SRC)
|
||||
|
||||
log_mode:
|
||||
@echo $(MODE_LOG)
|
||||
|
||||
|
|
|
@ -33,14 +33,14 @@ To use the SQL parser in your own projects you simply have to follow these few s
|
|||
|
||||
{
|
||||
// Basic Usage Example
|
||||
|
||||
|
||||
const std::string query = "...";
|
||||
hsql::SQLParserResult result;
|
||||
hsql::SQLParser::parseSQLString(query, &result);
|
||||
|
||||
hsql::SQLParser::parse(query, &result);
|
||||
|
||||
if (result.isValid() && result.size() > 0) {
|
||||
const hsql::SQLStatement* statement = result.getStatement(0);
|
||||
|
||||
|
||||
if (statement->isType(hsql::SelectStatement)) {
|
||||
const hsql::SelectStatement* select = (const hsql::SelectStatement*) statement;
|
||||
/* ... */
|
||||
|
|
|
@ -1,17 +0,0 @@
|
|||
|
||||
SRC = ./
|
||||
CPP = $(shell find $(SRC) -name '*.cpp')
|
||||
|
||||
CFLAGS = -std=c++11 -lstdc++ -Wall -Werror -I../src/ -L../ -O3
|
||||
|
||||
all: parser_benchmark
|
||||
|
||||
run: parser_benchmark
|
||||
@export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:../ &&\
|
||||
./parser_benchmark
|
||||
|
||||
parser_benchmark: $(CPP)
|
||||
$(CXX) $(CFLAGS) $(CPP) -o parser_benchmark -lbenchmark -lpthread -lsqlparser
|
||||
|
||||
clean:
|
||||
rm -f parser_benchmark
|
|
@ -12,23 +12,3 @@ make
|
|||
make install
|
||||
```
|
||||
|
||||
## Run the benchmarks
|
||||
|
||||
Build the library from the parent directory and then execute:
|
||||
|
||||
```bash
|
||||
make run
|
||||
|
||||
# or manually...
|
||||
|
||||
make
|
||||
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:../
|
||||
./parser_benchmark
|
||||
```
|
||||
|
||||
... or run this from the parent directory:
|
||||
|
||||
```bash
|
||||
# From root of Git repository.
|
||||
make run_benchmark
|
||||
```
|
|
@ -0,0 +1,28 @@
|
|||
#include "benchmark/benchmark.h"
|
||||
|
||||
#include "benchmark_utils.h"
|
||||
#include "queries.h"
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
// Create parse and tokenize benchmarks for TPC-H queries.
|
||||
const auto tpch_queries = getTPCHQueries();
|
||||
for (const auto& query : tpch_queries) {
|
||||
std::string p_name = query.first + "-parse";
|
||||
benchmark::RegisterBenchmark(p_name.c_str(), &BM_ParseBenchmark, query.second);
|
||||
std::string t_name = query.first + "-tokenize";
|
||||
benchmark::RegisterBenchmark(t_name.c_str(), &BM_TokenizeBenchmark, query.second);
|
||||
}
|
||||
|
||||
// Create parse and tokenize benchmarks for all queries in sql_queries array.
|
||||
for (unsigned i = 0; i < sql_queries.size(); ++i) {
|
||||
const auto& query = sql_queries[i];
|
||||
std::string p_name = getQueryName(i) + "-parse";
|
||||
benchmark::RegisterBenchmark(p_name.c_str(), &BM_ParseBenchmark, query.second);
|
||||
|
||||
std::string t_name = getQueryName(i) + "-tokenize";
|
||||
benchmark::RegisterBenchmark(t_name.c_str(), &BM_TokenizeBenchmark, query.second);
|
||||
}
|
||||
|
||||
benchmark::Initialize(&argc, argv);
|
||||
benchmark::RunSpecifiedBenchmarks();
|
||||
}
|
|
@ -0,0 +1,44 @@
|
|||
#include "benchmark_utils.h"
|
||||
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
|
||||
#include "SQLParser.h"
|
||||
|
||||
// Runs the tokenizer once over `query` and returns how many tokens it
// appended to the output vector.
size_t getNumTokens(const std::string& query) {
  std::vector<int16_t> token_ids;
  hsql::SQLParser::tokenize(query, &token_ids);

  const size_t token_count = token_ids.size();
  return token_count;
}
|
||||
|
||||
// Benchmark body measuring hsql::SQLParser::tokenize() on `query`.
//
// Before the timed loop, two user counters are published on the state:
//   num_tokens - number of tokens the tokenizer yields for the query
//   num_chars  - length of the query string
//
// Each iteration tokenizes into a fresh vector.
void BM_TokenizeBenchmark(benchmark::State& st, const std::string& query) {
  st.counters["num_tokens"] = getNumTokens(query);
  st.counters["num_chars"] = query.size();

  while (st.KeepRunning()) {
    // Reserve capacity only. Constructing the vector with a size of 512 would
    // create 512 zero-valued entries that tokenize() then appends after,
    // adding a zero-fill to every timed iteration and leaving bogus tokens
    // at the front of the result.
    std::vector<int16_t> tokens;
    tokens.reserve(512);
    hsql::SQLParser::tokenize(query, &tokens);
  }
}
|
||||
|
||||
// Benchmark body measuring hsql::SQLParser::parse() on `query`.
//
// Publishes the query's token count and character count as user counters,
// then parses the query once per iteration. If a parse result is not valid,
// the query and the parser's error message are printed to stdout and the
// benchmark is flagged via SkipWithError().
void BM_ParseBenchmark(benchmark::State& st, const std::string& query) {
  st.counters["num_tokens"] = getNumTokens(query);
  st.counters["num_chars"] = query.size();

  while (st.KeepRunning()) {
    hsql::SQLParserResult parse_result;
    hsql::SQLParser::parse(query, &parse_result);

    if (parse_result.isValid()) {
      continue;
    }

    // Invalid result: show what failed and why before flagging the run.
    std::cout << query << std::endl;
    std::cout << parse_result.errorMsg() << std::endl;
    st.SkipWithError("Parsing failed!");
  }
}
|
||||
|
||||
// Reads the whole file at `file_path` and returns its contents as a string.
// No error is reported if the file cannot be opened; the stream then yields
// no characters and the returned string is empty.
std::string readFileContents(const std::string& file_path) {
  std::ifstream input(file_path.c_str());

  const std::istreambuf_iterator<char> first(input);
  const std::istreambuf_iterator<char> last;

  std::string contents;
  contents.assign(first, last);
  return contents;
}
|
|
@ -1,6 +1,18 @@
|
|||
#ifndef __BENCHMARK_UTILS_H__
|
||||
#define __BENCHMARK_UTILS_H__
|
||||
|
||||
#include "benchmark/benchmark.h"
|
||||
|
||||
size_t getNumTokens(const std::string& query);
|
||||
|
||||
void BM_TokenizeBenchmark(benchmark::State& st, const std::string& query);
|
||||
|
||||
void BM_ParseBenchmark(benchmark::State& st, const std::string& query);
|
||||
|
||||
std::string readFileContents(const std::string& file_path);
|
||||
|
||||
|
||||
|
||||
|
||||
#define TIME_DIFF(end, start)\
|
||||
std::chrono::duration_cast<std::chrono::duration<double>>(end - start);
|
||||
|
@ -8,17 +20,22 @@
|
|||
#define NOW()\
|
||||
std::chrono::high_resolution_clock::now();
|
||||
|
||||
|
||||
|
||||
#define PARSE_QUERY_BENCHMARK(name, query)\
|
||||
static void name(benchmark::State& st) {\
|
||||
while (st.KeepRunning()) {\
|
||||
hsql::SQLParserResult* result = hsql::SQLParser::parseSQLString(query);\
|
||||
if (!result->isValid()) st.SkipWithError("Parsing failed!");\
|
||||
delete result;\
|
||||
}\
|
||||
BM_ParseBenchmark(st, query);\
|
||||
}\
|
||||
BENCHMARK(name);
|
||||
|
||||
#define TOKENIZE_QUERY_BENCHMARK(name, query)\
|
||||
static void name(benchmark::State& st) {\
|
||||
BM_TokenizeBenchmark(st, query);\
|
||||
}\
|
||||
BENCHMARK(name);
|
||||
|
||||
|
||||
#define BENCHMARK_QUERY(test_name, query)\
|
||||
TOKENIZE_QUERY_BENCHMARK(test_name##Tokenize, query)\
|
||||
PARSE_QUERY_BENCHMARK(test_name##Parse, query)
|
||||
|
||||
|
||||
#endif
|
|
@ -9,67 +9,6 @@
|
|||
|
||||
#include "benchmark_utils.h"
|
||||
|
||||
|
||||
PARSE_QUERY_BENCHMARK(BM_Q1SimpleSelect,
|
||||
"SELECT * FROM test;");
|
||||
|
||||
PARSE_QUERY_BENCHMARK(BM_Q2SimpleSubSelect,
|
||||
"SELECT a, b AS address FROM (SELECT * FROM test WHERE c < 100 AND b > 3) t1 WHERE a < 10 AND b < 100;");
|
||||
|
||||
PARSE_QUERY_BENCHMARK(BM_Q3SingleJoin,
|
||||
"SELECT \"left\".a, \"left\".b, \"right\".a, \"right\".b FROM table_a AS \"left\" JOIN table_b AS \"right\" ON \"left\".a = \"right\".a;");
|
||||
|
||||
PARSE_QUERY_BENCHMARK(BM_Q4TPCHQuery,
|
||||
"SELECT"
|
||||
" l_orderkey,"
|
||||
" SUM(l_extendedprice * (1 - l_discount)) AS revenue,"
|
||||
" o_orderdate,"
|
||||
" o_shippriority"
|
||||
" FROM"
|
||||
" customer,"
|
||||
" orders,"
|
||||
" lineitem"
|
||||
" WHERE"
|
||||
" c_mktsegment = '%s'"
|
||||
" and c_custkey = o_custkey"
|
||||
" and l_orderkey = o_orderkey"
|
||||
" and o_orderdate < '%s'"
|
||||
" and l_shipdate > '%s'"
|
||||
" GROUP BY"
|
||||
" l_orderkey,"
|
||||
" o_orderdate,"
|
||||
" o_shippriority"
|
||||
" ORDER BY"
|
||||
" revenue DESC,"
|
||||
" o_orderdate;"
|
||||
);
|
||||
PARSE_QUERY_BENCHMARK(BM_TwoSelects,
|
||||
"SELECT * FROM test; SELECT age, street AS address FROM data;");
|
||||
|
||||
PARSE_QUERY_BENCHMARK(BM_LongSelectList26,
|
||||
"SELECT a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u,v,w,x,y,z FROM test;");
|
||||
|
||||
PARSE_QUERY_BENCHMARK(BM_LongSelectList52,
|
||||
"SELECT a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u,v,w,x,y,z,a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u,v,w,x,y,z FROM test;");
|
||||
|
||||
PARSE_QUERY_BENCHMARK(BM_LongSelectElement26,
|
||||
"SELECT aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa FROM test;");
|
||||
|
||||
PARSE_QUERY_BENCHMARK(BM_LongSelectElement52,
|
||||
"SELECT aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa FROM test;");
|
||||
|
||||
// Prepare and Execute benchmarks.
|
||||
PARSE_QUERY_BENCHMARK(BM_ExecuteStatement,
|
||||
"EXECUTE procedure;");
|
||||
|
||||
PARSE_QUERY_BENCHMARK(BM_ExecuteWith2ParametersStatement,
|
||||
"EXECUTE procedure(11, 'test');");
|
||||
|
||||
PARSE_QUERY_BENCHMARK(BM_ExecuteWith10ParametersStatement,
|
||||
"EXECUTE procedure(11, 'test', 5.6, 4.2, 'abc', 6, 7, 8, 9, 10000);");
|
||||
|
||||
|
||||
|
||||
// Benchmark the influence of increasing size of the query, while
|
||||
// the number of tokens remains unchanged.
|
||||
static void BM_CharacterCount(benchmark::State& st) {
|
||||
|
@ -82,9 +21,11 @@ static void BM_CharacterCount(benchmark::State& st) {
|
|||
const std::string filler = std::string(pad, 'a');
|
||||
query.replace(7, 6, filler);
|
||||
|
||||
st.counters["num_tokens"] = getNumTokens(query);
|
||||
st.counters["num_chars"] = query.size();
|
||||
while (st.KeepRunning()) {
|
||||
hsql::SQLParserResult* result = hsql::SQLParser::parseSQLString(query);
|
||||
delete result;
|
||||
hsql::SQLParserResult result;
|
||||
hsql::SQLParser::parse(query, &result);
|
||||
}
|
||||
}
|
||||
BENCHMARK(BM_CharacterCount)
|
||||
|
@ -129,10 +70,12 @@ static void BM_ConditionalTokens(benchmark::State& st) {
|
|||
return;
|
||||
}
|
||||
|
||||
st.counters["num_tokens"] = getNumTokens(query);
|
||||
st.counters["num_chars"] = query.size();
|
||||
while (st.KeepRunning()) {
|
||||
hsql::SQLParserResult* result = hsql::SQLParser::parseSQLString(query);
|
||||
if (!result->isValid()) st.SkipWithError("Parsing failed!");\
|
||||
delete result;
|
||||
hsql::SQLParserResult result;
|
||||
hsql::SQLParser::parse(query, &result);
|
||||
if (!result.isValid()) st.SkipWithError("Parsing failed!");
|
||||
}
|
||||
}
|
||||
BENCHMARK(BM_ConditionalTokens)
|
||||
|
@ -141,4 +84,4 @@ BENCHMARK(BM_ConditionalTokens)
|
|||
{1 << 2, 1 << 11}});
|
||||
|
||||
|
||||
BENCHMARK_MAIN();
|
||||
|
||||
|
|
|
@ -0,0 +1,47 @@
|
|||
#include "queries.h"
|
||||
|
||||
#include <experimental/filesystem>
|
||||
#include <algorithm>
|
||||
#include <iostream>
|
||||
#include <regex>
|
||||
|
||||
#include "benchmark_utils.h"
|
||||
|
||||
namespace filesystem = std::experimental::filesystem;
|
||||
|
||||
std::string getQueryName(unsigned i) {
|
||||
if (sql_queries[i].first.empty()) {
|
||||
std::string name = "#" + std::to_string(i + 1);
|
||||
return name;
|
||||
}
|
||||
return std::string("") + sql_queries[i].first;
|
||||
}
|
||||
|
||||
std::vector<SQLQuery> getQueriesFromDirectory(const std::string& dir_path) {
|
||||
std::regex query_file_regex("\\.sql$");
|
||||
std::vector<std::string> files;
|
||||
|
||||
for (auto& entry : filesystem::directory_iterator(dir_path)) {
|
||||
if (filesystem::is_regular_file(entry)) {
|
||||
std::string path_str = filesystem::path(entry);
|
||||
|
||||
if (std::regex_search(path_str, query_file_regex)) {
|
||||
files.push_back(path_str);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
std::sort(files.begin(), files.end());
|
||||
|
||||
std::vector<SQLQuery> queries;
|
||||
for (const std::string& file_path : files) {
|
||||
const filesystem::path p(file_path);
|
||||
const std::string query = readFileContents(file_path);
|
||||
queries.emplace_back(p.filename(), query);
|
||||
}
|
||||
return queries;
|
||||
}
|
||||
|
||||
std::vector<SQLQuery> getTPCHQueries() {
|
||||
return getQueriesFromDirectory("test/queries/");
|
||||
}
|
|
@ -0,0 +1,56 @@
|
|||
#ifndef __QUERIES_H__
|
||||
#define __QUERIES_H__
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
typedef std::pair<std::string, std::string> SQLQuery;
|
||||
|
||||
// name, query
|
||||
static std::vector<SQLQuery> sql_queries = {
|
||||
{"Q1", "SELECT * FROM test;"},
|
||||
{"Q2", "SELECT a, b AS address FROM (SELECT * FROM test WHERE c < 100 AND b > 3) t1 WHERE a < 10 AND b < 100;"},
|
||||
{"Q3", "SELECT \"left\".a, \"left\".b, \"right\".a, \"right\".b FROM table_a AS \"left\" JOIN table_b AS \"right\" ON \"left\".a = \"right\".a;"},
|
||||
{"Q4", ""
|
||||
"SELECT"
|
||||
" l_orderkey,"
|
||||
" SUM(l_extendedprice * (1 - l_discount)) AS revenue,"
|
||||
" o_orderdate,"
|
||||
" o_shippriority"
|
||||
" FROM"
|
||||
" customer,"
|
||||
" orders,"
|
||||
" lineitem"
|
||||
" WHERE"
|
||||
" c_mktsegment = '%s'"
|
||||
" and c_custkey = o_custkey"
|
||||
" and l_orderkey = o_orderkey"
|
||||
" and o_orderdate < '%s'"
|
||||
" and l_shipdate > '%s'"
|
||||
" GROUP BY"
|
||||
" l_orderkey,"
|
||||
" o_orderdate,"
|
||||
" o_shippriority"
|
||||
" ORDER BY"
|
||||
" revenue DESC,"
|
||||
" o_orderdate;"
|
||||
},
|
||||
|
||||
{"LongSelectList26", "SELECT a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u,v,w,x,y,z FROM test;"},
|
||||
{"LongSelectElement26", "SELECT abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxy FROM test;"},
|
||||
{"LongSelectList52", "SELECT a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u,v,w,x,y,z,a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u,v,w,x,y,z FROM test;"},
|
||||
{"LongSelectElement52", "SELECT abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxy FROM test;"},
|
||||
{"TwoSelects", "SELECT * FROM test; SELECT age, street AS address FROM data;"},
|
||||
{"ExecuteNoParams", "EXECUTE procedure;"},
|
||||
{"Execute2Params", "EXECUTE procedure(11, 'test');"},
|
||||
{"Execute10Params", "EXECUTE procedure(11, 'test', 5.6, 4.2, 'abc', 6, 7, 8, 9, 10000);"},
|
||||
// {"name", "query"},
|
||||
};
|
||||
|
||||
std::string getQueryName(unsigned i);
|
||||
|
||||
std::vector<SQLQuery> getQueriesFromDirectory(const std::string& dir_path);
|
||||
|
||||
std::vector<SQLQuery> getTPCHQueries();
|
||||
|
||||
#endif
|
|
@ -17,7 +17,7 @@ int main(int argc, char* argv[]) {
|
|||
|
||||
// parse a given query
|
||||
hsql::SQLParserResult result;
|
||||
hsql::SQLParser::parseSQLString(query, &result);
|
||||
hsql::SQLParser::parse(query, &result);
|
||||
|
||||
// check whether the parsing was successful
|
||||
|
||||
|
|
|
@ -5,7 +5,6 @@
|
|||
#include <stdio.h>
|
||||
#include <string>
|
||||
|
||||
|
||||
namespace hsql {
|
||||
|
||||
SQLParser::SQLParser() {
|
||||
|
@ -13,16 +12,16 @@ namespace hsql {
|
|||
}
|
||||
|
||||
// static
|
||||
bool SQLParser::parseSQLString(const char* text, SQLParserResult* result) {
|
||||
bool SQLParser::parse(const std::string& sql, SQLParserResult* result) {
|
||||
yyscan_t scanner;
|
||||
YY_BUFFER_STATE state;
|
||||
|
||||
if (hsql_lex_init(&scanner)) {
|
||||
// Couldn't initialize the lexer.
|
||||
fprintf(stderr, "[Error] SQLParser: Error when initializing lexer!\n");
|
||||
fprintf(stderr, "SQLParser: Error when initializing lexer!\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
const char* text = sql.c_str();
|
||||
state = hsql__scan_string(text, scanner);
|
||||
|
||||
// Parse the tokens.
|
||||
|
@ -38,25 +37,44 @@ namespace hsql {
|
|||
}
|
||||
|
||||
// static
|
||||
bool SQLParser::parseSQLString(const std::string& text, SQLParserResult* result) {
|
||||
return parseSQLString(text.c_str(), result);
|
||||
bool SQLParser::parseSQLString(const char* sql, SQLParserResult* result) {
|
||||
return parse(sql, result);
|
||||
}
|
||||
|
||||
bool SQLParser::parseSQLString(const std::string& sql, SQLParserResult* result) {
|
||||
return parse(sql, result);
|
||||
}
|
||||
|
||||
// static
|
||||
SQLParserResult* SQLParser::parseSQLString(const char* text) {
|
||||
SQLParserResult* result = new SQLParserResult();
|
||||
|
||||
if (!SQLParser::parseSQLString(text, result)) {
|
||||
delete result;
|
||||
return nullptr;
|
||||
bool SQLParser::tokenize(const std::string& sql, std::vector<int16_t>* tokens) {
|
||||
// Initialize the scanner.
|
||||
yyscan_t scanner;
|
||||
if (hsql_lex_init(&scanner)) {
|
||||
fprintf(stderr, "SQLParser: Error when initializing lexer!\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
YY_BUFFER_STATE state;
|
||||
state = hsql__scan_string(sql.c_str(), scanner);
|
||||
|
||||
// static
|
||||
SQLParserResult* SQLParser::parseSQLString(const std::string& text) {
|
||||
return parseSQLString(text.c_str());
|
||||
YYSTYPE yylval;
|
||||
YYLTYPE yylloc;
|
||||
|
||||
// Step through the string until EOF is read.
|
||||
// Note: hsql_lex returns int, but we know that its range is within 16 bit.
|
||||
int16_t token = hsql_lex(&yylval, &yylloc, scanner);
|
||||
while (token != 0) {
|
||||
tokens->push_back(token);
|
||||
token = hsql_lex(&yylval, &yylloc, scanner);
|
||||
|
||||
if (token == SQL_IDENTIFIER || token == SQL_STRING) {
|
||||
free(yylval.sval);
|
||||
}
|
||||
}
|
||||
|
||||
hsql__delete_buffer(state, scanner);
|
||||
hsql_lex_destroy(scanner);
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace hsql
|
||||
|
|
|
@ -9,29 +9,25 @@ namespace hsql {
|
|||
// Static methods used to parse SQL strings.
|
||||
class SQLParser {
|
||||
public:
|
||||
|
||||
// Parses a given constant character SQL string into the result object.
|
||||
// Returns true if the lexer and parser could run without internal errors.
|
||||
// This does NOT mean that the SQL string was valid SQL. To check that
|
||||
// you need to check result->isValid();
|
||||
static bool parse(const std::string& sql, SQLParserResult* result);
|
||||
|
||||
// Run tokenization on the given string and store the tokens in the output vector.
|
||||
static bool tokenize(const std::string& sql, std::vector<int16_t>* tokens);
|
||||
|
||||
// Deprecated.
|
||||
// Old method to parse SQL strings. Replaced by parse().
|
||||
static bool parseSQLString(const char* sql, SQLParserResult* result);
|
||||
|
||||
// Parses a given SQL string into the result object.
|
||||
// Deprecated.
|
||||
// Old method to parse SQL strings. Replaced by parse().
|
||||
static bool parseSQLString(const std::string& sql, SQLParserResult* result);
|
||||
|
||||
// Deprecated:
|
||||
// Parses a given constant character SQL string.
|
||||
// Note: This is kept for legacy reasons. It is recommended to use
|
||||
// the (const char*, SQLParserResult*) implementation.
|
||||
static SQLParserResult* parseSQLString(const char* sql);
|
||||
|
||||
// Deprecated:
|
||||
// Parses an SQL std::string.
|
||||
// Note: This is kept for legacy reasons. It is recommended to use
|
||||
// the (const std::string&, SQLParserResult*) implementation.
|
||||
static SQLParserResult* parseSQLString(const std::string& sql);
|
||||
|
||||
private:
|
||||
// Static class can't be instantiated.
|
||||
SQLParser();
|
||||
};
|
||||
|
||||
|
|
|
@ -100,7 +100,9 @@ namespace hsql {
|
|||
void SQLParserResult::addParameter(Expr* parameter) {
|
||||
parameters_.push_back(parameter);
|
||||
std::sort(parameters_.begin(), parameters_.end(),
|
||||
[](const Expr* a, const Expr* b) { return a->ival < b->ival; });
|
||||
[](const Expr * a, const Expr * b) {
|
||||
return a->ival < b->ival;
|
||||
});
|
||||
}
|
||||
|
||||
const std::vector<Expr*>& SQLParserResult::parameters() {
|
||||
|
|
|
@ -1,9 +1,10 @@
|
|||
#ifndef __SQLPARSER__SQLSTATEMENT_H__
|
||||
#define __SQLPARSER__SQLSTATEMENT_H__
|
||||
|
||||
#include "Expr.h"
|
||||
#include <vector>
|
||||
|
||||
#include "Expr.h"
|
||||
|
||||
namespace hsql {
|
||||
enum StatementType {
|
||||
kStmtError, // unused
|
||||
|
|
|
@ -3,7 +3,7 @@ SELECT N_NAME, SUM(L_EXTENDEDPRICE*(1-L_DISCOUNT)) AS REVENUE
|
|||
FROM CUSTOMER, ORDERS, LINEITEM, SUPPLIER, NATION, REGION
|
||||
WHERE C_CUSTKEY = O_CUSTKEY AND L_ORDERKEY = O_ORDERKEY AND L_SUPPKEY = S_SUPPKEY
|
||||
AND C_NATIONKEY = S_NATIONKEY AND S_NATIONKEY = N_NATIONKEY AND N_REGIONKEY = R_REGIONKEY
|
||||
AND R_NAME = 'ASIA' AND O_ORDERDATE >= '1994-01-01'
|
||||
AND R_NAME = 'ASIA' AND O_ORDERDATE >= '1994-01-01'
|
||||
AND O_ORDERDATE < DATEADD(YY, 1, cast('1994-01-01' as datetime))
|
||||
GROUP BY N_NAME
|
||||
ORDER BY REVENUE DESC
|
|
@ -1,58 +0,0 @@
|
|||
-- From:
|
||||
-- http://www.sqlserver-dba.com/2011/09/this-is-a-followup-on-my-earlier-post-of-sql-server-test-data-generation-testing-tools-i-had-some-requests-for-my-set-up-pr.html
|
||||
|
||||
-- TPC_H Query 11 - Important Stock Identification
|
||||
SELECT PS_PARTKEY, SUM(PS_SUPPLYCOST*PS_AVAILQTY) AS VALUE
|
||||
FROM PARTSUPP, SUPPLIER, NATION
|
||||
WHERE PS_SUPPKEY = S_SUPPKEY AND S_NATIONKEY = N_NATIONKEY AND N_NAME = 'GERMANY'
|
||||
GROUP BY PS_PARTKEY
|
||||
HAVING SUM(PS_SUPPLYCOST*PS_AVAILQTY) > (SELECT SUM(PS_SUPPLYCOST*PS_AVAILQTY) * 0.0001000000
|
||||
FROM PARTSUPP, SUPPLIER, NATION
|
||||
WHERE PS_SUPPKEY = S_SUPPKEY AND S_NATIONKEY = N_NATIONKEY AND N_NAME = 'GERMANY')
|
||||
ORDER BY VALUE DESC;
|
||||
|
||||
|
||||
-- TPC_H Query 12 - Shipping Modes and Order Priority
|
||||
SELECT L_SHIPMODE,
|
||||
SUM(CASE WHEN O_ORDERPRIORITY = '1-URGENT' OR O_ORDERPRIORITY = '2-HIGH' THEN 1 ELSE 0 END) AS HIGH_LINE_COUNT,
|
||||
SUM(CASE WHEN O_ORDERPRIORITY <> '1-URGENT' AND O_ORDERPRIORITY <> '2-HIGH' THEN 1 ELSE 0 END ) AS LOW_LINE_COUNT
|
||||
FROM ORDERS, LINEITEM
|
||||
WHERE O_ORDERKEY = L_ORDERKEY AND L_SHIPMODE IN ('MAIL','SHIP')
|
||||
AND L_COMMITDATE < L_RECEIPTDATE AND L_SHIPDATE < L_COMMITDATE AND L_RECEIPTDATE >= '1994-01-01'
|
||||
AND L_RECEIPTDATE < dateadd(mm, 1, cast('1995-09-01' as datetime))
|
||||
GROUP BY L_SHIPMODE
|
||||
ORDER BY L_SHIPMODE;
|
||||
|
||||
|
||||
-- TPC_H Query 13 - Customer Distribution
|
||||
SELECT C_COUNT, COUNT(*) AS CUSTDIST
|
||||
FROM (SELECT C_CUSTKEY, COUNT(O_ORDERKEY)
|
||||
FROM CUSTOMER left outer join ORDERS on C_CUSTKEY = O_CUSTKEY
|
||||
AND O_COMMENT not like '%%special%%requests%%'
|
||||
GROUP BY C_CUSTKEY) AS C_ORDERS
|
||||
GROUP BY C_COUNT
|
||||
ORDER BY CUSTDIST DESC, C_COUNT DESC;
|
||||
|
||||
|
||||
-- TPC_H Query 14 - Promotion Effect
|
||||
SELECT 100.00* SUM(CASE WHEN P_TYPE LIKE 'PROMO%%' THEN L_EXTENDEDPRICE*(1-L_DISCOUNT)
|
||||
ELSE 0 END) / SUM(L_EXTENDEDPRICE*(1-L_DISCOUNT)) AS PROMO_REVENUE
|
||||
FROM LINEITEM, "PART"
|
||||
WHERE L_PARTKEY = P_PARTKEY AND L_SHIPDATE >= '1995-09-01' AND L_SHIPDATE < dateadd(mm, 1, '1995-09-01');
|
||||
|
||||
|
||||
-- TPC_H Query 15.1 - Create View for Top Supplier Query
|
||||
CREATE VIEW REVENUE0 (SUPPLIER_NO, TOTAL_REVENUE) AS
|
||||
SELECT L_SUPPKEY, SUM(L_EXTENDEDPRICE*(1-L_DISCOUNT)) FROM LINEITEM
|
||||
WHERE L_SHIPDATE >= '1996-01-01' AND L_SHIPDATE < dateadd(mm, 3, cast('1996-01-01' as datetime))
|
||||
GROUP BY L_SUPPKEY;
|
||||
|
||||
|
||||
-- TPC_H Query 15.2 - Top Supplier
|
||||
SELECT S_SUPPKEY, S_NAME, S_ADDRESS, S_PHONE, TOTAL_REVENUE
|
||||
FROM SUPPLIER, REVENUE0
|
||||
WHERE S_SUPPKEY = SUPPLIER_NO AND TOTAL_REVENUE = (SELECT MAX(TOTAL_REVENUE) FROM REVENUE0)
|
||||
ORDER BY S_SUPPKEY;
|
||||
|
||||
-- TPC_H Query 15.3 - Drop View
|
||||
DROP VIEW REVENUE0;
|
|
@ -0,0 +1,10 @@
|
|||
-- http://www.sqlserver-dba.com/2011/09/this-is-a-followup-on-my-earlier-post-of-sql-server-test-data-generation-testing-tools-i-had-some-requests-for-my-set-up-pr.html
|
||||
-- TPC_H Query 11 - Important Stock Identification
|
||||
SELECT PS_PARTKEY, SUM(PS_SUPPLYCOST*PS_AVAILQTY) AS VALUE
|
||||
FROM PARTSUPP, SUPPLIER, NATION
|
||||
WHERE PS_SUPPKEY = S_SUPPKEY AND S_NATIONKEY = N_NATIONKEY AND N_NAME = 'GERMANY'
|
||||
GROUP BY PS_PARTKEY
|
||||
HAVING SUM(PS_SUPPLYCOST*PS_AVAILQTY) > (SELECT SUM(PS_SUPPLYCOST*PS_AVAILQTY) * 0.0001000000
|
||||
FROM PARTSUPP, SUPPLIER, NATION
|
||||
WHERE PS_SUPPKEY = S_SUPPKEY AND S_NATIONKEY = N_NATIONKEY AND N_NAME = 'GERMANY')
|
||||
ORDER BY VALUE DESC;
|
|
@ -0,0 +1,10 @@
|
|||
-- TPC_H Query 12 - Shipping Modes and Order Priority
|
||||
SELECT L_SHIPMODE,
|
||||
SUM(CASE WHEN O_ORDERPRIORITY = '1-URGENT' OR O_ORDERPRIORITY = '2-HIGH' THEN 1 ELSE 0 END) AS HIGH_LINE_COUNT,
|
||||
SUM(CASE WHEN O_ORDERPRIORITY <> '1-URGENT' AND O_ORDERPRIORITY <> '2-HIGH' THEN 1 ELSE 0 END ) AS LOW_LINE_COUNT
|
||||
FROM ORDERS, LINEITEM
|
||||
WHERE O_ORDERKEY = L_ORDERKEY AND L_SHIPMODE IN ('MAIL','SHIP')
|
||||
AND L_COMMITDATE < L_RECEIPTDATE AND L_SHIPDATE < L_COMMITDATE AND L_RECEIPTDATE >= '1994-01-01'
|
||||
AND L_RECEIPTDATE < dateadd(mm, 1, cast('1995-09-01' as datetime))
|
||||
GROUP BY L_SHIPMODE
|
||||
ORDER BY L_SHIPMODE;
|
|
@ -0,0 +1,8 @@
|
|||
-- TPC_H Query 13 - Customer Distribution
|
||||
SELECT C_COUNT, COUNT(*) AS CUSTDIST
|
||||
FROM (SELECT C_CUSTKEY, COUNT(O_ORDERKEY)
|
||||
FROM CUSTOMER left outer join ORDERS on C_CUSTKEY = O_CUSTKEY
|
||||
AND O_COMMENT not like '%%special%%requests%%'
|
||||
GROUP BY C_CUSTKEY) AS C_ORDERS
|
||||
GROUP BY C_COUNT
|
||||
ORDER BY CUSTDIST DESC, C_COUNT DESC;
|
|
@ -0,0 +1,5 @@
|
|||
-- TPC_H Query 14 - Promotion Effect
|
||||
SELECT 100.00* SUM(CASE WHEN P_TYPE LIKE 'PROMO%%' THEN L_EXTENDEDPRICE*(1-L_DISCOUNT)
|
||||
ELSE 0 END) / SUM(L_EXTENDEDPRICE*(1-L_DISCOUNT)) AS PROMO_REVENUE
|
||||
FROM LINEITEM, "PART"
|
||||
WHERE L_PARTKEY = P_PARTKEY AND L_SHIPDATE >= '1995-09-01' AND L_SHIPDATE < dateadd(mm, 1, '1995-09-01');
|
|
@ -0,0 +1,15 @@
|
|||
-- TPC_H Query 15.1 - Create View for Top Supplier Query
|
||||
CREATE VIEW REVENUE0 (SUPPLIER_NO, TOTAL_REVENUE) AS
|
||||
SELECT L_SUPPKEY, SUM(L_EXTENDEDPRICE*(1-L_DISCOUNT)) FROM LINEITEM
|
||||
WHERE L_SHIPDATE >= '1996-01-01' AND L_SHIPDATE < dateadd(mm, 3, cast('1996-01-01' as datetime))
|
||||
GROUP BY L_SUPPKEY;
|
||||
|
||||
|
||||
-- TPC_H Query 15.2 - Top Supplier
|
||||
SELECT S_SUPPKEY, S_NAME, S_ADDRESS, S_PHONE, TOTAL_REVENUE
|
||||
FROM SUPPLIER, REVENUE0
|
||||
WHERE S_SUPPKEY = SUPPLIER_NO AND TOTAL_REVENUE = (SELECT MAX(TOTAL_REVENUE) FROM REVENUE0)
|
||||
ORDER BY S_SUPPKEY;
|
||||
|
||||
-- TPC_H Query 15.3 - Drop View
|
||||
DROP VIEW REVENUE0;
|
|
@ -1,71 +0,0 @@
|
|||
-- From:
|
||||
-- http://www.sqlserver-dba.com/2011/09/this-is-a-followup-on-my-earlier-post-of-sql-server-test-data-generation-testing-tools-i-had-some-requests-for-my-set-up-pr.html
|
||||
|
||||
-- TPC_H Query 16 - Parts/Supplier Relationship
|
||||
SELECT P_BRAND, P_TYPE, P_SIZE, COUNT(DISTINCT PS_SUPPKEY) AS SUPPLIER_CNT
|
||||
FROM PARTSUPP, "PART"
|
||||
WHERE P_PARTKEY = PS_PARTKEY AND P_BRAND <> 'Brand#45' AND P_TYPE NOT LIKE 'MEDIUM POLISHED%%'
|
||||
AND P_SIZE IN (49, 14, 23, 45, 19, 3, 36, 9) AND PS_SUPPKEY NOT IN (SELECT S_SUPPKEY FROM SUPPLIER
|
||||
WHERE S_COMMENT LIKE '%%Customer%%Complaints%%')
|
||||
GROUP BY P_BRAND, P_TYPE, P_SIZE
|
||||
ORDER BY SUPPLIER_CNT DESC, P_BRAND, P_TYPE, P_SIZE;
|
||||
|
||||
|
||||
-- TPC_H Query 17 - Small-Quantity-Order Revenue
|
||||
SELECT SUM(L_EXTENDEDPRICE)/7.0 AS AVG_YEARLY FROM LINEITEM, "PART"
|
||||
WHERE P_PARTKEY = L_PARTKEY AND P_BRAND = 'Brand#23' AND P_CONTAINER = 'MED BOX'
|
||||
AND L_QUANTITY < (SELECT 0.2*AVG(L_QUANTITY) FROM LINEITEM WHERE L_PARTKEY = P_PARTKEY);
|
||||
|
||||
|
||||
-- TPC_H Query 18 - Large Volume Customer
|
||||
SELECT TOP 100 C_NAME, C_CUSTKEY, O_ORDERKEY, O_ORDERDATE, O_TOTALPRICE, SUM(L_QUANTITY)
|
||||
FROM CUSTOMER, ORDERS, LINEITEM
|
||||
WHERE O_ORDERKEY IN (SELECT L_ORDERKEY FROM LINEITEM GROUP BY L_ORDERKEY HAVING
|
||||
SUM(L_QUANTITY) > 300) AND C_CUSTKEY = O_CUSTKEY AND O_ORDERKEY = L_ORDERKEY
|
||||
GROUP BY C_NAME, C_CUSTKEY, O_ORDERKEY, O_ORDERDATE, O_TOTALPRICE
|
||||
ORDER BY O_TOTALPRICE DESC, O_ORDERDATE;
|
||||
|
||||
|
||||
-- TPC_H Query 19 - Discounted Revenue
|
||||
SELECT SUM(L_EXTENDEDPRICE* (1 - L_DISCOUNT)) AS REVENUE
|
||||
FROM LINEITEM, "PART"
|
||||
WHERE (P_PARTKEY = L_PARTKEY AND P_BRAND = 'Brand#12' AND P_CONTAINER IN ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG') AND L_QUANTITY >= 1 AND L_QUANTITY <= 1 + 10 AND P_SIZE BETWEEN 1 AND 5
|
||||
AND L_SHIPMODE IN ('AIR', 'AIR REG') AND L_SHIPINSTRUCT = 'DELIVER IN PERSON')
|
||||
OR (P_PARTKEY = L_PARTKEY AND P_BRAND ='Brand#23' AND P_CONTAINER IN ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK') AND L_QUANTITY >=10 AND L_QUANTITY <=10 + 10 AND P_SIZE BETWEEN 1 AND 10
|
||||
AND L_SHIPMODE IN ('AIR', 'AIR REG') AND L_SHIPINSTRUCT = 'DELIVER IN PERSON')
|
||||
OR (P_PARTKEY = L_PARTKEY AND P_BRAND = 'Brand#34' AND P_CONTAINER IN ( 'LG CASE', 'LG BOX', 'LG PACK', 'LG PKG') AND L_QUANTITY >=20 AND L_QUANTITY <= 20 + 10 AND P_SIZE BETWEEN 1 AND 15
|
||||
AND L_SHIPMODE IN ('AIR', 'AIR REG') AND L_SHIPINSTRUCT = 'DELIVER IN PERSON');
|
||||
|
||||
|
||||
-- TPC_H Query 20 - Potential Part Promotion
|
||||
SELECT S_NAME, S_ADDRESS FROM SUPPLIER, NATION
|
||||
WHERE S_SUPPKEY IN (SELECT PS_SUPPKEY FROM PARTSUPP
|
||||
WHERE PS_PARTKEY in (SELECT P_PARTKEY FROM "PART" WHERE P_NAME like 'forest%%') AND
|
||||
PS_AVAILQTY > (SELECT 0.5*sum(L_QUANTITY) FROM LINEITEM WHERE L_PARTKEY = PS_PARTKEY AND
|
||||
L_SUPPKEY = PS_SUPPKEY AND L_SHIPDATE >= '1994-01-01' AND
|
||||
L_SHIPDATE < dateadd(yy,1,'1994-01-01'))) AND S_NATIONKEY = N_NATIONKEY AND N_NAME = 'CANADA'
|
||||
ORDER BY S_NAME;
|
||||
|
||||
|
||||
-- TPC_H Query 21 - Suppliers Who Kept Orders Waiting
|
||||
SELECT TOP 100 S_NAME, COUNT(*) AS NUMWAIT
|
||||
FROM SUPPLIER, LINEITEM L1, ORDERS, NATION WHERE S_SUPPKEY = L1.L_SUPPKEY AND
|
||||
O_ORDERKEY = L1.L_ORDERKEY AND O_ORDERSTATUS = 'F' AND L1.L_RECEIPTDATE> L1.L_COMMITDATE
|
||||
AND EXISTS (SELECT * FROM LINEITEM L2 WHERE L2.L_ORDERKEY = L1.L_ORDERKEY
|
||||
AND L2.L_SUPPKEY <> L1.L_SUPPKEY) AND
|
||||
NOT EXISTS (SELECT * FROM LINEITEM L3 WHERE L3.L_ORDERKEY = L1.L_ORDERKEY AND
|
||||
L3.L_SUPPKEY <> L1.L_SUPPKEY AND L3.L_RECEIPTDATE > L3.L_COMMITDATE) AND
|
||||
S_NATIONKEY = N_NATIONKEY AND N_NAME = 'SAUDI ARABIA'
|
||||
GROUP BY S_NAME
|
||||
ORDER BY NUMWAIT DESC, S_NAME;
|
||||
|
||||
|
||||
-- TPC_H Query 22 - Global Sales Opportunity */
|
||||
SELECT CNTRYCODE, COUNT(*) AS NUMCUST, SUM(C_ACCTBAL) AS TOTACCTBAL
|
||||
FROM (SELECT SUBSTRING(C_PHONE,1,2) AS CNTRYCODE, C_ACCTBAL
|
||||
FROM CUSTOMER WHERE SUBSTRING(C_PHONE,1,2) IN ('13', '31', '23', '29', '30', '18', '17') AND
|
||||
C_ACCTBAL > (SELECT AVG(C_ACCTBAL) FROM CUSTOMER WHERE C_ACCTBAL > 0.00 AND
|
||||
SUBSTRING(C_PHONE,1,2) IN ('13', '31', '23', '29', '30', '18', '17')) AND
|
||||
NOT EXISTS ( SELECT * FROM ORDERS WHERE O_CUSTKEY = C_CUSTKEY)) AS CUSTSALE
|
||||
GROUP BY CNTRYCODE
|
||||
ORDER BY CNTRYCODE;
|
|
@ -0,0 +1,9 @@
|
|||
-- http://www.sqlserver-dba.com/2011/09/this-is-a-followup-on-my-earlier-post-of-sql-server-test-data-generation-testing-tools-i-had-some-requests-for-my-set-up-pr.html
|
||||
-- TPC_H Query 16 - Parts/Supplier Relationship
|
||||
SELECT P_BRAND, P_TYPE, P_SIZE, COUNT(DISTINCT PS_SUPPKEY) AS SUPPLIER_CNT
|
||||
FROM PARTSUPP, "PART"
|
||||
WHERE P_PARTKEY = PS_PARTKEY AND P_BRAND <> 'Brand#45' AND P_TYPE NOT LIKE 'MEDIUM POLISHED%%'
|
||||
AND P_SIZE IN (49, 14, 23, 45, 19, 3, 36, 9) AND PS_SUPPKEY NOT IN (SELECT S_SUPPKEY FROM SUPPLIER
|
||||
WHERE S_COMMENT LIKE '%%Customer%%Complaints%%')
|
||||
GROUP BY P_BRAND, P_TYPE, P_SIZE
|
||||
ORDER BY SUPPLIER_CNT DESC, P_BRAND, P_TYPE, P_SIZE;
|
|
@ -0,0 +1,4 @@
|
|||
-- TPC_H Query 17 - Small-Quantity-Order Revenue
|
||||
SELECT SUM(L_EXTENDEDPRICE)/7.0 AS AVG_YEARLY FROM LINEITEM, "PART"
|
||||
WHERE P_PARTKEY = L_PARTKEY AND P_BRAND = 'Brand#23' AND P_CONTAINER = 'MED BOX'
|
||||
AND L_QUANTITY < (SELECT 0.2*AVG(L_QUANTITY) FROM LINEITEM WHERE L_PARTKEY = P_PARTKEY);
|
|
@ -0,0 +1,7 @@
|
|||
-- TPC_H Query 18 - Large Volume Customer
|
||||
SELECT TOP 100 C_NAME, C_CUSTKEY, O_ORDERKEY, O_ORDERDATE, O_TOTALPRICE, SUM(L_QUANTITY)
|
||||
FROM CUSTOMER, ORDERS, LINEITEM
|
||||
WHERE O_ORDERKEY IN (SELECT L_ORDERKEY FROM LINEITEM GROUP BY L_ORDERKEY HAVING
|
||||
SUM(L_QUANTITY) > 300) AND C_CUSTKEY = O_CUSTKEY AND O_ORDERKEY = L_ORDERKEY
|
||||
GROUP BY C_NAME, C_CUSTKEY, O_ORDERKEY, O_ORDERDATE, O_TOTALPRICE
|
||||
ORDER BY O_TOTALPRICE DESC, O_ORDERDATE;
|
|
@ -0,0 +1,9 @@
|
|||
-- TPC_H Query 19 - Discounted Revenue
|
||||
SELECT SUM(L_EXTENDEDPRICE* (1 - L_DISCOUNT)) AS REVENUE
|
||||
FROM LINEITEM, "PART"
|
||||
WHERE (P_PARTKEY = L_PARTKEY AND P_BRAND = 'Brand#12' AND P_CONTAINER IN ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG') AND L_QUANTITY >= 1 AND L_QUANTITY <= 1 + 10 AND P_SIZE BETWEEN 1 AND 5
|
||||
AND L_SHIPMODE IN ('AIR', 'AIR REG') AND L_SHIPINSTRUCT = 'DELIVER IN PERSON')
|
||||
OR (P_PARTKEY = L_PARTKEY AND P_BRAND ='Brand#23' AND P_CONTAINER IN ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK') AND L_QUANTITY >=10 AND L_QUANTITY <=10 + 10 AND P_SIZE BETWEEN 1 AND 10
|
||||
AND L_SHIPMODE IN ('AIR', 'AIR REG') AND L_SHIPINSTRUCT = 'DELIVER IN PERSON')
|
||||
OR (P_PARTKEY = L_PARTKEY AND P_BRAND = 'Brand#34' AND P_CONTAINER IN ( 'LG CASE', 'LG BOX', 'LG PACK', 'LG PKG') AND L_QUANTITY >=20 AND L_QUANTITY <= 20 + 10 AND P_SIZE BETWEEN 1 AND 15
|
||||
AND L_SHIPMODE IN ('AIR', 'AIR REG') AND L_SHIPINSTRUCT = 'DELIVER IN PERSON');
|
|
@ -0,0 +1,8 @@
|
|||
-- TPC_H Query 20 - Potential Part Promotion
|
||||
SELECT S_NAME, S_ADDRESS FROM SUPPLIER, NATION
|
||||
WHERE S_SUPPKEY IN (SELECT PS_SUPPKEY FROM PARTSUPP
|
||||
WHERE PS_PARTKEY in (SELECT P_PARTKEY FROM "PART" WHERE P_NAME like 'forest%%') AND
|
||||
PS_AVAILQTY > (SELECT 0.5*sum(L_QUANTITY) FROM LINEITEM WHERE L_PARTKEY = PS_PARTKEY AND
|
||||
L_SUPPKEY = PS_SUPPKEY AND L_SHIPDATE >= '1994-01-01' AND
|
||||
L_SHIPDATE < dateadd(yy,1,'1994-01-01'))) AND S_NATIONKEY = N_NATIONKEY AND N_NAME = 'CANADA'
|
||||
ORDER BY S_NAME;
|
|
@ -0,0 +1,11 @@
|
|||
-- TPC_H Query 21 - Suppliers Who Kept Orders Waiting
|
||||
SELECT TOP 100 S_NAME, COUNT(*) AS NUMWAIT
|
||||
FROM SUPPLIER, LINEITEM L1, ORDERS, NATION WHERE S_SUPPKEY = L1.L_SUPPKEY AND
|
||||
O_ORDERKEY = L1.L_ORDERKEY AND O_ORDERSTATUS = 'F' AND L1.L_RECEIPTDATE> L1.L_COMMITDATE
|
||||
AND EXISTS (SELECT * FROM LINEITEM L2 WHERE L2.L_ORDERKEY = L1.L_ORDERKEY
|
||||
AND L2.L_SUPPKEY <> L1.L_SUPPKEY) AND
|
||||
NOT EXISTS (SELECT * FROM LINEITEM L3 WHERE L3.L_ORDERKEY = L1.L_ORDERKEY AND
|
||||
L3.L_SUPPKEY <> L1.L_SUPPKEY AND L3.L_RECEIPTDATE > L3.L_COMMITDATE) AND
|
||||
S_NATIONKEY = N_NATIONKEY AND N_NAME = 'SAUDI ARABIA'
|
||||
GROUP BY S_NAME
|
||||
ORDER BY NUMWAIT DESC, S_NAME;
|
|
@ -0,0 +1,9 @@
|
|||
-- TPC_H Query 22 - Global Sales Opportunity
|
||||
SELECT CNTRYCODE, COUNT(*) AS NUMCUST, SUM(C_ACCTBAL) AS TOTACCTBAL
|
||||
FROM (SELECT SUBSTRING(C_PHONE,1,2) AS CNTRYCODE, C_ACCTBAL
|
||||
FROM CUSTOMER WHERE SUBSTRING(C_PHONE,1,2) IN ('13', '31', '23', '29', '30', '18', '17') AND
|
||||
C_ACCTBAL > (SELECT AVG(C_ACCTBAL) FROM CUSTOMER WHERE C_ACCTBAL > 0.00 AND
|
||||
SUBSTRING(C_PHONE,1,2) IN ('13', '31', '23', '29', '30', '18', '17')) AND
|
||||
NOT EXISTS ( SELECT * FROM ORDERS WHERE O_CUSTKEY = C_CUSTKEY)) AS CUSTSALE
|
||||
GROUP BY CNTRYCODE
|
||||
ORDER BY CNTRYCODE;
|
|
@ -4,7 +4,7 @@
|
|||
|
||||
#define TEST_PARSE_SQL_QUERY(query, result, numStatements) \
|
||||
hsql::SQLParserResult result; \
|
||||
hsql::SQLParser::parseSQLString(query, &result); \
|
||||
hsql::SQLParser::parse(query, &result); \
|
||||
ASSERT(result.isValid()); \
|
||||
ASSERT_EQ(result.size(), numStatements);
|
||||
|
||||
|
|
|
@ -80,7 +80,7 @@ TEST(AutoGrammarTest) {
|
|||
|
||||
// Parsing
|
||||
SQLParserResult result;
|
||||
SQLParser::parseSQLString(sql.c_str(), &result);
|
||||
SQLParser::parse(sql.c_str(), &result);
|
||||
|
||||
end = std::chrono::system_clock::now();
|
||||
std::chrono::duration<double> elapsed_seconds = end - start;
|
||||
|
|
|
@ -0,0 +1,44 @@
|
|||
#include "thirdparty/microtest/microtest.h"
|
||||
|
||||
#include <iostream>
|
||||
#include <map>
|
||||
#include <string>
|
||||
|
||||
#include "sql_asserts.h"
|
||||
#include "SQLParser.h"
|
||||
#include "parser/bison_parser.h"
|
||||
|
||||
using namespace hsql;
|
||||
|
||||
// Tokenizes `query` via SQLParser::tokenize and compares the resulting token
// ids against `expected_tokens`.
//
//   query            SQL text passed to the tokenizer.
//   expected_tokens  Token ids expected, in order.
//
// The tokenize call itself, the number of tokens, and every individual token
// are each checked with their own assertion.
void test_tokens(const std::string& query, const std::vector<int16_t>& expected_tokens) {
  std::vector<int16_t> tokens;
  ASSERT(SQLParser::tokenize(query, &tokens));

  // Lengths are compared before the element-wise comparison below.
  ASSERT_EQ(expected_tokens.size(), tokens.size());

  // Walk both sequences by index and compare position by position.
  const std::vector<int16_t>::size_type num_expected = expected_tokens.size();
  std::vector<int16_t>::size_type i = 0;
  while (i < num_expected) {
    ASSERT_EQ(expected_tokens[i], tokens[i]);
    ++i;
  }
}
|
||||
|
||||
// Runs test_tokens on two sample queries. The expected lists use SQL_*
// constants for keywords, identifiers and string literals, and the plain
// characters for '*', ',' and ';'.
TEST(SQLParserTokenizeTest) {
  // Simple SELECT * query.
  const std::vector<int16_t> select_star_tokens = { SQL_SELECT, '*', SQL_FROM, SQL_IDENTIFIER, ';' };
  test_tokens("SELECT * FROM test;", select_star_tokens);

  // SELECT with a column, a string literal and a trailing WITH HINT.
  const std::vector<int16_t> select_with_hint_tokens = {
    SQL_SELECT, SQL_IDENTIFIER, ',', SQL_STRING, SQL_FROM, SQL_IDENTIFIER, SQL_WITH, SQL_HINT, ';'
  };
  test_tokens("SELECT a, 'b' FROM test WITH HINT;", select_with_hint_tokens);
}
|
||||
|
||||
// Checks that the token ids of a query can be packed into a std::u16string
// and that this string works as a std::map key for looking the query up again.
TEST(SQLParserTokenizeStringifyTest) {
  const std::string query = "SELECT * FROM test;";

  std::vector<int16_t> tokens;
  ASSERT(SQLParser::tokenize(query, &tokens));

  // Build a u16string from the token ids, one element per token.
  const std::u16string token_string(tokens.begin(), tokens.end());

  // Store the query text in a map keyed by the token string.
  std::map<std::u16string, std::string> cache;
  cache.emplace(token_string, query);

  // The looked-up entry has the same contents as the query but is a
  // different object (the map holds its own std::string).
  ASSERT(query == cache[token_string]);
  ASSERT(&query != &cache[token_string]);
}
|
||||
|
|
@ -14,7 +14,7 @@ using namespace hsql;
|
|||
|
||||
TEST(DeleteStatementTest) {
|
||||
SQLParserResult result;
|
||||
SQLParser::parseSQLString("DELETE FROM students WHERE grade > 2.0;", &result);
|
||||
SQLParser::parse("DELETE FROM students WHERE grade > 2.0;", &result);
|
||||
|
||||
ASSERT(result.isValid());
|
||||
ASSERT_EQ(result.size(), 1);
|
||||
|
@ -30,7 +30,7 @@ TEST(DeleteStatementTest) {
|
|||
|
||||
TEST(CreateStatementTest) {
|
||||
SQLParserResult result;
|
||||
SQLParser::parseSQLString("CREATE TABLE students (name TEXT, student_number INT, city INTEGER, grade DOUBLE)", &result);
|
||||
SQLParser::parse("CREATE TABLE students (name TEXT, student_number INT, city INTEGER, grade DOUBLE)", &result);
|
||||
|
||||
ASSERT(result.isValid());
|
||||
ASSERT_EQ(result.size(), 1);
|
||||
|
@ -54,7 +54,7 @@ TEST(CreateStatementTest) {
|
|||
|
||||
TEST(UpdateStatementTest) {
|
||||
SQLParserResult result;
|
||||
SQLParser::parseSQLString("UPDATE students SET grade = 5.0, name = 'test' WHERE name = 'Max Mustermann';", &result);
|
||||
SQLParser::parse("UPDATE students SET grade = 5.0, name = 'test' WHERE name = 'Max Mustermann';", &result);
|
||||
|
||||
ASSERT(result.isValid());
|
||||
ASSERT_EQ(result.size(), 1);
|
||||
|
@ -130,7 +130,7 @@ TEST(ReleaseStatementTest) {
|
|||
|
||||
SQLParserResult parse_and_move(std::string query) {
|
||||
hsql::SQLParserResult result;
|
||||
hsql::SQLParser::parseSQLString(query, &result);
|
||||
hsql::SQLParser::parse(query, &result);
|
||||
// Moves on return.
|
||||
return result;
|
||||
}
|
||||
|
|
|
@ -17,7 +17,7 @@ CONFLICT_RET=0
|
|||
#################################################
|
||||
# Running SQL parser tests.
|
||||
printf "\n${GREEN}Running SQL parser tests...${NC}\n"
|
||||
bin/sql_tests -f "test/valid_queries.sql"
|
||||
bin/tests -f "test/valid_queries.sql"
|
||||
SQL_TEST_RET=$?
|
||||
|
||||
if [ $SQL_TEST_RET -eq 0 ]; then
|
||||
|
@ -31,7 +31,7 @@ fi
|
|||
# Running memory leak checks.
|
||||
printf "\n${GREEN}Running memory leak checks...${NC}\n"
|
||||
valgrind --leak-check=full --error-exitcode=200 --log-fd=3 \
|
||||
./bin/sql_tests -f "test/valid_queries.sql" 3>&1 >/dev/null 2>/dev/null
|
||||
./bin/tests -f "test/valid_queries.sql" 3>&1 >/dev/null 2>/dev/null
|
||||
MEM_LEAK_RET=$?
|
||||
|
||||
if [ $MEM_LEAK_RET -ne 200 ]; then
|
||||
|
|
|
@ -31,16 +31,30 @@ TEST(TPCHQueryGrammarTests) {
|
|||
"test/queries/tpc-h-08.sql",
|
||||
"test/queries/tpc-h-09.sql",
|
||||
"test/queries/tpc-h-10.sql",
|
||||
"test/queries/tpc-h-11-15.sql",
|
||||
"test/queries/tpc-h-16-22.sql"
|
||||
"test/queries/tpc-h-11.sql",
|
||||
"test/queries/tpc-h-12.sql",
|
||||
"test/queries/tpc-h-13.sql",
|
||||
"test/queries/tpc-h-14.sql",
|
||||
"test/queries/tpc-h-15.sql",
|
||||
"test/queries/tpc-h-16.sql",
|
||||
"test/queries/tpc-h-17.sql",
|
||||
"test/queries/tpc-h-18.sql",
|
||||
"test/queries/tpc-h-19.sql",
|
||||
"test/queries/tpc-h-20.sql",
|
||||
"test/queries/tpc-h-21.sql",
|
||||
"test/queries/tpc-h-22.sql",
|
||||
};
|
||||
|
||||
int testsFailed = 0;
|
||||
std::string concatenated = "";
|
||||
for (const std::string& file_path : files) {
|
||||
std::string query = readFileContents(file_path);
|
||||
|
||||
concatenated += query;
|
||||
if (concatenated.back() != ';') concatenated += ';';
|
||||
|
||||
SQLParserResult result;
|
||||
SQLParser::parseSQLString(query.c_str(), &result);
|
||||
SQLParser::parse(query.c_str(), &result);
|
||||
if (!result.isValid()) {
|
||||
mt::printFailed(file_path.c_str());
|
||||
printf("%s %s (L%d:%d)%s\n", mt::red(), result.errorMsg(), result.errorLine(), result.errorColumn(), mt::def());
|
||||
|
@ -49,18 +63,29 @@ TEST(TPCHQueryGrammarTests) {
|
|||
mt::printOk(file_path.c_str());
|
||||
}
|
||||
}
|
||||
|
||||
SQLParserResult result;
|
||||
SQLParser::parse(concatenated.c_str(), &result);
|
||||
if (!result.isValid()) {
|
||||
mt::printFailed("TPCHAllConcatenated");
|
||||
printf("%s %s (L%d:%d)%s\n", mt::red(), result.errorMsg(), result.errorLine(), result.errorColumn(), mt::def());
|
||||
++testsFailed;
|
||||
} else {
|
||||
mt::printOk("TPCHAllConcatenated");
|
||||
}
|
||||
|
||||
ASSERT_EQ(testsFailed, 0);
|
||||
}
|
||||
|
||||
TEST(TPCHQueryDetailTest) {
|
||||
std::string query = readFileContents("test/queries/tpc-h-16-22.sql");
|
||||
std::string query = readFileContents("test/queries/tpc-h-20.sql");
|
||||
|
||||
SQLParserResult result;
|
||||
SQLParser::parseSQLString(query.c_str(), &result);
|
||||
SQLParser::parse(query.c_str(), &result);
|
||||
ASSERT(result.isValid());
|
||||
ASSERT_EQ(result.size(), 7);
|
||||
ASSERT_EQ(result.size(), 1);
|
||||
|
||||
const SQLStatement* stmt20 = result.getStatement(4);
|
||||
const SQLStatement* stmt20 = result.getStatement(0);
|
||||
ASSERT_EQ(stmt20->type(), kStmtSelect);
|
||||
|
||||
const SelectStatement* select20 = (const SelectStatement*) stmt20;
|
||||
|
|
Loading…
Reference in New Issue