Benchmarking (#27)

Adds benchmarking capabilities and small grammar fix.
2017-03-06 18:30:35 +01:00 · 2017-03-06 18:30:35 +01:00 · 42049b4d56
commit 42049b4d56
parent 23621fa862
9 changed files with 196 additions and 4 deletions
--- a/.gitignore
+++ b/.gitignore
@ -33,4 +33,6 @@ lib-test/
 *.app

 *.cpp.orig
-*.h.orig
+*.h.orig
+
+benchmark/parser_benchmark
--- a/3
+++ b/3
@ -54,6 +54,9 @@ format:
 	astyle --options=astyle.options $(ALLLIB)
 	astyle --options=astyle.options $(ALLTEST)

+# Rebuild and run the parser benchmarks from scratch.
+# $(MAKE) is used so flags and the jobserver are passed on to the sub-make;
+# clean and run are separate invocations so they can never run concurrently.
+run_benchmark:
+	$(MAKE) -C benchmark/ clean
+	$(MAKE) -C benchmark/ run
+
 ############
 ### Test ###
 ############
--- a/benchmark/Makefile
+++ b/benchmark/Makefile
@ -0,0 +1,13 @@
+
+CFLAGS = -std=c++11 -lstdc++ -Wall -I../src/ -L../
+
+# None of these targets produce a file of the same name.
+.PHONY: all run clean
+
+# Default target: build the benchmark binary.
+all: parser_benchmark
+
+# Build if necessary, then run the benchmarks with the parent directory
+# added to the library search path so that libsqlparser can be loaded.
+run: parser_benchmark
+	@export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:../ && ./parser_benchmark
+
+# Link against Google Benchmark, pthread and the SQL parser library.
+parser_benchmark: parser_benchmark.cpp
+	$(CXX) $(CFLAGS) parser_benchmark.cpp -o parser_benchmark -lbenchmark -lpthread -lsqlparser
+
+# Remove the benchmark binary.
+clean:
+	rm -f parser_benchmark
--- a/benchmark/README.md
+++ b/benchmark/README.md
@ -0,0 +1,34 @@
+# Benchmark
+
+This directory contains the scripts to execute benchmarks of the parser. We use [Google Benchmark](https://github.com/google/benchmark) to define and run benchmarks.
+
+## Install Google Benchmark
+
+```bash
+cmake -DCMAKE_BUILD_TYPE=Release .
+
+make
+
+make install
+```
+
+## Run the benchmarks
+
+Build the library from the parent directory and then execute:
+
+```bash
+make run
+
+# or manually...
+
+make
+export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:../
+./parser_benchmark
+```
+
+... or run this from the parent directory:
+
+```bash
+# From root of Git repository.
+make run_benchmark
+```
--- a/benchmark/benchmark_utils.h
+++ b/benchmark/benchmark_utils.h
@ -0,0 +1,24 @@
+#ifndef SQLPARSER_BENCHMARK_UTILS_H
+#define SQLPARSER_BENCHMARK_UTILS_H
+
+#include <chrono>
+
+#include "benchmark/benchmark.h"
+
+#include "SQLParser.h"
+
+
+// Difference between two std::chrono time points, converted to a
+// std::chrono::duration<double>. Expands to an expression (no trailing
+// semicolon) and parenthesizes its arguments.
+#define TIME_DIFF(end, start)\
+  std::chrono::duration_cast<std::chrono::duration<double>>((end) - (start))
+
+// Current time point of std::chrono::high_resolution_clock.
+// Expands to an expression (no trailing semicolon).
+#define NOW()\
+  std::chrono::high_resolution_clock::now()
+
+
+// Defines a static benchmark function `name` and registers it via BENCHMARK.
+// Each iteration of the benchmark loop parses `query` with
+// hsql::SQLParser::parseSQLString, flags the run with SkipWithError if the
+// result is not valid, and deletes the result.
+#define PARSE_QUERY_BENCHMARK(name, query)\
+  static void name(benchmark::State& st) {\
+    while (st.KeepRunning()) {\
+      hsql::SQLParserResult* result = hsql::SQLParser::parseSQLString(query);\
+      if (!result->isValid()) st.SkipWithError("Parsing failed!");\
+      delete result;\
+    }\
+  }\
+  BENCHMARK(name);
+
+
+#endif
--- a/benchmark/parser_benchmark.cpp
+++ b/benchmark/parser_benchmark.cpp
@ -0,0 +1,105 @@
+
+#include <chrono>
+#include <cstdio>
+#include <sstream>
+#include <string>
+#include "benchmark/benchmark.h"
+
+#include "SQLParser.h"
+#include "parser/bison_parser.h"
+#include "parser/flex_lexer.h"
+
+#include "benchmark_utils.h"
+
+
+// Fixed-query benchmarks. Each PARSE_QUERY_BENCHMARK invocation (see
+// benchmark_utils.h) defines and registers a benchmark that parses the given
+// SQL string on every iteration and reports an error if parsing fails.
+
+// Single minimal SELECT statement.
+PARSE_QUERY_BENCHMARK(BM_SimpleSelect,
+  "SELECT * FROM test;");
+
+// SELECT whose FROM clause is a parenthesized sub-select.
+PARSE_QUERY_BENCHMARK(BM_SimpleSubSelect,
+  "SELECT age, street AS address FROM (SELECT * FROM data);");
+
+// Two statements in one input string.
+PARSE_QUERY_BENCHMARK(BM_TwoSelects,
+  "SELECT * FROM test; SELECT age, street AS address FROM data;");
+
+// Select list with 26 single-character columns.
+PARSE_QUERY_BENCHMARK(BM_LongSelectList26,
+  "SELECT a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u,v,w,x,y,z FROM test;");
+
+// Select list with 52 single-character columns (a-z twice).
+PARSE_QUERY_BENCHMARK(BM_LongSelectList52,
+  "SELECT a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u,v,w,x,y,z,a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u,v,w,x,y,z FROM test;");
+
+// Single column with a long identifier.
+// NOTE(review): the 26/52 suffixes presumably mirror the list benchmarks
+// above rather than the exact identifier length - confirm against the literals.
+PARSE_QUERY_BENCHMARK(BM_LongSelectElement26,
+  "SELECT aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa FROM test;");
+
+PARSE_QUERY_BENCHMARK(BM_LongSelectElement52,
+  "SELECT aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa FROM test;");
+
+// Benchmark the influence of increasing size of the query, while
+// the number of tokens remains unchanged.
+//
+// st.range(0) is the total length of the query in characters. The query is
+// brought to that length by growing the single column name.
+static void BM_CharacterCount(benchmark::State& st) {
+  const size_t querySize = st.range(0);
+
+  // Without the 6-character %name% placeholder the query has 18 characters.
+  const size_t baseSize = 18;
+  std::string query = "SELECT %name% FROM test;";
+
+  if (querySize <= baseSize) {
+    // There is no room for a column name, and the unsigned subtraction below
+    // would wrap around. Flag the run; the benchmark loop is then not entered.
+    st.SkipWithError("Query size too small for the base query!");
+  } else {
+    // Substitute the placeholder (offset 7, length 6) with the padded name.
+    const std::string filler(querySize - baseSize, 'a');
+    query.replace(7, 6, filler);
+  }
+
+  while (st.KeepRunning()) {
+    hsql::SQLParserResult* result = hsql::SQLParser::parseSQLString(query);
+    // Same validity check as the other benchmarks in this file.
+    if (!result->isValid()) st.SkipWithError("Parsing failed!");
+    delete result;
+  }
+}
+// The first range sweeps the query size from 2^5 to 2^15 in steps of x4.
+// The second range is fixed at 5 and is not read by the benchmark body.
+BENCHMARK(BM_CharacterCount)
+  ->RangeMultiplier(1 << 2)
+  ->Ranges({{1 << 5, 1 << 15},
+            {5, 5}});
+
+// Benchmark the influence of increasing number of tokens, while
+// the number of characters remains unchanged.
+//
+// st.range(0) is the target length of the query in characters.
+// st.range(1) is the target number of tokens in the query.
+static void BM_ConditionalTokens(benchmark::State& st) {
+  const size_t targetSize = st.range(0);
+  const size_t numTokens = st.range(1);
+
+  // Base query contains 4 tokens.
+  const size_t baseTokens = 4;
+  std::string query = "SELECT * FROM test";
+
+  // Create conditional. Every clause (" WHERE a", then " AND a") adds two
+  // tokens. Both subtractions saturate at zero so that a token count below
+  // the base, or an odd remainder, cannot wrap the unsigned counter around.
+  std::stringstream condStream;
+  size_t missingTokens = (numTokens > baseTokens) ? numTokens - baseTokens : 0;
+  const char* clause = " WHERE a";
+  while (missingTokens > 0) {
+    condStream << clause;
+    clause = " AND a";
+    missingTokens -= (missingTokens >= 2) ? 2 : missingTokens;
+  }
+
+  query += condStream.str();
+
+  if (targetSize >= query.size()) {
+    // Replace the one-character '*' at offset 7 with a column name that
+    // brings the query to exactly targetSize characters. The +1 accounts for
+    // the removed '*' and guarantees a non-empty column name.
+    const size_t pad = targetSize - query.size() + 1;
+    const std::string filler(pad, 'a');
+    query.replace(7, 1, filler);
+
+  } else {
+    // Query can't be the same length as in the other benchmarks.
+    // Running this would result in unusable data, so flag the run as failed.
+    // The benchmark loop below is then not entered.
+    fprintf(stderr, "Too many tokens. Query too long for benchmark char limit (%zu > %zu).\n",
+      query.size(), targetSize);
+    st.SkipWithError("Query too long for benchmark char limit!");
+  }
+
+  while (st.KeepRunning()) {
+    hsql::SQLParserResult* result = hsql::SQLParser::parseSQLString(query);
+    if (!result->isValid()) st.SkipWithError("Parsing failed!");
+    delete result;
+  }
+}
+// Query length is fixed at 2^14 characters; the token count sweeps from
+// 2^2 to 2^11 in steps of x4.
+BENCHMARK(BM_ConditionalTokens)
+  ->RangeMultiplier(1 << 2)
+  ->Ranges({{1 << 14, 1 << 14},
+            {1 << 2, 1 << 11}});
+
+
+// Program entry point supplied by Google Benchmark; runs the benchmarks
+// registered above.
+BENCHMARK_MAIN();
--- a/example/example.cpp
+++ b/example/example.cpp
@ -31,7 +31,11 @@ int main(int argc, char *argv[]) {
        delete result;
        return 0;
    } else {
-        printf("Invalid SQL!\n");
+        fprintf(stderr, "Given string is not a valid SQL query.\n");
+        fprintf(stderr, "%s (L%d:%d)\n", 
+                result->errorMsg(),
+                result->errorLine(),
+                result->errorColumn());
        delete result;
        return -1;
    }
--- a/src/parser/bison_parser.y
+++ b/src/parser/bison_parser.y
@ -487,6 +487,12 @@ select_no_paren:
 			$$->order = $4;
 			$$->limit = $5;
 		}
+	|	select_clause set_operator select_with_paren opt_order opt_limit {
+			$$ = $1;
+			$$->unionSelect = $3;
+			$$->order = $4;
+			$$->limit = $5;
+		}
 	;

 set_operator:
@ -678,7 +684,7 @@ table_ref:

 table_ref_atomic:
 		table_ref_name
-	|	'(' select_statement ')' alias {
+	|	'(' select_statement ')' opt_alias {
 			auto tbl = new TableRef(kTableSelect);
 			tbl->select = $2;
 			tbl->alias = $4;
--- a/test/valid_queries.sql
+++ b/test/valid_queries.sql
@ -6,7 +6,8 @@ SELECT * from "table" JOIN table2 ON a = b WHERE (b OR NOT a) AND a = 12.5
 (SELECT a FROM foo WHERE a > 12 OR b > 3 AND c NOT LIKE 's%' LIMIT 10);
 SELECT * FROM "table" LIMIT 10 OFFSET 10; SELECT * FROM second;
 SELECT * FROM t1 UNION SELECT * FROM t2 ORDER BY col1;
-- SELECT * FROM t1 UNION (SELECT * FROM t2 UNION SELECT * FROM t3) ORDER BY col1; 
+SELECT * FROM (SELECT * FROM t1);
+SELECT * FROM t1 UNION (SELECT * FROM t2 UNION SELECT * FROM t3) ORDER BY col1;
 # JOIN
 SELECT t1.a, t1.b, t2.c FROM "table" AS t1 JOIN (SELECT * FROM foo JOIN bar ON foo.id = bar.id) t2 ON t1.a = t2.b WHERE (t1.b OR NOT t1.a) AND t2.c = 12.5
 SELECT * FROM t1 JOIN t2 ON c1 = c2;