commit 867a3bb1c0530a80682a4349d22c60910220c510 Author: Jan Mühlig Date: Sun Apr 26 16:38:08 2020 +0200 Initialized BeeDB diff --git a/.clang-format b/.clang-format new file mode 100644 index 0000000..428d393 --- /dev/null +++ b/.clang-format @@ -0,0 +1,127 @@ +--- +Language: Cpp +# BasedOnStyle: Microsoft +AccessModifierOffset: -2 +AlignAfterOpenBracket: Align +AlignConsecutiveMacros: false +AlignConsecutiveAssignments: false +AlignConsecutiveDeclarations: false +AlignEscapedNewlines: Right +AlignOperands: true +AlignTrailingComments: true +AllowAllArgumentsOnNextLine: true +AllowAllConstructorInitializersOnNextLine: true +AllowAllParametersOfDeclarationOnNextLine: true +AllowShortBlocksOnASingleLine: false +AllowShortCaseLabelsOnASingleLine: false +AllowShortFunctionsOnASingleLine: None +AllowShortLambdasOnASingleLine: All +AllowShortIfStatementsOnASingleLine: Never +AllowShortLoopsOnASingleLine: false +AlwaysBreakAfterDefinitionReturnType: None +AlwaysBreakAfterReturnType: None +AlwaysBreakBeforeMultilineStrings: false +AlwaysBreakTemplateDeclarations: MultiLine +BinPackArguments: true +BinPackParameters: true +BraceWrapping: + AfterCaseLabel: false + AfterClass: true + AfterControlStatement: true + AfterEnum: true + AfterFunction: true + AfterNamespace: true + AfterObjCDeclaration: true + AfterStruct: true + AfterUnion: false + AfterExternBlock: true + BeforeCatch: true + BeforeElse: true + IndentBraces: false + SplitEmptyFunction: true + SplitEmptyRecord: true + SplitEmptyNamespace: true +BreakBeforeBinaryOperators: None +BreakBeforeBraces: Custom +BreakBeforeInheritanceComma: false +BreakInheritanceList: BeforeColon +BreakBeforeTernaryOperators: true +BreakConstructorInitializersBeforeComma: false +BreakConstructorInitializers: BeforeColon +BreakAfterJavaFieldAnnotations: false +BreakStringLiterals: true +ColumnLimit: 120 +CommentPragmas: '^ IWYU pragma:' +CompactNamespaces: false +ConstructorInitializerAllOnOneLineOrOnePerLine: false 
+ConstructorInitializerIndentWidth: 4 +ContinuationIndentWidth: 4 +Cpp11BracedListStyle: true +DerivePointerAlignment: false +DisableFormat: false +ExperimentalAutoDetectBinPacking: false +FixNamespaceComments: true +ForEachMacros: + - foreach + - Q_FOREACH + - BOOST_FOREACH +IncludeBlocks: Preserve +IncludeCategories: + - Regex: '^"(llvm|llvm-c|clang|clang-c)/' + Priority: 2 + - Regex: '^(<|"(gtest|gmock|isl|json)/)' + Priority: 3 + - Regex: '.*' + Priority: 1 +IncludeIsMainRegex: '(Test)?$' +IndentCaseLabels: false +IndentPPDirectives: None +IndentWidth: 4 +IndentWrappedFunctionNames: false +JavaScriptQuotes: Leave +JavaScriptWrapImports: true +KeepEmptyLinesAtTheStartOfBlocks: true +MacroBlockBegin: '' +MacroBlockEnd: '' +MaxEmptyLinesToKeep: 1 +NamespaceIndentation: None +ObjCBinPackProtocolList: Auto +ObjCBlockIndentWidth: 2 +ObjCSpaceAfterProperty: false +ObjCSpaceBeforeProtocolList: true +PenaltyBreakAssignment: 2 +PenaltyBreakBeforeFirstCallParameter: 19 +PenaltyBreakComment: 300 +PenaltyBreakFirstLessLess: 120 +PenaltyBreakString: 1000 +PenaltyBreakTemplateDeclaration: 10 +PenaltyExcessCharacter: 1000000 +PenaltyReturnTypeOnItsOwnLine: 1000 +PointerAlignment: Right +ReflowComments: true +SortIncludes: true +SortUsingDeclarations: true +SpaceAfterCStyleCast: false +SpaceAfterLogicalNot: false +SpaceAfterTemplateKeyword: true +SpaceBeforeAssignmentOperators: true +SpaceBeforeCpp11BracedList: false +SpaceBeforeCtorInitializerColon: true +SpaceBeforeInheritanceColon: true +SpaceBeforeParens: ControlStatements +SpaceBeforeRangeBasedForLoopColon: true +SpaceInEmptyParentheses: false +SpacesBeforeTrailingComments: 1 +SpacesInAngles: false +SpacesInContainerLiterals: true +SpacesInCStyleCastParentheses: false +SpacesInParentheses: false +SpacesInSquareBrackets: false +Standard: Cpp11 +StatementMacros: + - Q_UNUSED + - QT_REQUIRE_VERSION +TabWidth: 4 +UseTab: Never +... 
cmake_minimum_required(VERSION 3.9)

project(BeeDB)

# ExternalProject is only needed for the SQL parser below; include it after
# cmake_minimum_required() so that policies are configured first.
include(ExternalProject)

## Set default settings
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

# Append the project warnings instead of replacing CMAKE_CXX_FLAGS, so flags
# passed by the user or a toolchain file are kept.
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pedantic -Wall -Wextra -Wcast-align -Wcast-qual -Wctor-dtor-privacy -Wdisabled-optimization -Wformat=2 -Winit-self -Wmissing-declarations -Wmissing-include-dirs -Woverloaded-virtual -Wredundant-decls -Wshadow -Wsign-promo -Wstrict-overflow=5 -Wswitch-default -Wundef -Wno-unused -Wold-style-cast")
set(CMAKE_CXX_FLAGS_DEBUG "-O0 -g3")
set(CMAKE_CXX_FLAGS_RELEASE "-O3 -g -DNDEBUG")

# "Debug" is the default build type. It is only applied when no build type was
# requested: an unconditional set() would shadow the cache entry created by
# 'cmake -DCMAKE_BUILD_TYPE=Release' and make that option a no-op.
if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
    set(CMAKE_BUILD_TYPE Debug CACHE STRING "Build type: Debug or Release" FORCE)
endif()

## SQL Parser as external project
ExternalProject_Add(
    sql-parser
    GIT_REPOSITORY "https://github.com/jangemue/sql-parser.git"

    PREFIX "${CMAKE_CURRENT_BINARY_DIR}/lib/sql-parser/prefix"
    TMP_DIR "${CMAKE_CURRENT_BINARY_DIR}/lib/sql-parser/tmp"
    DOWNLOAD_DIR "${CMAKE_CURRENT_BINARY_DIR}/lib/sql-parser/download"
    SOURCE_DIR "${CMAKE_CURRENT_BINARY_DIR}/lib/sql-parser/src"
    BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}/lib/sql-parser/bin"
)

## Include and link directories
include_directories(${PROJECT_SOURCE_DIR}/src/include ${PROJECT_SOURCE_DIR}/lib/ ${CMAKE_CURRENT_BINARY_DIR}/lib/sql-parser/src/src)
link_directories(${CMAKE_CURRENT_BINARY_DIR}/lib ${CMAKE_CURRENT_BINARY_DIR}/lib/sql-parser/bin/lib)

## BeeDB sources
add_executable(beedb
    src/beedb.cpp
    src/database.cpp
    src/disk/storage_manager.cpp
    src/disk/buffer_manager.cpp
    src/disk/lru_strategy.cpp
    src/disk/lru_k_strategy.cpp
    src/disk/lfu_strategy.cpp
    src/table/table.cpp
    src/table/column.cpp
    src/table/value.cpp
    src/table/table_disk_manager.cpp
    src/execution/binary_operator.cpp
    src/execution/sequential_scan_operator.cpp
    src/execution/index_scan_operator.cpp
    src/execution/create_table_operator.cpp
    src/execution/create_index_operator.cpp
    src/execution/insert_operator.cpp
    src/execution/selection_operator.cpp
    src/execution/projection_operator.cpp
    src/execution/nested_loops_join_operator.cpp
    src/execution/hash_join_operator.cpp
    src/execution/limit_operator.cpp
    src/execution/build_index_operator.cpp
    src/execution/order_operator.cpp
    src/execution/aggregate_operator.cpp
    src/execution/cross_product_operator.cpp
    src/execution/tuple_buffer_operator.cpp
    src/execution/add_to_index_operator.cpp
    src/execution/update_operator.cpp
    src/expression/attribute.cpp
    src/expression/predicate.cpp
    src/plan/physical/plan.cpp
    src/plan/logical/builder.cpp
    src/plan/physical/builder.cpp
    src/io/executor.cpp
    src/io/file_executor.cpp
    src/io/user_console.cpp
    src/io/printing_executor.cpp
    src/io/result_output_formatter.cpp
    src/io/command/commander.cpp
    src/io/command/custom_commands.cpp
    src/util/text_table.cpp
    src/util/ini_parser.cpp
    src/parser/query_parser.cpp
    src/parser/hsql_parser.cpp
    src/plan/logical/plan.cpp
    src/plan/optimizer/optimizer.cpp
)

## Build target
# The parser library must be downloaded and built before beedb is linked.
add_dependencies(beedb sql-parser)
target_link_libraries(beedb sqlparser)

## Git install hook target
# Installs the clang-format pre-commit hook into the local clone.
add_custom_target(git-hook
    COMMAND cp ${PROJECT_SOURCE_DIR}/.pre-commit-hook ${PROJECT_SOURCE_DIR}/.git/hooks/pre-commit
    COMMAND chmod +x ${PROJECT_SOURCE_DIR}/.git/hooks/pre-commit
)
The course is usually taught every summer term, see [dbis page of TU Dortmund](http://dbis.cs.tu-dortmund.de/cms/en/teaching/index.html) for more information. + +**Attention: Please do not publish or share your solution with other students!** + +## Dependencies +* `git` +* `cmake` (at least version `3.9`) +* `build-essential` +* `bison` and `flex` + +## How to build + * `cmake .` + * `make` + +**OR**, if you prefer a separate `build` folder: + + * `mkdir build && cd build` + * `cmake ..` + * `make` + +Default build is in `Debug` mode. +If you want to build in `Release` mode use `cmake . -DCMAKE_BUILD_TYPE=Release` or set `CMAKE_BUILD_TYPE` in `CMakeLists.txt`. + +## How to use + + Usage: beedb [options] db-file + Positional arguments: + db-file File the database is stored in. Default: bee.db + Optional arguments: + -h --help show this help message and exit + -l --load Load SQL file into database. + -q --query Execute Query. + -c --console Run console after loading file or executing query. + --buffer-manager-frames Number of frames within the frame buffer. + --scan-page-limit Number of pages the SCAN operator can pin at a time. + --enable-index-scan Enable index scan and use whenever possible. + --enable-hash-join Enable hash join and use whenever possible. + --stats Print all execution statistics + +## Configuration +Some configuration outside the console arguments is stored in the file `beedb.ini`. 
+* The number of pages stored as frames in the buffer manager (`buffer manager.frames`) +* The replacement strategy of frames in the buffer manager (`buffer manager.strategy`) +* The `k` parameter for `LRU-K` replacement strategy (`buffer manager.k`) +* The number of how many pages can be pinned by a scan at a time (`scan.page-limit`) +* Enable or disable usage of index scan (`scan.enable-index-scan`) +* Enable or disable usage of hash join (`join.enable-hash-join`) + +## Non-SQL Commands + +* `:explain [plan,graph]`: prints the query plan, either as a table or a graph (a list of nodes and edges) +* `:get [option-name]`: prints either all or the specified option of the database configuration +* `:set option-name numerical-value`: changes the specified option. Only numerical values are valid +* `:show [tables,indices,columns]`: A quick way to show available tables, their columns or indices + +## Examples + +##### Import +`./beedb -l movies.sql` + +##### Run a single query +`./beedb -q "SELECT * FROM movie;"` + +##### Open the BeeDB Console +`./beedb` + +##### Run a query and open console afterwards +`./beedb -q "SELECT * FROM movie;" -c` + +# For developers +* If you want to commit to the repository, please run `make git-hook` before committing. 
+ +# Credits +* Thanks to Hyrise for the SQL parser (MIT, [See on GitHub](https://github.com/hyrise/sql-parser)) +* Thanks to p-ranav for argparse (MIT, [See on GitHub](https://github.com/p-ranav/argparse)) diff --git a/beedb.ini b/beedb.ini new file mode 100644 index 0000000..2dfc2b7 --- /dev/null +++ b/beedb.ini @@ -0,0 +1,14 @@ +[buffer manager] +frames = 256 +strategy = LRU ;LRU-K | LFU | LRU +k = 2 ; LRU-K parameter + +[scan] +page-limit = 64 +enable-index-scan = 0 ; 1 for enable index scan + +[join] +enable-hash-join = 0 ; 1 for enable hash join + +[execution] +print-statistics = 0 ; 1 for printing all execution statistics diff --git a/lib/argparse/argparse.hpp b/lib/argparse/argparse.hpp new file mode 100644 index 0000000..4328492 --- /dev/null +++ b/lib/argparse/argparse.hpp @@ -0,0 +1,539 @@ +/* + __ _ _ __ __ _ _ __ __ _ _ __ ___ ___ + / _` | '__/ _` | '_ \ / _` | '__/ __|/ _ \ Argument Parser for Modern C++ +| (_| | | | (_| | |_) | (_| | | \__ \ __/ http://github.com/p-ranav/argparse + \__,_|_| \__, | .__/ \__,_|_| |___/\___| + |___/|_| + +Licensed under the MIT License . +SPDX-License-Identifier: MIT +Copyright (c) 2019 Pranav Srinivas Kumar . + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
namespace argparse {

namespace details { // namespace for helper methods

// Type sink: naming the member expressions as template arguments makes them
// part of the specialization below, so SFINAE rejects types that lack them.
template <typename... Ts> struct is_container_helper {};

// Primary template: a type is not a container unless proven otherwise.
template <typename T, typename _ = void>
struct is_container : std::false_type {};

// std::string offers begin()/end()/size() but is handled as a single value.
template <> struct is_container<std::string> : std::false_type {};

// Selected for every type with value_type, begin(), end() and size().
template <typename T>
struct is_container<
    T, std::conditional_t<false,
                          is_container_helper<typename T::value_type,
                                              decltype(std::declval<T>().begin()),
                                              decltype(std::declval<T>().end()),
                                              decltype(std::declval<T>().size())>,
                          void>> : public std::true_type {};

template <typename T>
static constexpr bool is_container_v = is_container<T>::value;

template <typename T>
using enable_if_container = std::enable_if_t<is_container_v<T>, T>;

template <typename T>
using enable_if_not_container = std::enable_if_t<!is_container_v<T>, T>;
} // namespace details

/*
 * A single command line argument: its names, help text, the values parsed
 * for it and the rules (number of values, default, implicit value, action)
 * used while parsing.
 */
class Argument {
  friend class ArgumentParser;

public:
  Argument() = default;

  /*
   * Creates an argument known under all given names, e.g. ("-l", "--load").
   * The argument is optional as soon as one name looks like a flag.
   */
  template <typename... Args>
  explicit Argument(Args... args)
      : mNames{std::string(std::move(args))...},
        // Classify from the stored names; the parameters were moved from
        // above and must not be inspected any more.
        mIsOptional(std::any_of(mNames.begin(), mNames.end(),
                                Argument::is_optional)) {
    // Shortest name first, ties broken alphabetically.
    std::sort(
        mNames.begin(), mNames.end(), [](const auto &lhs, const auto &rhs) {
          return lhs.size() == rhs.size() ? lhs < rhs : lhs.size() < rhs.size();
        });
  }

  // Sets the text shown for this argument in the help message.
  Argument &help(std::string aHelp) {
    mHelp = std::move(aHelp);
    return *this;
  }

  // Value returned by get() when the argument was not given.
  Argument &default_value(std::any aDefaultValue) {
    mDefaultValue = std::move(aDefaultValue);
    return *this;
  }

  // Marks the argument as mandatory; checked by validate().
  Argument &required() {
    mIsRequired = true;
    return *this;
  }

  // Value stored when the flag is present; turns the argument into a flag
  // that takes no parameters.
  Argument &implicit_value(std::any aImplicitValue) {
    mImplicitValue = std::move(aImplicitValue);
    mNumArgs = 0;
    return *this;
  }

  // Conversion applied to every raw string consumed for this argument.
  Argument &action(std::function<std::any(const std::string &)> aAction) {
    mAction = std::move(aAction);
    return *this;
  }

  // Number of command line tokens this argument consumes.
  Argument &nargs(size_t aNumArgs) {
    mNumArgs = aNumArgs;
    return *this;
  }

  /*
   * Consumes the values for this argument from [start, end) and returns the
   * iterator to the first token that was not consumed.
   * @throws std::runtime_error if the argument was already given, a flag
   *         appears among its values, or too few values are available and
   *         no default value exists
   */
  template <typename Iterator>
  Iterator consume(Iterator start, Iterator end, std::string usedName = {}) {
    if (mIsUsed) {
      throw std::runtime_error("Duplicate argument");
    }
    mIsUsed = true;
    mUsedName = std::move(usedName);

    if (mNumArgs == 0) {
      // Flag without parameters: remember the implicit value only.
      mValues.emplace_back(mImplicitValue);
      return start;
    }
    if (mNumArgs <= static_cast<size_t>(std::distance(start, end))) {
      end = std::next(start, mNumArgs);
      if (std::any_of(start, end, Argument::is_optional)) {
        throw std::runtime_error("optional argument in parameter sequence");
      }
      std::transform(start, end, std::back_inserter(mValues), mAction);
      return end;
    }
    if (mDefaultValue.has_value()) {
      return start;
    }
    throw std::runtime_error("Too few arguments");
  }

  /*
   * @throws std::runtime_error if argument values are not valid
   */
  void validate() const {
    // A flag configured with implicit_value() takes zero parameters but
    // stores exactly one value (the implicit one).
    const size_t tExpectedValues =
        (mNumArgs == 0 && mImplicitValue.has_value()) ? 1 : mNumArgs;
    const bool tWrongValueCount =
        mValues.size() != tExpectedValues && !mDefaultValue.has_value();

    if (!mIsOptional) {
      if (tWrongValueCount) {
        throw_wrong_value_count();
      }
      return;
    }

    if (mIsUsed && tWrongValueCount) {
      throw_wrong_value_count();
    }
    if (!mIsUsed && !mDefaultValue.has_value() && mIsRequired) {
      std::stringstream stream;
      stream << display_name() << ": required.";
      throw std::runtime_error(stream.str());
    }
    if (mIsUsed && mIsRequired && mValues.empty()) {
      std::stringstream stream;
      stream << display_name() << ": no value provided.";
      throw std::runtime_error(stream.str());
    }
  }

  // Summed length of all names, used to align the help output.
  size_t get_arguments_length() const {
    return std::accumulate(std::begin(mNames), std::end(mNames), size_t(0),
                           [](const auto &sum, const auto &s) {
                             return sum + s.size() +
                                    1; // +1 for space between names
                           });
  }

  // Writes one help line: all names, a tab and the help text.
  friend std::ostream &operator<<(std::ostream &stream,
                                  const Argument &argument) {
    std::stringstream nameStream;
    std::copy(std::begin(argument.mNames), std::end(argument.mNames),
              std::ostream_iterator<std::string>(nameStream, " "));
    stream << nameStream.str() << "\t" << argument.mHelp;
    if (argument.mIsRequired)
      stream << "[Required]";
    stream << "\n";
    return stream;
  }

  template <typename T> bool operator!=(const T &aRhs) const {
    return !(*this == aRhs);
  }

  /*
   * Comparison with a non-container value
   * @throws std::logic_error if no value is available
   * @throws std::bad_any_cast in case of incompatible types
   */
  template <typename T>
  std::enable_if_t<!details::is_container_v<T>, bool>
  operator==(const T &aRhs) const {
    return get<T>() == aRhs;
  }

  /*
   * Comparison with a container, element by element
   * @throws std::logic_error if no value is available
   * @throws std::bad_any_cast in case of incompatible types
   */
  template <typename T>
  std::enable_if_t<details::is_container_v<T>, bool>
  operator==(const T &aRhs) const {
    const auto tLhs = get<T>();
    if (tLhs.size() != aRhs.size())
      return false;
    return std::equal(std::begin(tLhs), std::end(tLhs), std::begin(aRhs));
  }

private:
  // Name for error messages: the spelling used on the command line, or the
  // first declared name for positional / unused arguments.
  std::string display_name() const {
    if (!mUsedName.empty()) {
      return mUsedName;
    }
    return mNames.empty() ? std::string{} : mNames.front();
  }

  [[noreturn]] void throw_wrong_value_count() const {
    std::stringstream stream;
    stream << display_name() << ": expected " << mNumArgs
           << " argument(s). " << mValues.size() << " provided.";
    throw std::runtime_error(stream.str());
  }

  // True if the whole string is a decimal integer with an optional sign.
  static bool is_integer(const std::string &aValue) {
    if (aValue.empty())
      return false;
    // Plain range check instead of isdigit(): passing a negative char
    // (non-ASCII byte) to isdigit() is undefined behaviour.
    const char tFirst = aValue[0];
    const bool tIsDigit = tFirst >= '0' && tFirst <= '9';
    if (!tIsDigit && tFirst != '-' && tFirst != '+')
      return false;
    char *tEnd = nullptr;
    strtol(aValue.c_str(), &tEnd, 10);
    return *tEnd == '\0';
  }

  // True if the whole string parses as a floating point number.
  static bool is_float(const std::string &aValue) {
    std::istringstream tStream(aValue);
    float tFloat;
    // noskipws considers leading whitespace invalid
    tStream >> std::noskipws >> tFloat;
    // Check the entire string was consumed
    // and if either failbit or badbit is set
    return tStream.eof() && !tStream.fail();
  }

  // If an argument starts with "-" or "--", then it's optional.
  // Negative numbers such as "-5" or "-1.5" are values, not flags.
  static bool is_optional(const std::string &aName) {
    return (!aName.empty() && aName[0] == '-' && !is_integer(aName) &&
            !is_float(aName));
  }

  static bool is_positional(const std::string &aName) {
    return !is_optional(aName);
  }

  /*
   * Getter for non-container types: first parsed value, else default value
   * @throws std::logic_error if neither a value nor a default exists
   * @throws std::bad_any_cast in case of incompatible types
   */
  template <typename T> details::enable_if_not_container<T> get() const {
    if (!mValues.empty()) {
      return std::any_cast<T>(mValues.front());
    }
    if (mDefaultValue.has_value()) {
      return std::any_cast<T>(mDefaultValue);
    }
    throw std::logic_error("No value provided");
  }

  /*
   * Getter for container types: all parsed values, else the default value
   * @throws std::logic_error if neither values nor a default exist
   * @throws std::bad_any_cast in case of incompatible types
   */
  template <typename CONTAINER>
  details::enable_if_container<CONTAINER> get() const {
    using ValueType = typename CONTAINER::value_type;
    if (!mValues.empty()) {
      CONTAINER tResult;
      std::transform(
          std::begin(mValues), std::end(mValues), std::back_inserter(tResult),
          [](const auto &value) { return std::any_cast<ValueType>(value); });
      return tResult;
    }
    if (mDefaultValue.has_value()) {
      // The default is stored as the complete container.
      return std::any_cast<CONTAINER>(mDefaultValue);
    }
    throw std::logic_error("No value provided");
  }

  std::vector<std::string> mNames;
  std::string mUsedName;
  std::string mHelp;
  std::any mDefaultValue;
  std::any mImplicitValue;
  std::function<std::any(const std::string &)> mAction =
      [](const std::string &aValue) { return aValue; };
  std::vector<std::any> mValues;
  std::vector<std::string> mRawValues;
  size_t mNumArgs = 1;
  bool mIsOptional = false;
  bool mIsRequired = false;
  bool mIsUsed = false; // relevant for optional arguments. True if used by user

public:
  static constexpr auto mHelpOption = "-h";
  static constexpr auto mHelpOptionLong = "--help";
};

/*
 * Collects argument definitions, parses a command line against them and
 * gives typed access to the parsed values.
 */
class ArgumentParser {
public:
  explicit ArgumentParser(std::string aProgramName = {})
      : mProgramName(std::move(aProgramName)) {
    add_argument(Argument::mHelpOption, Argument::mHelpOptionLong)
        .help("show this help message and exit")
        .nargs(0)
        .default_value(false)
        .implicit_value(true);
  }

  // Parameter packing
  // Call add_argument with variadic number of string arguments
  template <typename... Targs> Argument &add_argument(Targs... Fargs) {
    auto tArgument = std::make_shared<Argument>(std::move(Fargs)...);

    if (tArgument->mIsOptional)
      mOptionalArguments.emplace_back(tArgument);
    else
      mPositionalArguments.emplace_back(tArgument);

    // Every name resolves to the same shared Argument object.
    for (const auto &tName : tArgument->mNames) {
      mArgumentMap.insert_or_assign(tName, tArgument);
    }
    return *tArgument;
  }

  // Parameter packed add_parents method
  // Accepts a variadic number of ArgumentParser objects
  template <typename... Targs> void add_parents(Targs... Fargs) {
    (add_parent(Fargs), ...);
  }

  /* Call parse_args_internal - which does all the work
   * Then, validate the parsed arguments
   * This variant is used mainly for testing
   * @throws std::runtime_error in case of any invalid argument
   */
  void parse_args(const std::vector<std::string> &aArguments) {
    parse_args_internal(aArguments);
    parse_args_validate();
  }

  /* Main entry point for parsing command-line arguments using this
   * ArgumentParser
   * @throws std::runtime_error in case of any invalid argument
   */
  void parse_args(int argc, const char *const argv[]) {
    std::vector<std::string> arguments;
    std::copy(argv, argv + argc, std::back_inserter(arguments));
    parse_args(arguments);
  }

  /* Getter for the value of the argument registered under the given name
   * @throws std::logic_error in case of an invalid argument name or if the
   *         argument has neither a value nor a default
   * @throws std::bad_any_cast in case of incompatible types
   */
  template <typename T = std::string> T get(const std::string &aArgumentName) {
    auto tIterator = mArgumentMap.find(aArgumentName);
    if (tIterator != mArgumentMap.end()) {
      return tIterator->second->get<T>();
    }
    throw std::logic_error("No such argument");
  }

  /* Indexing operator. Return a reference to an Argument object
   * Used in conjunction with Argument.operator== e.g., parser["foo"] == true
   * @throws std::logic_error in case of an invalid argument name
   */
  Argument &operator[](const std::string &aArgumentName) {
    auto tIterator = mArgumentMap.find(aArgumentName);
    if (tIterator != mArgumentMap.end()) {
      return *(tIterator->second);
    }
    throw std::logic_error("No such argument");
  }

  // Printing the one and only help message
  // I've stuck with a simple message format, nothing fancy.
  // The message is written to std::cout and also returned.
  // TODO: support user-defined help and usage messages for the ArgumentParser
  std::string print_help() {
    std::stringstream stream;
    stream << std::left;
    stream << "Usage: " << mProgramName << " [options] ";
    size_t tLongestArgumentLength = get_length_of_longest_argument();

    for (const auto &argument : mPositionalArguments) {
      stream << argument->mNames.front() << " ";
    }
    stream << "\n\n";

    if (!mPositionalArguments.empty())
      stream << "Positional arguments:\n";

    for (const auto &mPositionalArgument : mPositionalArguments) {
      stream.width(tLongestArgumentLength);
      stream << *mPositionalArgument;
    }

    if (!mOptionalArguments.empty())
      stream << (mPositionalArguments.empty() ? "" : "\n")
             << "Optional arguments:\n";

    for (const auto &mOptionalArgument : mOptionalArguments) {
      stream.width(tLongestArgumentLength);
      stream << *mOptionalArgument;
    }

    std::cout << stream.str();
    return stream.str();
  }

private:
  // Takes over all argument definitions of one parent parser.
  void add_parent(const ArgumentParser &aParentParser) {
    const auto &tPositionalArguments = aParentParser.mPositionalArguments;
    std::copy(std::begin(tPositionalArguments), std::end(tPositionalArguments),
              std::back_inserter(mPositionalArguments));

    const auto &tOptionalArguments = aParentParser.mOptionalArguments;
    std::copy(std::begin(tOptionalArguments), std::end(tOptionalArguments),
              std::back_inserter(mOptionalArguments));

    for (const auto &[tKey, tValue] : aParentParser.mArgumentMap) {
      mArgumentMap.insert_or_assign(tKey, tValue);
    }
    mParentParsers.push_back(aParentParser);
  }

  /*
   * @throws std::runtime_error in case of any invalid argument
   */
  void parse_args_internal(const std::vector<std::string> &aArguments) {
    // Nothing to parse; stepping past begin() of an empty vector would be
    // undefined behaviour.
    if (aArguments.empty()) {
      return;
    }
    if (mProgramName.empty()) {
      mProgramName = aArguments.front();
    }
    auto end = std::end(aArguments);
    auto positionalArgumentIt = std::begin(mPositionalArguments);
    // The first element is the program name and is skipped.
    for (auto it = std::next(std::begin(aArguments)); it != end;) {
      const auto &tCurrentArgument = *it;
      if (tCurrentArgument == Argument::mHelpOption ||
          tCurrentArgument == Argument::mHelpOptionLong) {
        throw std::runtime_error("help called");
      }
      if (Argument::is_positional(tCurrentArgument)) {
        if (positionalArgumentIt == std::end(mPositionalArguments)) {
          throw std::runtime_error(
              "Maximum number of positional arguments exceeded");
        }
        auto tArgument = *(positionalArgumentIt++);
        it = tArgument->consume(it, end);
      } else if (auto tIterator = mArgumentMap.find(tCurrentArgument);
                 tIterator != mArgumentMap.end()) {
        auto tArgument = tIterator->second;
        it = tArgument->consume(std::next(it), end, tCurrentArgument);
      } else if (const auto &tCompoundArgument = tCurrentArgument;
                 tCompoundArgument.size() > 1 && tCompoundArgument[0] == '-' &&
                 tCompoundArgument[1] != '-') {
        // Bundled short flags, e.g. "-ab" is handled as "-a" "-b".
        ++it;
        for (size_t j = 1; j < tCompoundArgument.size(); j++) {
          auto iCurrentArgument = std::string{'-', tCompoundArgument[j]};
          if (auto iIterator = mArgumentMap.find(iCurrentArgument);
              iIterator != mArgumentMap.end()) {
            auto tArgument = iIterator->second;
            it = tArgument->consume(it, end, iCurrentArgument);
          } else {
            throw std::runtime_error("Unknown argument");
          }
        }
      } else {
        throw std::runtime_error("Unknown argument");
      }
    }
  }

  /*
   * @throws std::runtime_error in case of any invalid argument
   */
  void parse_args_validate() {
    // Check if all arguments are parsed
    for (const auto &tEntry : mArgumentMap) {
      tEntry.second->validate();
    }
  }

  // Used by print_help.
  size_t get_length_of_longest_argument() {
    size_t tLongest = 0;
    for (const auto &tEntry : mArgumentMap) {
      tLongest = std::max(tLongest, tEntry.second->get_arguments_length());
    }
    return tLongest;
  }

  std::string mProgramName;
  std::vector<ArgumentParser> mParentParsers;
  std::vector<std::shared_ptr<Argument>> mPositionalArguments;
  std::vector<std::shared_ptr<Argument>> mOptionalArguments;
  std::map<std::string, std::shared_ptr<Argument>> mArgumentMap;
};

// Convenience wrapper: parse, and on any parse error print the error and the
// help message, then terminate the process.
#define PARSE_ARGS(parser, argc, argv)                                         \
  try {                                                                        \
    parser.parse_args(argc, argv);                                             \
  } catch (const std::runtime_error &err) {                                    \
    std::cout << err.what() << std::endl;                                      \
    parser.print_help();                                                       \
    exit(0);                                                                   \
  }

} // namespace argparse
IN NO EVENT SHALL * + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * + * OTHER DEALINGS IN THE SOFTWARE. * + * * + * Authors (alphabetical list): * + * Maximilian Berens * + * Roland Kühn * + * Jan Mühlig * + *------------------------------------------------------------------------------* + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +

/**
 * Entry point of BeeDB.
 *
 * Reads "beedb.ini", lets command line arguments override the configured values,
 * boots the database and then (depending on the arguments) imports an SQL file,
 * executes a single query and/or starts the interactive console.
 *
 * NOTE(review): the template arguments of IniParser::get and ArgumentParser::get were
 * lost in this copy of the file. They are restored from the default values and from the
 * types returned by the option actions; confirm against the project headers.
 *
 * @param arg_count Number of command line arguments.
 * @param args      Command line arguments.
 * @return 0 on success, 1 when the command line could not be parsed.
 */
int main(int arg_count, char **args)
{
    // Read configuration file.
    beedb::util::IniParser ini_parser{"beedb.ini"};
    if (ini_parser.empty())
    {
        std::cout << "[Warning] Missing configuration file beedb.ini" << std::endl;
    }

    const auto buffer_frames = ini_parser.get<std::uint32_t>("buffer manager", "frames", 256u);
    const auto scan_page_limit = ini_parser.get<std::uint32_t>("scan", "page-limit", 64u);
    const auto enable_index_scan = ini_parser.get<bool>("scan", "enable-index-scan", false);
    const auto buffer_replacement_strategy = ini_parser.get<std::string>("buffer manager", "strategy", "LRU");
    const auto lru_k = ini_parser.get<std::uint32_t>("buffer manager", "k", 2u);
    const auto enable_hash_join = ini_parser.get<bool>("join", "enable-hash-join", false);
    const auto print_statistics = ini_parser.get<bool>("execution", "print-statistics", false);

    // Converts the text of a numeric option into std::uint32_t.
    // std::stoul on its own accepts "-1" (wrapping around) and trailing garbage such as "12abc";
    // both are rejected here, as are values that do not fit into 32 bits.
    const auto to_uint32 = [](const std::string &value) -> std::uint32_t {
        std::size_t parsed_characters = 0u;
        const auto number = std::stoul(value, &parsed_characters);
        const auto narrowed = static_cast<std::uint32_t>(number);
        if (value.find('-') != std::string::npos || parsed_characters != value.size() || narrowed != number)
        {
            throw std::invalid_argument("Expected a non-negative 32 bit number, but got '" + value + "'.");
        }
        return narrowed;
    };

    // Parse command line arguments
    argparse::ArgumentParser argument_parser("beedb");
    argument_parser.add_argument("db-file")
        .help("File the database is stored in. Default: bee.db")
        .default_value(std::string("bee.db"));
    argument_parser.add_argument("-l", "--load").help("Load SQL file into database.").default_value(std::string(""));
    argument_parser.add_argument("-q", "--query").help("Execute Query.").default_value(std::string(""));
    argument_parser.add_argument("-c", "--console")
        .help("Run console after loading file or executing query.")
        .implicit_value(true)
        .default_value(false);
    argument_parser.add_argument("--buffer-manager-frames")
        .help("Number of frames within the frame buffer.")
        .default_value(buffer_frames)
        .action([&to_uint32](const std::string &value) { return to_uint32(value); });
    argument_parser.add_argument("--scan-page-limit")
        .help("Number of pages the SCAN operator can pin at a time.")
        .default_value(scan_page_limit)
        .action([&to_uint32](const std::string &value) { return to_uint32(value); });
    argument_parser.add_argument("--enable-index-scan")
        .help("Enable index scan and use whenever possible.")
        .implicit_value(true)
        .default_value(enable_index_scan);
    argument_parser.add_argument("--enable-hash-join")
        .help("Enable hash join and use whenever possible.")
        .implicit_value(true)
        .default_value(enable_hash_join);
    argument_parser.add_argument("--stats")
        .help("Print all execution statistics")
        .implicit_value(true)
        .default_value(print_statistics);

    try
    {
        argument_parser.parse_args(arg_count, args);
    }
    catch (const std::exception &error)
    {
        // The option actions run while parsing and report bad numbers with std::invalid_argument /
        // std::out_of_range. Those are std::logic_error, not std::runtime_error, so the common base
        // class has to be caught; otherwise a malformed number would terminate the process.
        std::cerr << "[Error] " << error.what() << std::endl;
        argument_parser.print_help();
        return 1;
    }

    // Map the configured strategy name (case insensitive) to the enum; anything unknown means LRU.
    const std::regex lfu_regex("lfu", std::regex::icase);
    const std::regex lru_k_regex("lru-k", std::regex::icase);
    beedb::Config::BufferReplacementStrategy strategy = beedb::Config::LRU;
    if (std::regex_match(buffer_replacement_strategy, lfu_regex))
    {
        strategy = beedb::Config::LFU;
    }
    else if (std::regex_match(buffer_replacement_strategy, lru_k_regex))
    {
        strategy = beedb::Config::LRU_K;
    }

    beedb::Config config{};

    config.set(beedb::Config::k_BufferFrames, argument_parser.get<std::uint32_t>("--buffer-manager-frames"),
               beedb::Config::ConfigMapValue::immutable);
    config.set(beedb::Config::k_BufferReplacementStrategy, strategy, beedb::Config::ConfigMapValue::immutable);
    config.set(beedb::Config::k_LRU_K, lru_k, beedb::Config::ConfigMapValue::immutable);
    config.set(beedb::Config::k_ScanPageLimit, argument_parser.get<std::uint32_t>("--scan-page-limit"),
               beedb::Config::ConfigMapValue::immutable);
    config.set(beedb::Config::k_OptimizationEnableIndexScan, argument_parser.get<bool>("--enable-index-scan"));
    config.set(beedb::Config::k_OptimizationEnableHashJoin, argument_parser.get<bool>("--enable-hash-join"));
    config.set(beedb::Config::k_OptimizationDisableOptimization,
               true); // true (at first), to disable optimization during boot
    config.set(beedb::Config::k_CheckFinalPlan, false);
    config.set(beedb::Config::k_PrintExecutionStatistics, argument_parser.get<bool>("--stats"));

    const auto database_file_name = argument_parser.get<std::string>("db-file");
    const auto sql_file = argument_parser.get<std::string>("-l");
    const auto query = argument_parser.get<std::string>("-q");
    const auto is_console = argument_parser.get<bool>("-c");

    beedb::Database database(config, database_file_name);
    database.boot();

    // re-enable optimization for user-queries, after boot code ran:
    config.set(beedb::Config::k_OptimizationDisableOptimization, false);

    // re-enable checks on all plans, when in debug mode:
    // The call is placed inside assert() on purpose, so it disappears when NDEBUG is defined.
    assert(config.set(beedb::Config::k_CheckFinalPlan, true));

    if (sql_file.empty() == false)
    {
        beedb::io::FileExecutor file_importer(database);
        file_importer.import_file(sql_file);
    }

    if (query.empty() == false)
    {
        beedb::io::PrintingExecutor printing_executor(database);
        printing_executor.execute(beedb::io::Query{query});
    }

    // Without an explicit "--console", loading a file or running a query is a one-shot invocation.
    if ((sql_file.empty() == false || query.empty() == false) && is_console == false)
    {
        return 0;
    }

    beedb::io::UserConsole user_console(database);
    user_console.wait_for_input();

    return 0;
}
\ No
newline at end of file diff --git a/src/database.cpp b/src/database.cpp new file mode 100644 index 0000000..84ed924 --- /dev/null +++ b/src/database.cpp @@ -0,0 +1,296 @@ +/*------------------------------------------------------------------------------* + * Architecture & Implementation of DBMS * + *------------------------------------------------------------------------------* + * Copyright 2020 Databases and Information Systems Group TU Dortmund * + * Visit us at * + * http://dbis.cs.tu-dortmund.de/cms/en/home/ * + * * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * + * OTHER DEALINGS IN THE SOFTWARE. * + * * + * Authors (alphabetical list): * + * Maximilian Berens * + * Roland Kühn * + * Jan Mühlig * + *------------------------------------------------------------------------------* + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace beedb; + +/** Binds the configuration, sets up the storage, buffer and table disk managers and installs the replacement strategy selected by Config::k_BufferReplacementStrategy (LFU, LRU-K, otherwise LRU). */ +Database::Database(Config &config, const std::string &file_name) + : _config(config), _storage_manager(file_name), _buffer_manager(config[Config::k_BufferFrames], _storage_manager), + _table_disk_manager(_buffer_manager) +{ + // Initialize BufferManagerStrategy.
+ std::unique_ptr replacement_strategy{}; + if (config[Config::k_BufferReplacementStrategy] == Config::LFU) + { + replacement_strategy.reset(new disk::LFUStrategy()); + } + else if (config[Config::k_BufferReplacementStrategy] == Config::LRU_K) + { + replacement_strategy.reset(new disk::LRUKStrategy(config[Config::k_LRU_K])); + } + else + { + replacement_strategy.reset(new disk::LRUStrategy()); + } + + this->_buffer_manager.replacement_strategy(std::move(replacement_strategy)); +} + +/** Persists the cardinality of every non-virtual table and afterwards deletes all table objects held in _tables. */ +Database::~Database() +{ + // Update statistics + for (auto [_, table] : this->_tables) + { + if (table->is_virtual() == false) + { + this->persist_table_statistics(table, this->_statistics.table_statistics().cardinality(*table)); + } + } + + // Delete tables AFTER all statistics are persisted. + // Otherwise it is possible to delete the statistics table + // before all updates are done. + for (auto [_, table] : this->_tables) + { + delete table; + } +} + +/** Registers the system tables, rebuilds all tables (with columns, indices and cardinalities) from the system tables by running queries against them, and finally executes an index build plan for every index. */ +void Database::boot() +{ + // Initialize tables with fixed schema and allocate pages for the data, + // if the file is empty. + this->initialize_database(this->_storage_manager.count_pages() == 0); + + // Read all tables, columns and indices and build tables from the results.
+ auto tables_executor = io::Executor{*this}; + tables_executor.execute(io::Query{"select * from system_tables"}, [&](const table::Tuple &table_tuple) { + const auto table_id = table_tuple.get(0).get(); + const auto table_name = table_tuple.get(1).get(); + const auto table_page = table_tuple.get(2).get(); + + auto schema = table::Schema{table_name}; + auto columns_executor = io::Executor{*this}; + columns_executor.execute( + io::Query{"select * from system_columns where table_id = " + std::to_string(table_id)}, + [&](const table::Tuple &column_tuple) { + const auto column_id = column_tuple.get(0).get(); + const auto column_type_id = column_tuple.get(2).get(); + const auto column_length = column_tuple.get(3).get(); + const auto column_name = column_tuple.get(4).get(); + const auto column_is_nullable = column_tuple.get(5).get(); + const auto type = + table::Type(static_cast(column_type_id), std::uint16_t(column_length)); + + std::vector> indices; + auto indices_executor = io::Executor{*this}; + indices_executor.execute( + io::Query{"select * from system_indices where column_id = " + std::to_string(column_id)}, + [&](const table::Tuple &index_tuple) { + const auto index_id = index_tuple.get(0).get(); + const auto index_type_id = index_tuple.get(2).get(); + const auto index_name = index_tuple.get(3).get(); + const auto is_unique = index_tuple.get(4).get(); + + indices.push_back(index::IndexFactory::new_index( + index_name, static_cast(index_type_id), static_cast(is_unique))); + this->_next_index_id = std::max(index_id + 1u, this->_next_index_id); + }); + + schema.add({column_id, type, static_cast(column_is_nullable), std::move(indices)}, + {column_name, table_name}); + this->_next_column_id = std::max(column_id + 1u, this->_next_column_id); + }); + + this->_tables[table_name] = new table::Table(table_id, table_page, std::move(schema)); + + auto statistics_executor = io::Executor{*this}; + statistics_executor.execute( + {"select * from system_table_statistics where
table_id = " + std::to_string(table_id)}, + [this, table_id](const table::Tuple &tuple) { + const auto cardinality = tuple.get(1).get(); + this->_statistics.table_statistics().cardinality(table_id, cardinality); + }); + + this->_next_table_id = std::max(table_id + 1u, this->_next_table_id); + }); + + // Build and fill all indices. + io::Executor build_index_executor{*this}; + for (auto &[table_name, table] : this->_tables) + { + for (auto i = 0u; i < table->schema().size(); i++) + { + const auto &column = table->schema().column(i); + const auto &attribute = table->schema().attribute(i); + for (auto index : column.indices()) + { + auto plan = plan::physical::Builder::build_index_plan(*this, table_name, attribute.name, index->name()); + build_index_executor.execute(plan); + } + } + } +} + +/** Registers the four system tables (system_tables, system_columns, system_indices, system_table_statistics) on pages 0 to 3; when create_schema is true, those pages are allocated first. */ +void Database::initialize_database(const bool create_schema) +{ + if (create_schema) + { + // Allocate page for tables. + auto tables_page = this->_buffer_manager.allocate(); + assert(tables_page->id() == 0); + this->_buffer_manager.unpin(tables_page, false); + + // Allocate page for columns. + auto columns_page = this->_buffer_manager.allocate(); + assert(columns_page->id() == 1); + this->_buffer_manager.unpin(columns_page, false); + + // Allocate page for indices. + auto indices_page = this->_buffer_manager.allocate(); + assert(indices_page->id() == 2); + this->_buffer_manager.unpin(indices_page, false); + + // Allocate page for table statistics.
+ auto table_statistics_page = this->_buffer_manager.allocate(); + assert(table_statistics_page->id() == 3); + this->_buffer_manager.unpin(table_statistics_page, false); + } + + std::string tables_name("system_tables"); + table::Schema tables_schema{tables_name}; + tables_schema.add({table::Type::INT, false}, {"id", tables_schema.table_name()}); + tables_schema.add({{table::Type::CHAR, 48}, false}, {"name", tables_schema.table_name()}); + tables_schema.add({table::Type::INT, false}, {"page", tables_schema.table_name()}); + + auto tables_table = new table::Table(-1, 0, std::move(tables_schema)); + + std::string columns_name("system_columns"); + table::Schema columns_schema{columns_name}; + columns_schema.add({table::Type::INT, false}, {"id", columns_schema.table_name()}); + columns_schema.add({table::Type::INT, false}, {"table_id", columns_schema.table_name()}); + columns_schema.add({table::Type::INT, false}, {"type_id", columns_schema.table_name()}); + columns_schema.add({table::Type::INT, false}, {"length", columns_schema.table_name()}); + columns_schema.add({{table::Type::CHAR, 48}, false}, {"name", columns_schema.table_name()}); + columns_schema.add({table::Type::INT, false}, {"is_nullable", columns_schema.table_name()}); + columns_schema.add({table::Type::INT, false}, {"is_unique", columns_schema.table_name()}); + columns_schema.add({table::Type::INT, false}, {"is_primary_key", columns_schema.table_name()}); + auto columns_table = new table::Table(-1, 1, std::move(columns_schema)); + + std::string indices_name("system_indices"); + table::Schema indices_schema{indices_name}; + indices_schema.add({table::Type::INT, false}, {"id", indices_schema.table_name()}); + indices_schema.add({table::Type::INT, false}, {"column_id", indices_schema.table_name()}); + indices_schema.add({table::Type::INT, false}, {"type_id", indices_schema.table_name()}); + indices_schema.add({{table::Type::CHAR, 48}, false}, {"name", indices_schema.table_name()}); +
indices_schema.add({table::Type::INT, false}, {"is_unique", indices_schema.table_name()}); + auto indices_table = new table::Table(-1, 2, std::move(indices_schema)); + + std::string table_statistics_name("system_table_statistics"); + table::Schema table_statistics_schema{table_statistics_name}; + table_statistics_schema.add({table::Type::INT, false}, {"table_id", table_statistics_schema.table_name()}); + table_statistics_schema.add({table::Type::LONG, false}, {"cardinality", table_statistics_schema.table_name()}); + auto table_statistics_table = new table::Table(-1, 3, std::move(table_statistics_schema)); + + this->_tables[tables_table->name()] = tables_table; + this->_tables[columns_table->name()] = columns_table; + this->_tables[indices_table->name()] = indices_table; + this->_tables[table_statistics_table->name()] = table_statistics_table; +} + +/** Allocates a page for a new table, persists the table, its columns and an initial cardinality of 0 in the system tables and registers the table in _tables. NOTE(review): an already registered table of the same name is overwritten in _tables without being deleted - confirm callers rule that out. */ +void Database::create_table(const table::Schema &schema) +{ + auto tables_table = this->_tables["system_tables"]; + auto columns_table = this->_tables["system_columns"]; + auto table_statistics_table = this->_tables["system_table_statistics"]; + + // Persist table.
+ table::Tuple table_tuple(tables_table->schema(), tables_table->schema().row_size()); + auto table_id = std::int32_t(this->_next_table_id++); + auto name = schema.table_name(); + auto page = this->_buffer_manager.allocate(); + auto page_id = std::int32_t(page->id()); + this->_buffer_manager.unpin(page, false); + table_tuple.set(0, table_id); + table_tuple.set(1, name); + table_tuple.set(2, page_id); + this->_table_disk_manager.add_row(*tables_table, std::move(table_tuple)); + + // Persist columns + for (auto i = 0u; i < schema.size(); i++) + { + const auto &column = schema.column(i); + const auto &attribute = schema.attribute(i); + table::Tuple column_tuple(columns_table->schema(), columns_table->schema().row_size()); + auto column_id = std::int32_t(this->_next_column_id++); + auto column_name = attribute.name; + auto type_id = std::int32_t(static_cast(column.type())); + auto length = std::int32_t(column.type().dynamic_length()); + auto is_nullable = std::int32_t(column.is_nullable()); + auto is_unique = std::int32_t(0); + auto is_primary_key = std::int32_t(0); + column_tuple.set(0, column_id); + column_tuple.set(1, table_id); + column_tuple.set(2, type_id); + column_tuple.set(3, length); + column_tuple.set(4, column_name); + column_tuple.set(5, is_nullable); + column_tuple.set(6, is_unique); + column_tuple.set(7, is_primary_key); + this->_table_disk_manager.add_row(*columns_table, std::move(column_tuple)); + } + + table::Tuple table_statistics_tuple(table_statistics_table->schema(), table_statistics_table->schema().row_size()); + auto cardinality = std::int64_t{0}; + table_statistics_tuple.set(0, table_id); + table_statistics_tuple.set(1, cardinality); + this->_table_disk_manager.add_row(*table_statistics_table, std::move(table_statistics_tuple)); + + this->_tables[schema.table_name()] = new table::Table(table_id, page_id, schema); +} + +/** Persists the description of a new index (id, column id, type id, name, uniqueness) as one row of system_indices. */ +void Database::create_index(const table::Column &column, const index::Type type, const std::string &name, + const bool
is_unique) +{ + auto *indices_table = this->_tables["system_indices"]; + table::Tuple tuple(indices_table->schema(), indices_table->schema().row_size()); + auto index_id = std::int32_t(this->_next_index_id++); + auto column_id = std::int32_t(column.id()); + auto type_id = std::int32_t(type); + auto index_name = name; + auto unique = std::int32_t(is_unique); + tuple.set(0, index_id); + tuple.set(1, column_id); + tuple.set(2, type_id); + tuple.set(3, index_name); + tuple.set(4, unique); + this->_table_disk_manager.add_row(*indices_table, std::move(tuple)); +} + +/** Stores the given cardinality for the table by executing an UPDATE statement on system_table_statistics. */ +void Database::persist_table_statistics(beedb::table::Table *table, const std::uint64_t cardinality) +{ + io::Executor executor(*this); + executor.execute({"update system_table_statistics set cardinality = " + std::to_string(std::int64_t(cardinality)) + + " where table_id = " + std::to_string(table->id()) + ";"}); +} \ No newline at end of file diff --git a/src/disk/buffer_manager.cpp b/src/disk/buffer_manager.cpp new file mode 100644 index 0000000..7100c44 --- /dev/null +++ b/src/disk/buffer_manager.cpp @@ -0,0 +1,150 @@ +/*------------------------------------------------------------------------------* + * Architecture & Implementation of DBMS * + *------------------------------------------------------------------------------* + * Copyright 2020 Databases and Information Systems Group TU Dortmund * + * Visit us at * + * http://dbis.cs.tu-dortmund.de/cms/en/home/ * + * * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * + * OTHER DEALINGS IN THE SOFTWARE.
* + * * + * Authors (alphabetical list): * + * Maximilian Berens * + * Roland Kühn * + * Jan Mühlig * + *------------------------------------------------------------------------------* + */ + +#include +#include +#include +#include +#include +#include + +using namespace beedb::disk; + +/** Creates a buffer of count_frames frames (with one frame information entry each) on top of the given storage manager and takes ownership of the replacement strategy. */ +BufferManager::BufferManager(const std::size_t count_frames, StorageManager &space_manager, + std::unique_ptr replacement_strategy) + : _space_manager(space_manager), _replacement_strategy(std::move(replacement_strategy)), _frames(count_frames), + _frame_information(count_frames) +{ +} + +/** Asserts that no frame is pinned anymore and writes all dirty pages back to disk. */ +BufferManager::~BufferManager() +{ + // Check no frame is pinned anymore (this would indicate programming failure). + for (const auto &frame_information : this->_frame_information) + { + assert(frame_information.is_pinned() == false && "Not all pages are unpinned on shutdown."); + } + + // Write all dirty pages back to disk. + this->flush(); +} + +/** Pins the page and returns a pointer into the frame that holds it. A page that is not buffered yet is read from storage into the frame chosen by find_frame(), after that frame's dirty content was written back. Throws EvictedPagePinnedException when the chosen frame is still pinned. */ +Page *BufferManager::pin(const Page::page_id page_id) +{ + this->_pin_sequence++; + + auto frame_information_iterator = this->frame_information(page_id); + const bool is_frame_buffered = frame_information_iterator != this->_frame_information.end(); + if (is_frame_buffered) + { + // Update frame information. + (*frame_information_iterator).increase_pin_count(this->_pin_sequence); + + const auto index = std::distance(this->_frame_information.begin(), frame_information_iterator); + return reinterpret_cast(&(this->_frames[index])); + } + else + { + // Find frame for the pinned page. + const auto frame_index = this->find_frame(); + if (this->_frame_information[frame_index].is_pinned()) + { + throw exception::EvictedPagePinnedException(frame_index); + } + + // Write frame back, if the data was modified. + if (this->_frame_information[frame_index].is_dirty() == true) + { + this->_space_manager.write(reinterpret_cast(this->_frames[frame_index])); + } + + // Load page into frame.
+ const auto page = this->_space_manager.read(page_id); + auto *frame = &(this->_frames[frame_index]); + std::memcpy(frame, &page, sizeof(Page)); + + // Mark the frame as occupied by the loaded page (pinned with the current pin sequence). + this->_frame_information[frame_index].occupy(page.id(), this->_pin_sequence); + this->_evicted_frames++; + + return reinterpret_cast(frame); + } +} + +/** Decreases the pin count of the buffered page and ORs is_dirty into its dirty flag; a page that is not buffered is ignored. Throws PageWasNotPinnedException when the pin count is already below 1. */ +void BufferManager::unpin(const Page::page_id page_id, const bool is_dirty) +{ + auto frame_information_iterator = this->frame_information(page_id); + if (frame_information_iterator != this->_frame_information.end()) + { + auto &frame_information = *frame_information_iterator; + if (frame_information.pin_count() < 1) + { + throw exception::PageWasNotPinnedException(page_id); + } + frame_information.decrease_pin_count(); + frame_information.is_dirty(frame_information.is_dirty() | is_dirty); + } +} + +/** Allocates a new page in the storage manager and returns it pinned. */ +Page *BufferManager::allocate() +{ + const auto page_id = this->_space_manager.allocate(); + return this->pin(page_id); +} + +/** Writes every occupied and dirty frame back to storage and resets its dirty flag. */ +void BufferManager::flush() +{ + for (auto i = 0u; i < this->_frame_information.size(); i++) + { + auto &frame_information = this->_frame_information[i]; + + // Write back, when the frame is dirty. + if (frame_information.is_occupied() && frame_information.is_dirty()) + { + auto &page = reinterpret_cast(this->_frames[i]); + assert(page.id() == frame_information.page_id()); + this->_space_manager.write(page); + this->_frame_information[i].is_dirty(false); + } + } +} + +/** Returns an iterator to the first frame information whose page id equals page_id, or end() when there is none. */ +std::vector::iterator BufferManager::frame_information(const Page::page_id page_id) +{ + return std::find_if(this->_frame_information.begin(), this->_frame_information.end(), + [page_id](const FrameInformation &info) { return info.page_id() == page_id; }); +} + +/** Returns the index of the first frame that is not occupied; when all frames are occupied, the victim chosen by the replacement strategy. */ +std::size_t BufferManager::find_frame() +{ + // Check buffer for not occupied pages.
+ for (auto index = 0u; index < this->_frames.size(); index++) + { + if (this->_frame_information[index].is_occupied() == false) + { + return index; + } + } + + // If no free buffer frame found, replace an occupied one. + return this->_replacement_strategy->find_victim(this->_frame_information); +} \ No newline at end of file diff --git a/src/disk/lfu_strategy.cpp b/src/disk/lfu_strategy.cpp new file mode 100644 index 0000000..cdeb9ef --- /dev/null +++ b/src/disk/lfu_strategy.cpp @@ -0,0 +1,57 @@ +/*------------------------------------------------------------------------------* + * Architecture & Implementation of DBMS * + *------------------------------------------------------------------------------* + * Copyright 2020 Databases and Information Systems Group TU Dortmund * + * Visit us at * + * http://dbis.cs.tu-dortmund.de/cms/en/home/ * + * * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * + * OTHER DEALINGS IN THE SOFTWARE. * + * * + * Authors (alphabetical list): * + * Maximilian Berens * + * Roland Kühn * + * Jan Mühlig * + *------------------------------------------------------------------------------* + */ + +#include +#include + +using namespace beedb::disk; + +/** Assignment scaffold: as handed out it always selects frame 0, regardless of pin state. */ +std::size_t LFUStrategy::find_victim(const std::vector &frame_information) +{ + /** + * Assignment (1): Implement the Least Frequently Used eviction strategy. + * + * This strategy evicts that frame from the frame buffer which + * has no pins and is pinned the least times. + * + * This function returns the index of the frame that should be evicted.
+ * + * Hints for implementation: + * - The Parameter "frame_information" is a list of information about + * all frames and their current states. + * - You can get the information for the frame i by "frame_information[i]". + * - The information offers a method "is_pinned()" that returns true, when + * the frame is pinned at the moment, false otherwise. + * - The information offers a method "count_all_pins()" that returns the number + * of all pins (not only active pins). The number is will be reset on eviction. + * + * Procedure: + * - Scan the frame_information for frames that are not pinned at the moment. + * - Select the frame with the lowest number of all pins. + * - Return the index of that frame. + */ + + std::size_t evict_index = 0u; + + + return evict_index; +} \ No newline at end of file diff --git a/src/disk/lru_k_strategy.cpp b/src/disk/lru_k_strategy.cpp new file mode 100644 index 0000000..f5cb9c0 --- /dev/null +++ b/src/disk/lru_k_strategy.cpp @@ -0,0 +1,65 @@ +/*------------------------------------------------------------------------------* + * Architecture & Implementation of DBMS * + *------------------------------------------------------------------------------* + * Copyright 2020 Databases and Information Systems Group TU Dortmund * + * Visit us at * + * http://dbis.cs.tu-dortmund.de/cms/en/home/ * + * * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * + * OTHER DEALINGS IN THE SOFTWARE.
* + * * + * Authors (alphabetical list): * + * Maximilian Berens * + * Roland Kühn * + * Jan Mühlig * + *------------------------------------------------------------------------------* + */ + +#include +#include + +using namespace beedb::disk; + +/** Assignment scaffold: as handed out it always selects frame 0, regardless of pin state. */ +std::size_t LRUKStrategy::find_victim(const std::vector &frame_information) +{ + /** + * Assignment (1): Implement the Least Recently Used eviction strategy with regard + * to the last "k" pins. + * + * The normal "LRU" strategy is a special case of this strategy, + * using k = 1: LRU == LRU-1 + * + * The LRU-K strategy takes not just the last reference of a frame into account, + * but the k-th last reference. For more details, take a look into the original + * paper for LRU-K: "The LRU-K page replacement algorithm for database disk buffering". + * + * This function returns the index of the frame that should be evicted. + * + * Hints for implementation: + * - The Parameter "frame_information" is a list of information about + * all frames and their current states. + * - The method "this->k()" returns the parameter "k" for this strategy. + * - You can get the information for the frame i by "frame_information[i]". + * - The information offers a method "is_pinned()" that returns true, when + * the frame is pinned at the moment, false otherwise. + * - "frame_information[i].pin_timestamp(j)" returns the j-th timestamp at which a + * query pinned the frame "i". + * + * Procedure: + * - Scan the frame_information for frames that are not pinned at the moment + * and have an overall number of pins that is (1) less than k or (2) greater than or equal to k. + * - If (1) is not empty: Select the frame with the lowest timestamp of the last pin. + * - Otherwise select the frame with the lowest timestamp of the last k pins from (2). + * - Return the index to that frame.
+ */ + + std::size_t evict_index = 0u; + + + return evict_index; +} \ No newline at end of file diff --git a/src/disk/lru_strategy.cpp b/src/disk/lru_strategy.cpp new file mode 100644 index 0000000..6d251ef --- /dev/null +++ b/src/disk/lru_strategy.cpp @@ -0,0 +1,57 @@ +/*------------------------------------------------------------------------------* + * Architecture & Implementation of DBMS * + *------------------------------------------------------------------------------* + * Copyright 2020 Databases and Information Systems Group TU Dortmund * + * Visit us at * + * http://dbis.cs.tu-dortmund.de/cms/en/home/ * + * * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * + * OTHER DEALINGS IN THE SOFTWARE. * + * * + * Authors (alphabetical list): * + * Maximilian Berens * + * Roland Kühn * + * Jan Mühlig * + *------------------------------------------------------------------------------* + */ + +#include +#include + +using namespace beedb::disk; + +/** Assignment scaffold: as handed out it always selects frame 0, regardless of pin state. */ +std::size_t LRUStrategy::find_victim(const std::vector &frame_information) +{ + /** + * Assignment (1): Implement the Least Recently Used eviction strategy. + * + * This strategy evicts that frame from the frame buffer which + * is not pinned at the moment and whose last pin lies furthest in the past. + * + * This function returns the index of the frame that should be evicted. + * + * Hints for implementation: + * - The Parameter "frame_information" is a list of information about + * all frames and their current states. + * - You can get the information for the frame i by "frame_information[i]".
+ * - The information offers a method "is_pinned()" that returns true, when + * the frame is pinned at the moment, false otherwise. + * - "frame_information[i].last_pin_timestamp()" returns the timestamp of the last time a + * query pinned the frame "i". + * + * Procedure: + * - Scan the frame_information for frames that are not pinned at the moment. + * - Select the frame with the lowest timestamp of the last pin. + * - Return the index of that frame. + */ + + std::size_t evict_index = 0u; + + + return evict_index; +} \ No newline at end of file diff --git a/src/disk/storage_manager.cpp b/src/disk/storage_manager.cpp new file mode 100644 index 0000000..cda2f38 --- /dev/null +++ b/src/disk/storage_manager.cpp @@ -0,0 +1,75 @@ +/*------------------------------------------------------------------------------* + * Architecture & Implementation of DBMS * + *------------------------------------------------------------------------------* + * Copyright 2020 Databases and Information Systems Group TU Dortmund * + * Visit us at * + * http://dbis.cs.tu-dortmund.de/cms/en/home/ * + * * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * + * OTHER DEALINGS IN THE SOFTWARE.
* + * * + * Authors (alphabetical list): * + * Maximilian Berens * + * Roland Kühn * + * Jan Mühlig * + *------------------------------------------------------------------------------* + */ + +#include +#include +#include +#include + +using namespace beedb::disk; + +StorageManager::StorageManager(const std::string &file_name) +{ + this->_storage_file.open(file_name, std::ios::in | std::ios::out | std::ios::binary); + if (this->_storage_file.is_open() == false) + { + // Create file if not exists. Non-existing files can not be opened with in | out only -> add trunc. + this->_storage_file.open(file_name, std::ios::in | std::ios::out | std::ios::binary | std::ios::trunc); + } + + if (this->_storage_file.is_open() == false) + { + throw exception::CanNotOpenStorageFile(); + } + this->_storage_file.seekg(0, std::ios::end); + + assert(this->_storage_file.tellg() % Config::page_size == 0); + this->_count_pages = this->_storage_file.tellg() / Config::page_size; + + assert(sizeof(Page) == Config::page_size); +} + +StorageManager::~StorageManager() +{ + this->_storage_file.close(); +} + +Page StorageManager::read(const Page::page_id page_id) +{ + Page page(page_id); + this->_storage_file.seekg(page.id() * sizeof(Page), std::ios::beg); + this->_storage_file.read(reinterpret_cast(&page), sizeof(Page)); + return page; +} + +void StorageManager::write(Page &page) +{ + this->_storage_file.seekp(page.id() * sizeof(Page), std::ios::beg); + this->_storage_file.write(reinterpret_cast(&page), sizeof(Page)); +} + +Page::page_id StorageManager::allocate() +{ + Page page(this->_count_pages++); + this->write(page); + return page.id(); +} \ No newline at end of file diff --git a/src/execution/add_to_index_operator.cpp b/src/execution/add_to_index_operator.cpp new file mode 100644 index 0000000..c5e1c39 --- /dev/null +++ b/src/execution/add_to_index_operator.cpp @@ -0,0 +1,64 @@ +/*------------------------------------------------------------------------------* + * Architecture & 
Implementation of DBMS * + *------------------------------------------------------------------------------* + * Copyright 2020 Databases and Information Systems Group TU Dortmund * + * Visit us at * + * http://dbis.cs.tu-dortmund.de/cms/en/home/ * + * * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * + * OTHER DEALINGS IN THE SOFTWARE. * + * * + * Authors (alphabetical list): * + * Maximilian Berens * + * Roland Kühn * + * Jan Mühlig * + *------------------------------------------------------------------------------* + */ + +#include + +using namespace beedb::execution; + +/** Creates an operator that feeds the value at schema position column_index of every tuple passing through into the given index. */ +AddToIndexOperator::AddToIndexOperator(const std::uint32_t column_index, + const std::shared_ptr index) + : _column_index(column_index), _index(index) +{ +} + +/** Opens the child operator. */ +void AddToIndexOperator::open() +{ + this->child()->open(); +} + +/** Closes the child operator. */ +void AddToIndexOperator::close() +{ + this->child()->close(); +} + +/** Pulls the next tuple from the child. If the tuple has a valid page id and the indexed column is of type INT or LONG, the column value is put into the index together with the page id; columns of other types are not indexed. Returns the tuple itself, or an empty optional when the child delivers no more tuples. */ +beedb::util::optional AddToIndexOperator::next() +{ + auto next = this->child()->next(); + if (next == true) + { + if (next->page_id() != disk::Page::INVALID_PAGE_ID) + { + const auto &column = next->schema().column(this->_column_index); + if (column == table::Type::INT) + { + this->_index->put(std::get(next->get(this->_column_index).value()), next->page_id()); + } + else if (column == table::Type::LONG) + { + this->_index->put(std::get(next->get(this->_column_index).value()), next->page_id()); + } + } + return next; + } + + return {}; +} \ No newline at end of file diff --git a/src/execution/aggregate_operator.cpp b/src/execution/aggregate_operator.cpp new file mode 100644 index d880506
--- /dev/null +++ b/src/execution/aggregate_operator.cpp @@ -0,0 +1,220 @@ +/*------------------------------------------------------------------------------* + * Architecture & Implementation of DBMS * + *------------------------------------------------------------------------------* + * Copyright 2020 Databases and Information Systems Group TU Dortmund * + * Visit us at * + * http://dbis.cs.tu-dortmund.de/cms/en/home/ * + * * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * + * OTHER DEALINGS IN THE SOFTWARE. * + * * + * Authors (alphabetical list): * + * Maximilian Berens * + * Roland Kühn * + * Jan Mühlig * + *------------------------------------------------------------------------------* + */ + +//#include +//#include +//#include +// +// using namespace beedb::execution; +// +// AggregateOperator::AggregateOperator(const table::Schema& schema, +// const std::vector &groups, +// const std::vector &columns) +// : AbstractOperator(schema),_groups(groups), _columns(columns) +//{ +//} +// +// void AggregateOperator::initialize() +//{ +// +// /// TODO: Output schema is going to be generated outside of operators! 
+// +//// const auto& child_schema = this->child(0)->schema(); +// +//// for(auto i = 0u; i < child_schema.size(); i++) { +//// const auto& child_column = child_schema[i]; +//// const auto& child_name = child_column.name(); +//// const auto& child_table_name = child(0)->table_name(); +//// for(const auto &column : this->_columns) { +//// if(child_table_name == column.table_alias() && child_name == column.column_name()) { +//// if(column.is_aggregation()) { +//// this->_schema.add({this->aggregation_column_type(column.aggregation_type(), +/// child_column.type()), child_name}, child_table_name); / const auto index = this->_schema.size() - +/// 1; / this->_schema_index_aggregation_map[index] = column.aggregation_type(); / +/// this->_schema_index_map[index] = i; / this->_aggregation_indices.push_back(index); / break; / } / +///} / } +// +//// for(const auto &column : this->_groups) { +//// if(child_table_name == column.table_alias() && child_name == column.column_name()) { +//// this->_schema.add(child_column, child_table_name); +//// break; +//// } +//// } +//// } +//} +// +// beedb::table::Type AggregateOperator::aggregation_column_type(const beedb::expression::AttributeOrigin aggregation, +// const beedb::table::Type &column_type) +//{ +// if(aggregation == beedb::expression::AttributeOrigin::AGG_COUNT) { +// return table::Type::LONG; +// } else if(aggregation == beedb::expression::AttributeOrigin::AGG_AVG) { +// return table::Type::FLOAT; +// } else { +// return column_type; +// } +//} +// +// std::vector AggregateOperator::execute() +//{ +// std::vector input_tuples = this->child(0)->execute(); +// if(input_tuples.empty() && this->_aggregated) { +// return {}; +// } +// this->_aggregated = true; +// +// Aggregator aggregator; +// std::vector output_tuples; +// +// if(this->_groups.empty()) { +// table::Tuple tuple(this->_schema, this->_schema.row_size()); +// for(const auto index : this->_aggregation_indices) { +// const auto aggregate_function = 
this->_schema_index_aggregation_map[index]; +// const auto source_index = this->_schema_index_map[index]; +// if(aggregate_function == beedb::expression::AttributeOrigin::AGG_AVG) { +// aggregator.avg(input_tuples, source_index, tuple, index); +// } else if(aggregate_function == beedb::expression::AttributeOrigin::AGG_MIN) { +// aggregator.min(input_tuples, source_index, tuple, index); +// } else if(aggregate_function == beedb::expression::AttributeOrigin::AGG_MAX) { +// aggregator.max(input_tuples, source_index, tuple, index); +// } else if(aggregate_function == beedb::expression::AttributeOrigin::AGG_SUM) { +// aggregator.sum(input_tuples, source_index, tuple, index); +// } else if(aggregate_function == beedb::expression::AttributeOrigin::AGG_COUNT) { +// aggregator.count(input_tuples, tuple, index); +// } +// } +// output_tuples.push_back(std::move(tuple)); +// } else { +// std::vector indices; +// std::unordered_map> hash_aggregation; +// for(auto& tuple : input_tuples) { +// const auto hash = this->hash(indices, tuple); +// hash_aggregation[hash].push_back(table::Tuple(tuple.schema(), std::move(tuple))); +// } +// +// output_tuples.reserve(hash_aggregation.size()); +// const auto row_size = this->_schema.row_size(); +// for(auto& [_, input_tuples] : hash_aggregation) { +// table::Tuple out_tuple(this->_schema, row_size); +// for(const auto index : this->_aggregation_indices) { +// const auto aggregate_function = this->_schema_index_aggregation_map[index]; +// const auto source_index = this->_schema_index_map[index]; +// if(aggregate_function == expression::AttributeOrigin::AGG_AVG) { +// aggregator.avg(input_tuples, source_index, out_tuple, index); +// } else if(aggregate_function == expression::AttributeOrigin::AGG_MIN) { +// aggregator.min(input_tuples, source_index, out_tuple, index); +// } else if(aggregate_function == expression::AttributeOrigin::AGG_MAX) { +// aggregator.max(input_tuples, source_index, out_tuple, index); +// } else if(aggregate_function 
== expression::AttributeOrigin::AGG_SUM) { +// aggregator.sum(input_tuples, source_index, out_tuple, index); +// } else if(aggregate_function == expression::AttributeOrigin::AGG_COUNT) { +// aggregator.count(input_tuples, out_tuple, index); +// } +// } +// output_tuples.push_back(std::move(out_tuple)); +// } +// } +// +// return output_tuples; +//} +// +// void Aggregator::avg(const std::vector &source_tuples, const std::size_t source_index, table::Tuple +// &target_tuple, const std::size_t target_index) +//{ +// if(source_tuples.empty()) { +// return; +// } +// +// auto source_type = source_tuples[0].schema()[source_index].type(); +// table::Value sum { source_type, 0 }; +// for(const auto& tuple : source_tuples) { +// sum += tuple.get(source_index); +// } +// +// float avg; +// if(sum == table::Type::LONG) { +// avg = sum.get() / float(source_tuples.size()); +// } else if(sum == table::Type::INT) { +// avg = sum.get() / float(source_tuples.size()); +// } else if(sum == table::Type::FLOAT) { +// avg = sum.get() / float(source_tuples.size()); +// } else if(sum == table::Type::DOUBLE) { +// avg = sum.get() / float(source_tuples.size()); +// } +// +// target_tuple.set(target_index, avg); +//} +// +// void Aggregator::min(const std::vector &source_tuples, const std::size_t source_index, table::Tuple +// &target_tuple, const std::size_t target_index) +//{ +// auto max_value = target_tuple.get(target_index).max(); +// target_tuple.set(target_index, max_value); +// +// for(const auto& tuple : source_tuples) { +// auto v = tuple.get(source_index); +// if(v < target_tuple.get(target_index)) { +// target_tuple.set(target_index, v); +// } +// } +//} +// +// void Aggregator::max(const std::vector &source_tuples, const std::size_t source_index, table::Tuple +// &target_tuple, const std::size_t target_index) +//{ +// auto min_value = target_tuple.get(target_index).min(); +// target_tuple.set(target_index, min_value); +// +// for(const auto& tuple : source_tuples) { +// auto v = 
tuple.get(source_index); +// if(v > target_tuple.get(target_index)) { +// target_tuple.set(target_index, v); +// } +// } +//} +// +// void Aggregator::sum(const std::vector &source_tuples, const std::size_t source_index, table::Tuple +// &target_tuple, const std::size_t target_index) +//{ +// auto sum = target_tuple.get(target_index); +// for(const auto& tuple : source_tuples) { +// sum += tuple.get(source_index); +// } +// target_tuple.set(target_index, sum); +//} +// +// void Aggregator::count(const std::vector &source_tuples, table::Tuple &target_tuple, const std::size_t +// target_index) +//{ +// auto result = target_tuple.get(target_index).get() + std::int64_t(source_tuples.size()); +// target_tuple.set(target_index, result); +//} +// +// std::size_t AggregateOperator::hash(const std::vector &group_indices, +// const beedb::table::Tuple &tuple) +//{ +// std::size_t h = 0u; +// for(const auto index : group_indices) { +// h += std::hash()(tuple.get(index)); +// } +// return std::hash()(h); +//} \ No newline at end of file diff --git a/src/execution/binary_operator.cpp b/src/execution/binary_operator.cpp new file mode 100644 index 0000000..5337959 --- /dev/null +++ b/src/execution/binary_operator.cpp @@ -0,0 +1,36 @@ +/*------------------------------------------------------------------------------* + * Architecture & Implementation of DBMS * + *------------------------------------------------------------------------------* + * Copyright 2020 Databases and Information Systems Group TU Dortmund * + * Visit us at * + * http://dbis.cs.tu-dortmund.de/cms/en/home/ * + * * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL * + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * + * OTHER DEALINGS IN THE SOFTWARE. * + * * + * Authors (alphabetical list): * + * Maximilian Berens * + * Roland Kühn * + * Jan Mühlig * + *------------------------------------------------------------------------------* + */ + +#include +#include + +using namespace beedb::execution; + +beedb::table::Tuple BinaryOperator::combine(const table::Schema &new_schema, const beedb::table::Tuple &left, + const beedb::table::Tuple &right) const +{ + auto tuple = table::Tuple{new_schema, new_schema.row_size()}; + std::memcpy(tuple.data(), left.data(), left.schema().row_size()); + std::memcpy(tuple.data() + left.schema().row_size(), right.data(), right.schema().row_size()); + + return tuple; +} \ No newline at end of file diff --git a/src/execution/build_index_operator.cpp b/src/execution/build_index_operator.cpp new file mode 100644 index 0000000..40334b4 --- /dev/null +++ b/src/execution/build_index_operator.cpp @@ -0,0 +1,84 @@ +/*------------------------------------------------------------------------------* + * Architecture & Implementation of DBMS * + *------------------------------------------------------------------------------* + * Copyright 2020 Databases and Information Systems Group TU Dortmund * + * Visit us at * + * http://dbis.cs.tu-dortmund.de/cms/en/home/ * + * * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL * + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * + * OTHER DEALINGS IN THE SOFTWARE. * + * * + * Authors (alphabetical list): * + * Maximilian Berens * + * Roland Kühn * + * Jan Mühlig * + *------------------------------------------------------------------------------* + */ + +#include + +using namespace beedb::execution; + +BuildIndexOperator::BuildIndexOperator(Database &database, const std::string &table_name, + const table::Schema::ColumnIndexType column_index, const std::string &index_name) + : _database(database), _table_name(table_name), _column_index(column_index), _index_name(index_name) +{ +} + +void BuildIndexOperator::open() +{ + if (this->left_child() != nullptr) + { + this->left_child()->open(); + } + + this->right_child()->open(); +} + +void BuildIndexOperator::close() +{ + if (this->left_child() != nullptr) + { + this->left_child()->close(); + } + + this->right_child()->close(); +} + +beedb::util::optional BuildIndexOperator::next() +{ + if (this->left_child() != nullptr) + { + this->left_child()->next(); + } + + auto *table = this->_database.table(this->_table_name); + const auto &column_to_index = table->schema().column(this->_column_index); + auto index = column_to_index.index(this->_index_name); + auto tuple_to_index = this->right_child()->next(); + while (tuple_to_index == true) + { + if (tuple_to_index->page_id() != disk::Page::INVALID_PAGE_ID) + { + if (column_to_index == table::Type::INT) + { + index->put(std::get(tuple_to_index->get(this->_column_index).value()), + tuple_to_index->page_id()); + } + else if (column_to_index == table::Type::LONG) + { + index->put(std::get(tuple_to_index->get(this->_column_index).value()), + tuple_to_index->page_id()); + } + } + + tuple_to_index = this->right_child()->next(); + } + + return {}; +} \ No newline at 
end of file diff --git a/src/execution/create_index_operator.cpp b/src/execution/create_index_operator.cpp new file mode 100644 index 0000000..78f1603 --- /dev/null +++ b/src/execution/create_index_operator.cpp @@ -0,0 +1,49 @@ +/*------------------------------------------------------------------------------* + * Architecture & Implementation of DBMS * + *------------------------------------------------------------------------------* + * Copyright 2020 Databases and Information Systems Group TU Dortmund * + * Visit us at * + * http://dbis.cs.tu-dortmund.de/cms/en/home/ * + * * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * + * OTHER DEALINGS IN THE SOFTWARE. 
* + * * + * Authors (alphabetical list): * + * Maximilian Berens * + * Roland Kühn * + * Jan Mühlig * + *------------------------------------------------------------------------------* + */ + +#include +#include + +using namespace beedb::execution; + +CreateIndexOperator::CreateIndexOperator(Database &database, const std::string &table_name, + const expression::Attribute &attribute, const std::string &index_name, + const bool is_unique, const index::Type type) + : _database(database), _table_name(table_name), _attribute(attribute), _index_name(index_name), + _is_unique(is_unique), _index_type(type) +{ +} + +beedb::util::optional CreateIndexOperator::next() +{ + auto *table = this->_database[this->_table_name]; + auto &column = (*table)[this->_attribute]; + + // Persist index + this->_database.create_index(column, this->_index_type, this->_index_name, this->_is_unique); + + // Create index + auto index = index::IndexFactory::new_index(this->_index_name, this->_index_type, this->_is_unique); + column.add_index(std::move(index)); + + return {}; +} \ No newline at end of file diff --git a/src/execution/create_table_operator.cpp b/src/execution/create_table_operator.cpp new file mode 100644 index 0000000..b630401 --- /dev/null +++ b/src/execution/create_table_operator.cpp @@ -0,0 +1,36 @@ +/*------------------------------------------------------------------------------* + * Architecture & Implementation of DBMS * + *------------------------------------------------------------------------------* + * Copyright 2020 Databases and Information Systems Group TU Dortmund * + * Visit us at * + * http://dbis.cs.tu-dortmund.de/cms/en/home/ * + * * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL * + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * + * OTHER DEALINGS IN THE SOFTWARE. * + * * + * Authors (alphabetical list): * + * Maximilian Berens * + * Roland Kühn * + * Jan Mühlig * + *------------------------------------------------------------------------------* + */ + +#include + +using namespace beedb::execution; + +CreateTableOperator::CreateTableOperator(beedb::Database &database, const table::Schema schema_to_create) + : _database(database), _schema_to_create(schema_to_create) +{ +} + +beedb::util::optional CreateTableOperator::next() +{ + this->_database.create_table(this->_schema_to_create); + return {}; +} \ No newline at end of file diff --git a/src/execution/cross_product_operator.cpp b/src/execution/cross_product_operator.cpp new file mode 100644 index 0000000..9b123dd --- /dev/null +++ b/src/execution/cross_product_operator.cpp @@ -0,0 +1,65 @@ +/*------------------------------------------------------------------------------* + * Architecture & Implementation of DBMS * + *------------------------------------------------------------------------------* + * Copyright 2020 Databases and Information Systems Group TU Dortmund * + * Visit us at * + * http://dbis.cs.tu-dortmund.de/cms/en/home/ * + * * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * + * OTHER DEALINGS IN THE SOFTWARE. 
* + * * + * Authors (alphabetical list): * + * Maximilian Berens * + * Roland Kühn * + * Jan Mühlig * + *------------------------------------------------------------------------------* + */ + +#include + +using namespace beedb::execution; + +CrossProductOperator::CrossProductOperator(const table::Schema &&schema) : _schema(schema) +{ +} + +void CrossProductOperator::open() +{ + this->left_child()->open(); + this->right_child()->open(); + + this->_next_left_tuple = this->left_child()->next(); +} + +void CrossProductOperator::close() +{ + this->left_child()->close(); + this->right_child()->close(); +} + +beedb::util::optional CrossProductOperator::next() +{ + if (this->_next_left_tuple == false) + { + return {}; + } + + while (this->_next_left_tuple == true) + { + auto next_right_tuple = this->right_child()->next(); + if (next_right_tuple == true) + { + return {this->combine(this->_schema, this->_next_left_tuple, next_right_tuple)}; + } + this->right_child()->close(); + this->right_child()->open(); + this->_next_left_tuple = this->left_child()->next(); + } + + return {}; +} \ No newline at end of file diff --git a/src/execution/hash_join_operator.cpp b/src/execution/hash_join_operator.cpp new file mode 100644 index 0000000..1a9ddd3 --- /dev/null +++ b/src/execution/hash_join_operator.cpp @@ -0,0 +1,144 @@ +/*------------------------------------------------------------------------------* + * Architecture & Implementation of DBMS * + *------------------------------------------------------------------------------* + * Copyright 2020 Databases and Information Systems Group TU Dortmund * + * Visit us at * + * http://dbis.cs.tu-dortmund.de/cms/en/home/ * + * * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL * + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * + * OTHER DEALINGS IN THE SOFTWARE. * + * * + * Authors (alphabetical list): * + * Maximilian Berens * + * Roland Kühn * + * Jan Mühlig * + *------------------------------------------------------------------------------* + */ + +#include + +using namespace beedb::execution; + +HashJoinOperator::HashJoinOperator(const table::Schema schema, const std::uint32_t left_index, + const std::uint32_t right_index) + : _schema(schema), _left_index(left_index), _right_index(right_index), _hash_table(_left_index) +{ +} + +void HashJoinOperator::build_hash_table() +{ + /** + * Assignment (4): Implement the HashJoin operator + * + * The hash join will join two sources by hashing one and probing + * the other. + * + * This function builds the hash table by scanning and hashing all + * tuples from the left child. + * + * Hints for implementation: + * - The methods "this->left_child()" and "this->right_child()" + * grant access to both children of the operator. + * - Each operator has a "next()" method, which returns the next + * tuple from that operator. But, that tuple may be empty (when + * there is no tuple to return). + * - You can test whether a tuple is not empty with "tuple == true". + * - You can add a tuple to the hash table by "this->_hash_table.put(tuple)". + * + * Procedure: + * - Iterate over the left child and add its tuples to the hash table. + */ + +} + +beedb::util::optional HashJoinOperator::probe_hash_table() +{ + /** + * Assignment (4): Implement the HashJoin operator + * + * The hash join will join two sources by hashing one and probing + * the other. + * + * This function probes the built hash table and returns the next + * tuple matching the join condition.
+ * + * Hints for implementation: + * - The methods "this->left_child()" and "this->right_child()" + * grant access to both children of the operator. + * - Each operator has a "next()" method, which returns the next + * tuple from that operator. But, that tuple may be empty (when + * there is no tuple to return). + * - You can test whether a tuple is not empty with "tuple == true". + * - Each tuple has a "get(i)" method which returns the value of + * the tuple at index "i". + * - The required indices for joining the left and the right child + * are stored in "this->_left_index" and "this->_right_index". + * - You can test whether the hash table contains a candidate for a + * value by "this->_hash_table.contains(value)". + * - You can get all matching tuples from the hash table by + * "this->_hash_table.get(value)" which returns a reference + * to a vector containing the tuples. + * - You can combine two tuples using the combine method + * "this->combine(this->_schema, tuple_1, tuple_2)", which produces + * a new tuple. + * - Because one match may create more than one tuple, but you can + * only return one tuple, you have to buffer the tuples created by + * a single match. + * - For buffering tuples, you can use the TupleBuffer ("this->_tuple_buffer"). + * - You can add tuples to the buffer by "this->_tuple_buffer.add(tuple)". + * - You can get the next tuple from the buffer by "this->_tuple_buffer.pop()". + * + * Procedure: + * - Iterate over the right child. + * - For every tuple, check whether the built hash table + * contains tuples that match. Remember: The key for the hash table is + * a value at the index "this->_right_index" of a tuple from the right child. + * - For every tuple from the right child and all tuples stored in the hash table + * create a new tuple using "this->combine(this->_schema, tuple_1, tuple_2)". + * - Add all newly created tuples to the tuple buffer. + * - Return the first tuple from the buffer by "popping" it.
+ */ + + + return {}; +} + +void HashJoinOperator::open() +{ + this->left_child()->open(); + this->right_child()->open(); +} + +void HashJoinOperator::close() +{ + this->left_child()->close(); + this->right_child()->close(); +} + +beedb::util::optional HashJoinOperator::next() +{ + // Build phase + if (this->_is_built == false) + { + this->build_hash_table(); + this->_is_built = true; + } + + // In case there are tuple in the buffer, return them first. + if (this->_tuple_buffer.empty() == false) + { + return {this->_tuple_buffer.pop()}; + } + else + { + this->_tuple_buffer.clear(); + } + + // Probe phase + return this->probe_hash_table(); +} \ No newline at end of file diff --git a/src/execution/index_scan_operator.cpp b/src/execution/index_scan_operator.cpp new file mode 100644 index 0000000..0913ed3 --- /dev/null +++ b/src/execution/index_scan_operator.cpp @@ -0,0 +1,152 @@ +/*------------------------------------------------------------------------------* + * Architecture & Implementation of DBMS * + *------------------------------------------------------------------------------* + * Copyright 2020 Databases and Information Systems Group TU Dortmund * + * Visit us at * + * http://dbis.cs.tu-dortmund.de/cms/en/home/ * + * * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * + * OTHER DEALINGS IN THE SOFTWARE. 
* + * * + * Authors (alphabetical list): * + * Maximilian Berens * + * Roland Kühn * + * Jan Mühlig * + *------------------------------------------------------------------------------* + */ + +#include +#include +#include +#include + +using namespace beedb::execution; + +IndexScanOperator::IndexScanOperator(const std::uint32_t scan_page_limit, const beedb::table::Schema &schema, + beedb::disk::BufferManager &buffer_manager, + beedb::table::TableDiskManager &table_disk_manager, std::set &key_ranges, + std::shared_ptr index) + : _scan_page_limit(scan_page_limit), _schema(schema), _buffer_manager(buffer_manager), + _table_disk_manager(table_disk_manager), _key_ranges(std::move(key_ranges)), _index(index) +{ +} + +void IndexScanOperator::open() +{ + for (const auto &key_range : this->_key_ranges) + { + if (key_range.is_single_key()) + { + if (this->_index->is_unique()) + { + const auto page = + dynamic_cast(this->_index.get())->get(key_range.single_key()); + if (page.has_value()) + { + this->_pages_to_scan.push(page.value()); + } + } + else + { + const auto pages = + dynamic_cast(this->_index.get())->get(key_range.single_key()); + if (pages.has_value()) + { + for (const auto page : pages.value()) + { + this->_pages_to_scan.push(page); + } + } + } + } + else + { + const auto pages = + dynamic_cast(this->_index.get())->get(key_range.from(), key_range.to()); + if (pages.has_value()) + { + for (const auto page : pages.value()) + { + this->_pages_to_scan.push(page); + } + } + } + } +} + +void IndexScanOperator::close() +{ + if (this->_pinned_pages.empty() == false) + { + for (const auto page_id : this->_pinned_pages) + { + this->_buffer_manager.unpin(page_id, false); + } + this->_pinned_pages.clear(); + } +} + +beedb::util::optional IndexScanOperator::next() +{ + if (this->_buffer.empty() == false) + { + auto next = this->_buffer.pop(); + return {std::move(next)}; + } + + this->_buffer.clear(); + + if (this->_pinned_pages.empty() == false) + { + for (const auto page_id : 
this->_pinned_pages) + { + this->_buffer_manager.unpin(page_id, false); + } + this->_pinned_pages.clear(); + } + + if (this->_pages_to_scan.empty()) + { + return {}; + } + + // The tuple buffer is empty: refill it by scanning at most _scan_page_limit pages. + for (auto i = 0u; i < this->_scan_page_limit; i++) + { + if (this->_pages_to_scan.empty()) + { + break; + } + + auto next_page_id = this->_pages_to_scan.front(); + this->_pages_to_scan.pop(); + auto page = this->_buffer_manager.pin(next_page_id); + auto tuples = this->_table_disk_manager.read_rows(page, this->_schema); + + if (tuples.empty() == false) + { + this->_buffer.add(tuples); + this->_pinned_pages.push_back(page->id()); + } + else + { + this->_buffer_manager.unpin(page, false); + break; + } + } + + if (this->_buffer.empty() == false) + { + auto next = this->_buffer.pop(); + return {std::move(next)}; + } + else + { + return {}; + } +} \ No newline at end of file diff --git a/src/execution/insert_operator.cpp b/src/execution/insert_operator.cpp new file mode 100644 index 0000000..d30cd20 --- /dev/null +++ b/src/execution/insert_operator.cpp @@ -0,0 +1,62 @@ +/*------------------------------------------------------------------------------* + * Architecture & Implementation of DBMS * + *------------------------------------------------------------------------------* + * Copyright 2020 Databases and Information Systems Group TU Dortmund * + * Visit us at * + * http://dbis.cs.tu-dortmund.de/cms/en/home/ * + * * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * + * OTHER DEALINGS IN THE SOFTWARE.
* + * * + * Authors (alphabetical list): * + * Maximilian Berens * + * Roland Kühn * + * Jan Mühlig * + *------------------------------------------------------------------------------* + */ + +#include + +using namespace beedb::execution; + +InsertOperator::InsertOperator(beedb::disk::BufferManager &buffer_manager, + beedb::table::TableDiskManager &table_disk_manager, + beedb::statistic::SystemStatistics &statistics, beedb::table::Table &table) + : _buffer_manager(buffer_manager), _table_disk_manager(table_disk_manager), _statistics(statistics), _table(table) +{ +} + +void InsertOperator::open() +{ + this->child()->open(); +} + +void InsertOperator::close() +{ + this->child()->close(); +} + +beedb::util::optional InsertOperator::next() +{ + if (this->_last_pinned_page != disk::Page::INVALID_PAGE_ID) + { + this->_buffer_manager.unpin(this->_last_pinned_page, true); + this->_last_pinned_page = disk::Page::INVALID_PAGE_ID; + } + + auto next = this->child()->next(); + if (next == true) + { + auto tuple = this->_table_disk_manager.add_row(this->_table, this->_table.schema(), std::move(next.value())); + this->_last_pinned_page = tuple.page_id(); + this->_statistics.table_statistics().add_cardinality(this->_table, 1); + return {std::move(tuple)}; + } + + return {}; +} \ No newline at end of file diff --git a/src/execution/limit_operator.cpp b/src/execution/limit_operator.cpp new file mode 100644 index 0000000..1e3efec --- /dev/null +++ b/src/execution/limit_operator.cpp @@ -0,0 +1,80 @@ +/*------------------------------------------------------------------------------* + * Architecture & Implementation of DBMS * + *------------------------------------------------------------------------------* + * Copyright 2020 Databases and Information Systems Group TU Dortmund * + * Visit us at * + * http://dbis.cs.tu-dortmund.de/cms/en/home/ * + * * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 
OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * + * OTHER DEALINGS IN THE SOFTWARE. * + * * + * Authors (alphabetical list): * + * Maximilian Berens * + * Roland Kühn * + * Jan Mühlig * + *------------------------------------------------------------------------------* + */ + +#include + +using namespace beedb::execution; + +LimitOperator::LimitOperator(const beedb::table::Schema &schema, const std::uint64_t limit, const std::uint64_t offset) + : _schema(schema), _limit(limit), _offset(offset) +{ +} + +void LimitOperator::open() +{ + this->child()->open(); +} + +void LimitOperator::close() +{ + this->child()->close(); +} + +beedb::util::optional LimitOperator::next() +{ + /** + * Assignment (3): Implement the operator "LIMIT" + * + * The limit operator reduces the number of tuples produced by the query. + * Limit takes two inputs: the overall number of tuples that should be + * produced by the query, and the offset. The latter one is an optional + * parameter, which is set to zero by default. + * + * This function returns a tuple or no tuple each time the + * parent operator calls the next() function. + * + * Hints for implementation: + * - The limit and the offset are accessible by + * "this->_limit" and "this->_offset". + * - You can return "no tuple" by "return { };" + * - You can ask the child operator for the next tuple by + * "this->child()->next()" which is an optional tuple that + * may contain a tuple or not. + * - You can ask the optional tuple if it has a value with + * "tuple->has_value()".
+ * - The type "beedb::util::optional" is inspired by std::optional, + * take a look to https://en.cppreference.com/w/cpp/utility/optional + * + * + * Procedure: + * - The first time this function is called skip "this->_offset" + * tuples. + * - Ask the child operator for the next tuple and return it until + * you reached the limit. + * - When the limit is reached (you have to count by yourself) + * or the tuple from children has no value, return (also) + * no tuple (by "return {};"). + */ + + + return {}; +} \ No newline at end of file diff --git a/src/execution/nested_loops_join_operator.cpp b/src/execution/nested_loops_join_operator.cpp new file mode 100644 index 0000000..66727ef --- /dev/null +++ b/src/execution/nested_loops_join_operator.cpp @@ -0,0 +1,117 @@ +/*------------------------------------------------------------------------------* + * Architecture & Implementation of DBMS * + *------------------------------------------------------------------------------* + * Copyright 2020 Databases and Information Systems Group TU Dortmund * + * Visit us at * + * http://dbis.cs.tu-dortmund.de/cms/en/home/ * + * * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * + * OTHER DEALINGS IN THE SOFTWARE. 
* + * * + * Authors (alphabetical list): * + * Maximilian Berens * + * Roland Kühn * + * Jan Mühlig * + *------------------------------------------------------------------------------* + */ + +#include + +using namespace beedb::execution; + +NestedLoopsJoinOperator::NestedLoopsJoinOperator(const table::Schema &&schema, + std::unique_ptr predicate_matcher) + : _schema(schema), _predicate_matcher(std::move(predicate_matcher)) +{ +} + +void NestedLoopsJoinOperator::open() +{ + /** + * Assignment (4): Implement the NestedLoopsJoin operator + * + * The nested loops join will join from to sources by two loops. + * The outer loop will iterate over the left child, the inner loop + * will iterate over the right child to combine all matching tuples. + * + * This function opens the operator. + * + * Hints for implementation: + * - The methods "this->left_child()" and "this->right_child()" + * grant access to both children of the operator. + * - Each operator has an "open()" method. + * - Each operator has a "next()" method, which returns the next + * tuple from that operator. + * + * Procedure: + * - Open both children. + * - Store the first next tuple from the left child. You can store it + * in "this->_next_left_tuple". + */ + +} + +void NestedLoopsJoinOperator::close() +{ + /** + * Assignment (4): Implement the NestedLoopsJoin operator + * + * The nested loops join will join from to sources by two loops. + * The outer loop will iterate over the left child, the inner loop + * will iterate over the right child to combine all matching tuples. + * + * This function closes the operator. + * + * Hints for implementation: + * - The methods "this->left_child()" and "this->right_child()" + * grant access to both children of the operator. + * - Each operator has an "close()" method. + * + * Procedure: + * - Close both children. 
+ */ + +} + +beedb::util::optional NestedLoopsJoinOperator::next() +{ + /** + * Assignment (4): Implement the NestedLoopsJoin operator + * + * The nested loops join will join from to sources by two loops. + * The outer loop will iterate over the left child, the inner loop + * will iterate over the right child to combine all matching tuples. + * + * This function return the next tuple produced by this operator. + * + * Hints for implementation: + * - The methods "this->left_child()" and "this->right_child()" + * grant access to both children of the operator. + * - Each operator has a "next()" method, which returns the next + * tuple from that operator. + * - "this->_next_left_tuple" stores the next tuple from the left child. + * - You can test whether the a tuple holds a value with "tuple == true", + * e.g. "this->_next_left_tuple == true". + * - You can test whether two tuples matches the join condition using + * "this->matches(tuple_1, tuple_2)" which may return true or false. + * - You can combine two tuples using the combine method + * "this->combine(this->_schema, tuple_1, tuple_2)", which produces + * a new tuple. + * + * Procedure: + * - Iterate over the left child and store the next tuple in "this->_next_left_tuple". + * - Iterate over the right child until you found a tuple that matches the + * join condition for the next left and right tuple. + * - Produce and return a new tuple by combining the two matching tuples. + * - After each inner iteration close and open the right child to start + * at the first tuple of the right child. 
+ */ + + + return {}; +} \ No newline at end of file diff --git a/src/execution/order_operator.cpp b/src/execution/order_operator.cpp new file mode 100644 index 0000000..3db1eb5 --- /dev/null +++ b/src/execution/order_operator.cpp @@ -0,0 +1,72 @@ +/*------------------------------------------------------------------------------* + * Architecture & Implementation of DBMS * + *------------------------------------------------------------------------------* + * Copyright 2020 Databases and Information Systems Group TU Dortmund * + * Visit us at * + * http://dbis.cs.tu-dortmund.de/cms/en/home/ * + * * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * + * OTHER DEALINGS IN THE SOFTWARE. 
* + * * + * Authors (alphabetical list): * + * Maximilian Berens * + * Roland Kühn * + * Jan Mühlig * + *------------------------------------------------------------------------------* + */ + +#include +#include + +using namespace beedb::execution; + +OrderOperator::OrderOperator(const beedb::table::Schema &schema, + std::vector> &&order_columns) + : _schema(schema), _order_columns(order_columns) +{ +} + +void OrderOperator::open() +{ + this->child()->open(); +} + +void OrderOperator::close() +{ + this->child()->close(); +} + +beedb::util::optional OrderOperator::next() +{ + if (this->_result_table == nullptr) + { + this->_result_table.reset(new table::MemoryTable(this->_schema)); + auto tuple = this->child()->next(); + while (tuple == true) + { + this->_result_table->copy_to_memory(tuple); + tuple = this->child()->next(); + } + + if (this->_result_table->empty()) + { + return {}; + } + + auto comparator = TupleComparator{this->_order_columns}; + util::Quicksort::sort(this->_result_table->tuples(), comparator); + } + + if (this->_stack_index < this->_result_table->size()) + { + auto &next_tuple = this->_result_table->tuples()[this->_stack_index++]; + return {std::move(next_tuple)}; + } + + return {}; +} \ No newline at end of file diff --git a/src/execution/projection_operator.cpp b/src/execution/projection_operator.cpp new file mode 100644 index 0000000..91d4123 --- /dev/null +++ b/src/execution/projection_operator.cpp @@ -0,0 +1,50 @@ +/*------------------------------------------------------------------------------* + * Architecture & Implementation of DBMS * + *------------------------------------------------------------------------------* + * Copyright 2020 Databases and Information Systems Group TU Dortmund * + * Visit us at * + * http://dbis.cs.tu-dortmund.de/cms/en/home/ * + * * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * + * FITNESS FOR A 
PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * + * OTHER DEALINGS IN THE SOFTWARE. * + * * + * Authors (alphabetical list): * + * Maximilian Berens * + * Roland Kühn * + * Jan Mühlig * + *------------------------------------------------------------------------------* + */ + +#include + +using namespace beedb::execution; + +ProjectionOperator::ProjectionOperator(const table::Schema schema) : _schema(schema) +{ +} + +void ProjectionOperator::open() +{ + this->child()->open(); +} + +void ProjectionOperator::close() +{ + this->child()->close(); +} + +beedb::util::optional ProjectionOperator::next() +{ + auto next_tuple = this->child()->next(); + if (next_tuple == true) + { + return {table::Tuple(this->_schema, std::move(next_tuple.value()))}; + } + + return {}; +} \ No newline at end of file diff --git a/src/execution/selection_operator.cpp b/src/execution/selection_operator.cpp new file mode 100644 index 0000000..4b065b9 --- /dev/null +++ b/src/execution/selection_operator.cpp @@ -0,0 +1,56 @@ +/*------------------------------------------------------------------------------* + * Architecture & Implementation of DBMS * + *------------------------------------------------------------------------------* + * Copyright 2020 Databases and Information Systems Group TU Dortmund * + * Visit us at * + * http://dbis.cs.tu-dortmund.de/cms/en/home/ * + * * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL * + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * + * OTHER DEALINGS IN THE SOFTWARE. * + * * + * Authors (alphabetical list): * + * Maximilian Berens * + * Roland Kühn * + * Jan Mühlig * + *------------------------------------------------------------------------------* + */ + +#include + +using namespace beedb::execution; + +SelectionOperator::SelectionOperator(const table::Schema &schema, + std::unique_ptr predicate_matcher) + : _schema(schema), _predicate_matcher(std::move(predicate_matcher)) +{ +} + +void SelectionOperator::open() +{ + this->child()->open(); +} + +void SelectionOperator::close() +{ + this->child()->close(); +} + +beedb::util::optional SelectionOperator::next() +{ + auto tuple = this->child()->next(); + while (tuple == true) + { + if (this->matches(tuple)) + { + return tuple; + } + tuple = this->child()->next(); + } + + return {}; +} \ No newline at end of file diff --git a/src/execution/sequential_scan_operator.cpp b/src/execution/sequential_scan_operator.cpp new file mode 100644 index 0000000..b1f9fd4 --- /dev/null +++ b/src/execution/sequential_scan_operator.cpp @@ -0,0 +1,112 @@ +/*------------------------------------------------------------------------------* + * Architecture & Implementation of DBMS * + *------------------------------------------------------------------------------* + * Copyright 2020 Databases and Information Systems Group TU Dortmund * + * Visit us at * + * http://dbis.cs.tu-dortmund.de/cms/en/home/ * + * * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL * + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * + * OTHER DEALINGS IN THE SOFTWARE. * + * * + * Authors (alphabetical list): * + * Maximilian Berens * + * Roland Kühn * + * Jan Mühlig * + *------------------------------------------------------------------------------* + */ + +#include + +using namespace beedb::execution; + +SequentialScanOperator::SequentialScanOperator(const std::uint32_t scan_page_limit, const beedb::table::Schema &schema, + beedb::disk::BufferManager &buffer_manager, + beedb::table::TableDiskManager &table_disk_manager, + const beedb::table::Table &table) + : _scan_page_limit(scan_page_limit), _schema(schema), _buffer_manager(buffer_manager), + _table_disk_manager(table_disk_manager), _table(table) +{ +} + +void SequentialScanOperator::open() +{ + this->_next_page_id_to_scan = this->_table.page_id(); +} + +void SequentialScanOperator::close() +{ + if (this->_pinned_pages.empty() == false) + { + for (const auto page_id : this->_pinned_pages) + { + this->_buffer_manager.unpin(page_id, false); + } + this->_pinned_pages.clear(); + } +} + +beedb::util::optional SequentialScanOperator::next() +{ + if (this->_buffer.empty() == false) + { + auto next = this->_buffer.pop(); + return {std::move(next)}; + } + + this->_buffer.clear(); + + if (this->_pinned_pages.empty() == false) + { + for (const auto page_id : this->_pinned_pages) + { + this->_buffer_manager.unpin(page_id, false); + } + this->_pinned_pages.clear(); + } + + if (this->_next_page_id_to_scan == disk::Page::INVALID_PAGE_ID) + { + return {}; + } + + // When we need more, scan pages max pages. 
+ for (auto i = 0u; i < this->_scan_page_limit; i++) + { + if (this->_next_page_id_to_scan == disk::Page::INVALID_PAGE_ID) + { + break; + } + + auto page = this->_buffer_manager.pin(this->_next_page_id_to_scan); + auto tuples = this->_table_disk_manager.read_rows(page, this->_schema); + + if (tuples.empty() == false) + { + this->_buffer.add(tuples); + this->_pinned_pages.push_back(page->id()); + } + else + { + this->_buffer_manager.unpin(page, false); + this->_next_page_id_to_scan = disk::Page::INVALID_PAGE_ID; + break; + } + + this->_next_page_id_to_scan = page->next_page_id(); + } + + if (this->_buffer.empty() == false) + { + auto next = this->_buffer.pop(); + return {std::move(next)}; + } + else + { + return {}; + } +} \ No newline at end of file diff --git a/src/execution/tuple_buffer_operator.cpp b/src/execution/tuple_buffer_operator.cpp new file mode 100644 index 0000000..df5c273 --- /dev/null +++ b/src/execution/tuple_buffer_operator.cpp @@ -0,0 +1,39 @@ +/*------------------------------------------------------------------------------* + * Architecture & Implementation of DBMS * + *------------------------------------------------------------------------------* + * Copyright 2020 Databases and Information Systems Group TU Dortmund * + * Visit us at * + * http://dbis.cs.tu-dortmund.de/cms/en/home/ * + * * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * + * OTHER DEALINGS IN THE SOFTWARE. 
* + * * + * Authors (alphabetical list): * + * Maximilian Berens * + * Roland Kühn * + * Jan Mühlig * + *------------------------------------------------------------------------------* + */ + +#include + +using namespace beedb::execution; + +TupleBufferOperator::TupleBufferOperator(const beedb::table::Schema schema) : _schema(schema) +{ +} + +beedb::util::optional TupleBufferOperator::next() +{ + if (this->_tuple_buffer.empty() == false) + { + return {this->_tuple_buffer.pop()}; + } + + return {}; +} \ No newline at end of file diff --git a/src/execution/update_operator.cpp b/src/execution/update_operator.cpp new file mode 100644 index 0000000..fb13f0f --- /dev/null +++ b/src/execution/update_operator.cpp @@ -0,0 +1,51 @@ +/*------------------------------------------------------------------------------* + * Architecture & Implementation of DBMS * + *------------------------------------------------------------------------------* + * Copyright 2020 Databases and Information Systems Group TU Dortmund * + * Visit us at * + * http://dbis.cs.tu-dortmund.de/cms/en/home/ * + * * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * + * OTHER DEALINGS IN THE SOFTWARE. 
* + * * + * Authors (alphabetical list): * + * Maximilian Berens * + * Roland Kühn * + * Jan Mühlig * + *------------------------------------------------------------------------------* + */ + +#include + +using namespace beedb::execution; + +void UpdateOperator::open() +{ + this->child()->open(); +} + +void UpdateOperator::close() +{ + this->child()->close(); +} + +beedb::util::optional UpdateOperator::next() +{ + auto next = this->child()->next(); + while (next == true) + { + for (const auto &update : this->_new_column_values) + { + next->set(update.first, update.second); + } + this->_table_disk_manager.update_row(next); + next = this->child()->next(); + } + + return {}; +} \ No newline at end of file diff --git a/src/expression/attribute.cpp b/src/expression/attribute.cpp new file mode 100644 index 0000000..d9eacfd --- /dev/null +++ b/src/expression/attribute.cpp @@ -0,0 +1,31 @@ +/*------------------------------------------------------------------------------* + * Architecture & Implementation of DBMS * + *------------------------------------------------------------------------------* + * Copyright 2020 Databases and Information Systems Group TU Dortmund * + * Visit us at * + * http://dbis.cs.tu-dortmund.de/cms/en/home/ * + * * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * + * OTHER DEALINGS IN THE SOFTWARE. 
* + * * + * Authors (alphabetical list): * + * Maximilian Berens * + * Roland Kühn * + * Jan Mühlig * + *------------------------------------------------------------------------------* + */ + +#include + +namespace beedb::expression +{ +std::ostream &operator<<(std::ostream &stream, const Attribute &attribute) +{ + return stream << attribute.combined_name; +} +} // namespace beedb::expression \ No newline at end of file diff --git a/src/expression/predicate.cpp b/src/expression/predicate.cpp new file mode 100644 index 0000000..690d352 --- /dev/null +++ b/src/expression/predicate.cpp @@ -0,0 +1,95 @@ +/*------------------------------------------------------------------------------* + * Architecture & Implementation of DBMS * + *------------------------------------------------------------------------------* + * Copyright 2020 Databases and Information Systems Group TU Dortmund * + * Visit us at * + * http://dbis.cs.tu-dortmund.de/cms/en/home/ * + * * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * + * OTHER DEALINGS IN THE SOFTWARE. 
// NOTE(review): the template arguments of std::holds_alternative / std::get are missing
// in this copy of the file, which makes the two branches of each function look identical.
// Judging by the local names, the first branch presumably handles a connective (with
// "left" and "right" sub-predicates) and the second an atom — confirm against the
// declaration of Predicate before editing.

/**
 * Collects the attributes referenced by a predicate.
 *
 * For a connective, the attributes of the left sub-predicate are followed by the
 * attributes of the right one. For an atom, each operand that holds the tested
 * alternative is appended (left operand first). Duplicates are not removed.
 *
 * @param predicate Predicate to analyze.
 * @return Attributes in the order they were found.
 */
beedb::expression::Attributes PredicateAnalyzer::attributes(const Predicate &predicate)
{
    Attributes attributes;
    if (std::holds_alternative>(predicate))
    {
        const auto &connective = std::get>(predicate);
        // Recurse into both sides and concatenate the results.
        auto left_attributes = PredicateAnalyzer::attributes(connective->left);
        auto right_attributes = PredicateAnalyzer::attributes(connective->right);
        for (const auto &attr : left_attributes)
        {
            attributes.push_back(attr);
        }
        for (const auto &attr : right_attributes)
        {
            attributes.push_back(attr);
        }
    }
    else if (std::holds_alternative>(predicate))
    {
        const auto &atom = std::get>(predicate);
        if (std::holds_alternative(atom->left))
        {
            attributes.push_back(std::get(atom->left));
        }

        if (std::holds_alternative(atom->right))
        {
            attributes.push_back(std::get(atom->right));
        }
    }

    return attributes;
}

/**
 * Tests whether a predicate contains an atom that is neither expression::EQ nor
 * expression::NEQ.
 *
 * @param predicate Predicate to analyze.
 * @return True if such an atom exists on either side of any connective; false for a
 *         predicate that holds neither tested alternative.
 */
bool PredicateAnalyzer::contains_range_predicate(const Predicate &predicate)
{
    if (std::holds_alternative>(predicate))
    {
        const auto &connective = std::get>(predicate);
        return PredicateAnalyzer::contains_range_predicate(connective->left) ||
               PredicateAnalyzer::contains_range_predicate(connective->right);
    }
    else if (std::holds_alternative>(predicate))
    {
        const auto &atom = std::get>(predicate);
        auto atom_ptr = atom.get();
        // The check is made on the dynamic type of the atom.
        return (typeid(*atom_ptr) == typeid(expression::EQ) || typeid(*atom_ptr) == typeid(expression::NEQ)) == false;
    }

    return false;
}

/**
 * Tests whether a predicate contains an atom whose dynamic type is expression::NEQ.
 *
 * @param predicate Predicate to analyze.
 * @return True if such an atom exists on either side of any connective; false for a
 *         predicate that holds neither tested alternative.
 */
bool PredicateAnalyzer::contains_not_equals_predicate(const Predicate &predicate)
{
    if (std::holds_alternative>(predicate))
    {
        const auto &connective = std::get>(predicate);
        return PredicateAnalyzer::contains_not_equals_predicate(connective->left) ||
               PredicateAnalyzer::contains_not_equals_predicate(connective->right);
    }
    else if (std::holds_alternative>(predicate))
    {
        const auto &atom = std::get>(predicate);
        auto atom_ptr = atom.get();
        return typeid(*atom_ptr) == typeid(expression::NEQ);
    }

    return false;
}
namespace beedb::compression
{
/**
 * A single 32 bit word of a compressed bit vector (presumably the word-aligned
 * hybrid scheme, as the name suggests).
 *
 * The word consists of a one bit flag and 31 bits of content:
 *  - flag == 0: "fill" word. Bit 30 of the content is the fill bit, the lower
 *    30 bits are a counter (see count() and operator++).
 *  - flag == 1: "literal" word. The content holds 31 individual bits, which
 *    are accessed by set() and get() with an index in [0, 30].
 */
class WAHBitVector
{
  public:
    WAHBitVector() : _is_literal_word(0), _content(0)
    {
    }
    ~WAHBitVector() = default;

    /**
     * @return True, if this word is a fill word.
     */
    inline bool is_fill() const
    {
        return _is_literal_word == 0;
    }

    /**
     * @return True, if this word is a literal word.
     */
    inline bool is_literal() const
    {
        return _is_literal_word != 0;
    }

    /**
     * Marks the word as fill word (true) or as literal word (false).
     */
    inline void is_fill(bool is_fill)
    {
        _is_literal_word = is_fill ? 0u : 1u;
    }

    /**
     * @return The fill bit (bit 30 of the content).
     */
    inline bool fill_bit() const
    {
        return ((_content >> _fill_bit_position) & 1u) != 0u;
    }

    /**
     * Sets the fill bit; the counter is left untouched.
     */
    inline void fill_bit(const bool fill_bit)
    {
        _content = (static_cast<std::uint32_t>(fill_bit) << _fill_bit_position) | count();
    }

    /**
     * @return The counter stored in the lower 30 bits of the content.
     */
    inline std::uint32_t count() const
    {
        return _content & _count_mask;
    }

    /**
     * Increments the counter by one.
     *
     * The counter has 30 bits. On overflow it wraps around to zero; the carry is
     * masked off so that it can not spill into the fill bit.
     */
    WAHBitVector &operator++()
    {
        const std::uint32_t next_count = (count() + 1u) & _count_mask;
        _content = (static_cast<std::uint32_t>(fill_bit()) << _fill_bit_position) | next_count;
        return *this;
    }

    WAHBitVector operator++(int)
    {
        WAHBitVector copy(*this);
        ++(*this);
        return copy;
    }

    /**
     * Sets or clears a single bit of the content.
     *
     * @param index Index of the bit; the content has 31 bits, so valid indices are 0 to 30.
     * @param bit New value of the bit.
     */
    inline void set(const std::size_t index, const bool bit)
    {
        assert(index < _content_bits);
        const std::uint32_t mask = std::uint32_t{1} << index;
        _content = bit ? (_content | mask) : (_content & ~mask);
    }

    /**
     * Reads a single bit of the content.
     *
     * @param index Index of the bit; the content has 31 bits, so valid indices are 0 to 30.
     * @return Value of the bit.
     */
    inline bool get(const std::size_t index) const
    {
        assert(index < _content_bits);
        return ((_content >> index) & 1u) != 0u;
    }

    /**
     * Resets the content; the fill/literal flag is kept.
     */
    inline void clear()
    {
        _content = 0;
    }

  private:
    // Number of content bits next to the one bit flag.
    static constexpr std::size_t _content_bits = 31;
    // Position of the fill bit within the content.
    static constexpr std::uint32_t _fill_bit_position = 30;
    // Mask of the counter (lower 30 bits of the content).
    static constexpr std::uint32_t _count_mask = 0x3FFFFFFF;

    std::uint32_t _is_literal_word : 1, _content : 31;
} __attribute__((packed));
} // namespace beedb::compression
http://dbis.cs.tu-dortmund.de/cms/en/home/ * + * * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * + * OTHER DEALINGS IN THE SOFTWARE. * + * * + * Authors (alphabetical list): * + * Maximilian Berens * + * Roland Kühn * + * Jan Mühlig * + *------------------------------------------------------------------------------* + */ + +#pragma once +#include +#include +#include +#include +#include +#include + +namespace beedb +{ +/** + * Holds all configuration for the DBMS. + */ +class Config +{ + public: + using ConfigKey = std::string; + using ConfigValue = std::int32_t; // TODO: consider renaming this or the struct below...? 
+ + ///// COMPILE TIME OPTIONS - CHANGES REQUIRE REBUILDING + static constexpr std::uint16_t page_size = 4096; + static constexpr std::uint16_t b_plus_tree_page_size = 1024; + + public: + // defining constants and other values, for convenience and ease-of-reading + enum BufferReplacementStrategy + { + LRU, + LRU_K, + LFU + }; + + // important key's for non-string based notation: + static constexpr auto k_PageSize = "page_size"; + static constexpr auto k_BPlusTreePageSize = "b_plus_tree_page_size"; + static constexpr auto k_ScanPageLimit = "scan_page_limit"; + + static constexpr auto k_BufferFrames = "buffer_frames"; + static constexpr auto k_BufferReplacementStrategy = "buffer_replacement_strategy"; + static constexpr auto k_LRU_K = "lru_k"; + + static constexpr auto k_CheckFinalPlan = "check_final_plan"; + + static constexpr auto k_OptimizationEnableHashJoin = "enable_hash_join"; + static constexpr auto k_OptimizationEnableIndexScan = "enable_index_scan"; + static constexpr auto k_OptimizationDisableOptimization = "no_optimization"; + + static constexpr auto k_PrintExecutionStatistics = "print_execution_statistics"; + + // this object represents a ConfigValue in the map and stores some meta information + struct ConfigMapValue + { + ConfigValue value; + bool is_mutable = true; + bool requires_restart = false; + + operator ConfigValue() const + { + return value; + } + + operator bool() const + { + return value != 0u; + } + + operator BufferReplacementStrategy() + { + return static_cast(value); + } + + static constexpr bool immutable = false; + }; + + Config() + { + // for transparency, we make compile-time options available in the config map (read only) + _configuration.insert({k_PageSize, {page_size, ConfigMapValue::immutable}}); + _configuration.insert({k_BPlusTreePageSize, {b_plus_tree_page_size, ConfigMapValue::immutable}}); + } + + ~Config() = default; // TODO: persist changes in config + + /** + * @brief operator [] Read only access to values of the 
Configuration! + * + * This method is identical to get. + * + * @param key + * @return + */ + ConfigMapValue operator[](const ConfigKey &key) const + { + if (_configuration.find(key) == _configuration.end()) + { + throw exception::ConfigException(key); + } + return _configuration.at(key); + } + + ConfigMapValue get(const ConfigKey &key) const + { + return this->operator[](key); + } + + /** + * @brief set sets a configuration value. Can override existing values, if the flag ConfigMapValue.is_mutable is not + * set! + * @param key a new or existing key + * @param value the new value + * @param is_mutable defaults to true + * @param requires_restart defaults to false. currently unused. + * @return the input value, returned from the map + */ + ConfigMapValue set(ConfigKey key, ConfigValue value, bool is_mutable = true, bool requires_restart = false) + { + if (_configuration.find(key) != _configuration.end()) + { + // if this key already exists, check if it is read only + if (!_configuration[key].is_mutable) + { + throw exception::CanNotModifyAtRuntimeException(key); + } + } + // // TODO implement: persist config map on shutdown and uncomment this + // if (_configuration[key].requires_restart) { + // std::cout << "Note: " << "This option requires a restart of the application to take effect!" + // << std::endl; + // } + + return _configuration[key] = {value, is_mutable, requires_restart}; + } + + bool contains(ConfigKey key) const + { + return _configuration.find(key) != _configuration.end(); + } + + operator std::string() + { + std::string str(""); + str += "Current Configuration:\n"; + for (const auto &[k, value] : _configuration) + { + str += ""; + str += std::to_string(value.value); + // str += ", " + (value.is_mutable ? std::string("r/w") : std::string("r") ); + + str += "\t<- " + k + (!value.is_mutable ? 
" (immutable)" : "") + "\n"; + } + return str; + } + + private: + using ConfigurationMap = std::unordered_map; + + // holds the actual configuration values + ConfigurationMap _configuration; +}; +} // namespace beedb \ No newline at end of file diff --git a/src/include/database.h b/src/include/database.h new file mode 100644 index 0000000..552bde3 --- /dev/null +++ b/src/include/database.h @@ -0,0 +1,181 @@ +/*------------------------------------------------------------------------------* + * Architecture & Implementation of DBMS * + *------------------------------------------------------------------------------* + * Copyright 2020 Databases and Information Systems Group TU Dortmund * + * Visit us at * + * http://dbis.cs.tu-dortmund.de/cms/en/home/ * + * * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * + * OTHER DEALINGS IN THE SOFTWARE. * + * * + * Authors (alphabetical list): * + * Maximilian Berens * + * Roland Kühn * + * Jan Mühlig * + *------------------------------------------------------------------------------* + */ + +#pragma once +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include
+#include
+#include
+#include +#include + +namespace beedb +{ +class Database +{ + public: + Database(Config &config, const std::string &file_name); + ~Database(); + + /** + * Boots the database management system. + * During the boot, all persisted tables, their schemas and indices + * will be loaded to memory. + * All indices will be filled with data from disk. + */ + void boot(); + + /** + * @return Instance of the TableDiskManager. + */ + inline table::TableDiskManager &table_disk_manager() + { + return _table_disk_manager; + } + + /** + * @return Instance of the BufferManager. + */ + inline disk::BufferManager &buffer_manager() + { + return _buffer_manager; + } + + /** + * @return Immutable instance of the config. + */ + inline const Config &config() const + { + return _config; + } + + /** + * @return Mutable instance of the config. + */ + inline Config &config() + { + return _config; + } + + inline statistic::SystemStatistics &system_statistics() + { + return _statistics; + } + + /** + * Checks whether a table exists. + * + * @param name Name of the table. + * @return True, if the table exists. + */ + inline bool table_exists(const std::string &name) + { + return _tables.find(name) != _tables.end(); + } + + /** + * Returns a pointer to the requested table. + * + * @param name Name of the requested table. + * @return Pointer to the table. + */ + table::Table *table(const std::string &name) + { + if (table_exists(name)) + { + return _tables[name]; + } + + return nullptr; + } + + table::Table *operator[](const std::string &table_name) + { + return table(table_name); + } + + const table::Table &operator[](const std::string &table_name) const + { + // unchecked, const access to tables + return *(_tables.at(table_name)); + } + + /** + * Creates a table with a given schema. + * The table will be persisted and available after creation. + * + * @param schema Schema for the table. + */ + void create_table(const table::Schema &schema); + + /** + * Creates an index for a specific column. 
+ * The index will be persisted, filled, and available after creation. + * + * @param column Column to be indexed. + * @param type Type of the index. + * @param name Name of the index. + * @param is_unique True, when the index is a unique index. + */ + void create_index(const table::Column &column, const index::Type type, const std::string &name, + const bool is_unique); + + private: + Config &_config; + disk::StorageManager _storage_manager; + disk::BufferManager _buffer_manager; + table::TableDiskManager _table_disk_manager; + + std::unordered_map _tables; + + std::uint32_t _next_table_id = 1; + std::uint32_t _next_column_id = 1; + std::uint32_t _next_index_id = 1; + + statistic::SystemStatistics _statistics; + + /** + * Initializes the database. When the database is empty, + * we will create a new database schema containing all meta tables. + * + * @param create_schema True, when a database schema should be created. + */ + void initialize_database(const bool create_schema); + + /** + * Persists the table statistics. 
+ * + * @param table Table + * @param cardinality Cardinality + */ + void persist_table_statistics(table::Table *table, const std::uint64_t cardinality); +}; +} // namespace beedb \ No newline at end of file diff --git a/src/include/disk/buffer_manager.h b/src/include/disk/buffer_manager.h new file mode 100644 index 0000000..5e9ccec --- /dev/null +++ b/src/include/disk/buffer_manager.h @@ -0,0 +1,144 @@ +/*------------------------------------------------------------------------------* + * Architecture & Implementation of DBMS * + *------------------------------------------------------------------------------* + * Copyright 2020 Databases and Information Systems Group TU Dortmund * + * Visit us at * + * http://dbis.cs.tu-dortmund.de/cms/en/home/ * + * * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * + * OTHER DEALINGS IN THE SOFTWARE. * + * * + * Authors (alphabetical list): * + * Maximilian Berens * + * Roland Kühn * + * Jan Mühlig * + *------------------------------------------------------------------------------* + */ + +#pragma once +#include "buffer_replacement_strategy.h" +#include "frame.h" +#include "page.h" +#include "storage_manager.h" +#include +#include +#include + +namespace beedb::disk +{ +/** + * The BufferManager buffers pages stored on the disk in memory. + * Since the system has not infinite memory, the number of buffered + * pages (named "frames") is limited. + * + * Every access to a page on the disk is done by pinning the page + * through the BufferManager. When the page is not needed any more + * (e.g. 
all tuples are scanned), the page can be unpinned by the + * BufferManager. + */ +class BufferManager +{ + public: + BufferManager(const std::size_t count_frames, StorageManager &space_manager, + std::unique_ptr replacement_strategy = nullptr); + ~BufferManager(); + + /** + * Loads the page from disk into memory and returns a pointer + * to the page. When the page is still buffered, the page will + * not be loaded twice, but guaranteed to stay in memory until + * it is unpinned. + * + * @param page_id Id of the page. + * @return Pointer to the page, that allows accessing the data. + */ + Page *pin(const Page::page_id page_id); + + /** + * Notifies the BufferManager that the page is not needed anymore. + * In case no one needs the page, the frame can be used for other + * pages buffered from disk in memory. + * + * @param page_id Id of the page. + * @param is_dirty True, when the content of the page was modified. + */ + void unpin(const Page::page_id page_id, const bool is_dirty); + + /** + * Notifies the BufferManager that the page is not needed anymore. + * In case no one needs the page, the frame can be used for other + * pages buffered from disk in memory. + * + * @param page_id Id of the page. + * @param is_dirty True, when the content of the page was modified. + */ + inline void unpin(Page *page, const bool is_dirty) + { + unpin(page->id(), is_dirty); + } + + /** + * Allocates a new page on the disk and loads the page to memory. + * + * @return Pointer to the pinned(!) page. + */ + Page *allocate(); + + /** + * Set the replacement strategy which picks frames to be replaced, + * when all frames are occupied, but a new page is requested to + * be loaded from disk to memory. + * @param replacement_strategy + */ + inline void replacement_strategy(std::unique_ptr replacement_strategy) + { + _replacement_strategy = std::move(replacement_strategy); + } + + /** + * @return Number of evicted frames. 
+ */ + inline std::size_t evicted_frames() const + { + return _evicted_frames; + } + + private: + StorageManager &_space_manager; + std::unique_ptr _replacement_strategy; + + std::vector _frames; + std::vector _frame_information; + std::size_t _pin_sequence = 0u; + std::size_t _evicted_frames = 0u; + + /** + * Writes all dirty pages from memory to disk. + */ + void flush(); + + /** + * Lookup for frame information for a specific page. + * The frame information stores information like pin + * history, pinned page for a frame. + * + * @param page_id Id of the page. + * @return An iterator to the frame information or end() if the frame was not found. + */ + std::vector::iterator frame_information(const Page::page_id page_id); + + /** + * Find a frame which should be occupied for a new page request. + * The target frame can be a) a free frame or b) a frame holding + * another, not pinned page. + * + * @return Index for the frame + */ + std::size_t find_frame(); +}; +} // namespace beedb::disk \ No newline at end of file diff --git a/src/include/disk/buffer_replacement_strategy.h b/src/include/disk/buffer_replacement_strategy.h new file mode 100644 index 0000000..ed4d5f7 --- /dev/null +++ b/src/include/disk/buffer_replacement_strategy.h @@ -0,0 +1,47 @@ +/*------------------------------------------------------------------------------* + * Architecture & Implementation of DBMS * + *------------------------------------------------------------------------------* + * Copyright 2020 Databases and Information Systems Group TU Dortmund * + * Visit us at * + * http://dbis.cs.tu-dortmund.de/cms/en/home/ * + * * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL * + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * + * OTHER DEALINGS IN THE SOFTWARE. * + * * + * Authors (alphabetical list): * + * Maximilian Berens * + * Roland Kühn * + * Jan Mühlig * + *------------------------------------------------------------------------------* + */ + +#pragma once +#include "frame.h" +#include + +namespace beedb::disk +{ +/** + * The BufferReplacementStrategy decides which frame should + * be re-used for a new page, when no free frame is available. + */ +class BufferReplacementStrategy +{ + public: + virtual ~BufferReplacementStrategy() = default; + + /** + * Picks a frame that holds a unused page and should be + * replaced by a new page, requested by a query. + * + * @param frame_information Information of all frames. + * @return Index of the frame, that should be used for a new page. + */ + virtual std::size_t find_victim(const std::vector &frame_information) = 0; +}; +} // namespace beedb::disk \ No newline at end of file diff --git a/src/include/disk/frame.h b/src/include/disk/frame.h new file mode 100644 index 0000000..9fee624 --- /dev/null +++ b/src/include/disk/frame.h @@ -0,0 +1,164 @@ +/*------------------------------------------------------------------------------* + * Architecture & Implementation of DBMS * + *------------------------------------------------------------------------------* + * Copyright 2020 Databases and Information Systems Group TU Dortmund * + * Visit us at * + * http://dbis.cs.tu-dortmund.de/cms/en/home/ * + * * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL * + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * + * OTHER DEALINGS IN THE SOFTWARE. * + * * + * Authors (alphabetical list): * + * Maximilian Berens * + * Roland Kühn * + * Jan Mühlig * + *------------------------------------------------------------------------------* + */ + +#pragma once +#include "page.h" +#include +#include +#include + +namespace beedb::disk +{ +/** + * Storage for a page that is loaded from disk to memory. + */ +using Frame = std::array; + +/** + * Stores information about a frame that holds a page + * in memory. + * Information are: + * - The id of the current page hold by the frame + * - Number of pins + * - Dirty bit; if set, the page has to be written back + * to disk when the page is replaced. + * - History of pin timestamps + */ +class FrameInformation +{ + public: + FrameInformation() = default; + ~FrameInformation() = default; + + void occupy(const Page::page_id page_id, const std::size_t timestamp) + { + this->_page_id = page_id; + this->_pin_count = 1u; + this->_is_dirty = false; + this->_pin_timestamps.clear(); + this->_pin_timestamps.push_back(timestamp); + } + + /** + * @return Id of the occupied page. + */ + Page::page_id page_id() const + { + return _page_id; + } + + /** + * @return True, if the frame is occupied by a page. + */ + bool is_occupied() const + { + return _page_id != Page::INVALID_PAGE_ID; + } + + /** + * @return Number of active pins. + */ + std::size_t pin_count() const + { + return _pin_count; + } + + /** + * @return True, if the frame is pinned at the moment. + */ + bool is_pinned() const + { + return _pin_count > 0u; + } + + /** + * Increases the pin count and adds the timestamp to history. + * @param timestamp Timestamp of the pin. 
+ */ + void increase_pin_count(const std::size_t timestamp) + { + _pin_count++; + _pin_timestamps.push_back(timestamp); + } + + /** + * Decreases the pin count. + */ + void decrease_pin_count() + { + _pin_count--; + } + + /** + * @return True, if the frame is dirty ergo the content + * of the page was modified. + */ + bool is_dirty() const + { + return _is_dirty; + } + + /** + * Update the dirty flag. + * @param is_dirty + */ + void is_dirty(const bool is_dirty) + { + _is_dirty = is_dirty; + } + + /** + * @return Timestamp of the last pin. + */ + std::size_t last_pin_timestamp() const + { + if (_pin_timestamps.empty()) + { + return std::numeric_limits::max(); + } + + return _pin_timestamps.back(); + } + + /** + * @return Timestamp of the i-th pin. + */ + std::size_t pin_timestamp(const std::size_t i) const + { + return _pin_timestamps[i]; + } + + /** + * @return Number of how many times the frame was pinned. + */ + std::size_t count_all_pins() const + { + return _pin_timestamps.size(); + } + + private: + Page::page_id _page_id = Page::INVALID_PAGE_ID; + std::size_t _pin_count = 0u; + bool _is_dirty = false; + std::vector _pin_timestamps; +}; +} // namespace beedb::disk \ No newline at end of file diff --git a/src/include/disk/lfu_strategy.h b/src/include/disk/lfu_strategy.h new file mode 100644 index 0000000..101cbd0 --- /dev/null +++ b/src/include/disk/lfu_strategy.h @@ -0,0 +1,38 @@ +/*------------------------------------------------------------------------------* + * Architecture & Implementation of DBMS * + *------------------------------------------------------------------------------* + * Copyright 2020 Databases and Information Systems Group TU Dortmund * + * Visit us at * + * http://dbis.cs.tu-dortmund.de/cms/en/home/ * + * * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL * + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * + * OTHER DEALINGS IN THE SOFTWARE. * + * * + * Authors (alphabetical list): * + * Maximilian Berens * + * Roland Kühn * + * Jan Mühlig * + *------------------------------------------------------------------------------* + */ + +#pragma once +#include "buffer_replacement_strategy.h" + +namespace beedb::disk +{ +/** + * Replaces the last frequently used frame. + */ +class LFUStrategy final : public BufferReplacementStrategy +{ + public: + LFUStrategy() = default; + virtual ~LFUStrategy() = default; + virtual std::size_t find_victim(const std::vector &frame_information); +}; +} // namespace beedb::disk \ No newline at end of file diff --git a/src/include/disk/lru_k_strategy.h b/src/include/disk/lru_k_strategy.h new file mode 100644 index 0000000..6e4331d --- /dev/null +++ b/src/include/disk/lru_k_strategy.h @@ -0,0 +1,46 @@ +/*------------------------------------------------------------------------------* + * Architecture & Implementation of DBMS * + *------------------------------------------------------------------------------* + * Copyright 2020 Databases and Information Systems Group TU Dortmund * + * Visit us at * + * http://dbis.cs.tu-dortmund.de/cms/en/home/ * + * * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * + * OTHER DEALINGS IN THE SOFTWARE. 
* + * * + * Authors (alphabetical list): * + * Maximilian Berens * + * Roland Kühn * + * Jan Mühlig * + *------------------------------------------------------------------------------* + */ + +#pragma once +#include "buffer_replacement_strategy.h" +#include + +namespace beedb::disk +{ +class LRUKStrategy final : public BufferReplacementStrategy +{ + public: + LRUKStrategy(const std::size_t k) : _k(k) + { + } + virtual ~LRUKStrategy() = default; + virtual std::size_t find_victim(const std::vector &frame_information); + + private: + const std::size_t _k = 0; + + std::size_t k() const + { + return _k; + } +}; +} // namespace beedb::disk \ No newline at end of file diff --git a/src/include/disk/lru_strategy.h b/src/include/disk/lru_strategy.h new file mode 100644 index 0000000..3e71834 --- /dev/null +++ b/src/include/disk/lru_strategy.h @@ -0,0 +1,38 @@ +/*------------------------------------------------------------------------------* + * Architecture & Implementation of DBMS * + *------------------------------------------------------------------------------* + * Copyright 2020 Databases and Information Systems Group TU Dortmund * + * Visit us at * + * http://dbis.cs.tu-dortmund.de/cms/en/home/ * + * * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * + * OTHER DEALINGS IN THE SOFTWARE. 
* + * * + * Authors (alphabetical list): * + * Maximilian Berens * + * Roland Kühn * + * Jan Mühlig * + *------------------------------------------------------------------------------* + */ + +#pragma once +#include "buffer_replacement_strategy.h" + +namespace beedb::disk +{ +/** + * Replaces the last recently used frame. + */ +class LRUStrategy final : public BufferReplacementStrategy +{ + public: + LRUStrategy() = default; + virtual ~LRUStrategy() = default; + virtual std::size_t find_victim(const std::vector &frame_information); +}; +} // namespace beedb::disk \ No newline at end of file diff --git a/src/include/disk/page.h b/src/include/disk/page.h new file mode 100644 index 0000000..71e5e2b --- /dev/null +++ b/src/include/disk/page.h @@ -0,0 +1,172 @@ +/*------------------------------------------------------------------------------* + * Architecture & Implementation of DBMS * + *------------------------------------------------------------------------------* + * Copyright 2020 Databases and Information Systems Group TU Dortmund * + * Visit us at * + * http://dbis.cs.tu-dortmund.de/cms/en/home/ * + * * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * + * OTHER DEALINGS IN THE SOFTWARE. 
* + * * + * Authors (alphabetical list): * + * Maximilian Berens * + * Roland Kühn * + * Jan Mühlig * + *------------------------------------------------------------------------------* + */ + +#pragma once +#include +#include +#include +#include +#include + +namespace beedb::disk +{ +/** + * Represents a page on disk, holding a header for meta information + * and raw memory for storing data. + * Pages can be linked logically. All linked pages contain data for the + * same table; like a linked list of storage. + */ +class Page +{ + public: + using page_id = std::uint64_t; + using page_offset = std::uint16_t; + static constexpr page_id INVALID_PAGE_ID = std::numeric_limits::max(); + static constexpr page_offset INVALID_PAGE_OFFSET = std::numeric_limits::max(); + + private: + /** + * Stores meta information for a page. + */ + struct Header + { + Page::page_id page_id = INVALID_PAGE_ID; + Page::page_id next_page_id = INVALID_PAGE_ID; + Page::page_offset last_written_index = 0u; + std::byte padding[8]; + } __attribute__((packed)); + + public: + static constexpr std::size_t data_size = Config::page_size - sizeof(Header); + + Page(const page_id id) : _data({std::byte{'\0'}}) + { + _header.page_id = id; + } + + ~Page() = default; + + /** + * @return Id of this page. + */ + inline page_id id() const + { + return _header.page_id; + } + + /** + * @return Id of the page which is logical connected to this page. + */ + inline page_id next_page_id() const + { + return _header.next_page_id; + } + + /** + * Updates the next page id. + * @param next_page_id Id of the next page. + */ + inline void next_page_id(const page_id next_page_id) + { + _header.next_page_id = next_page_id; + } + + /** + * @return True, when this page has a next page. + */ + inline bool has_next_page() const + { + return next_page_id() != INVALID_PAGE_ID; + } + + /** + * @return Number of bytes actually written to this page. 
+ */ + inline Page::page_offset size() const + { + return _header.last_written_index; + } + + /** + * @return Number of free bytes. + */ + inline Page::page_offset free_space() const + { + return _data.size() - size(); + } + + /** + * @return Pointer to the data stored on this page. + */ + inline std::byte *data() + { + return _data.data(); + } + + /** + * Replaces content on the page. + * + * @param begin First byte to write. + * @param data Pointer to the data to be written. + * @param length Length of the data. + */ + inline void overwrite(const Page::page_offset begin, const std::byte *data, const Page::page_offset length) + { + std::memcpy(&_data[begin], data, length); + } + + /** + * Adds new content at the current end of the page. + * @param data Pointer to the data. + * @param length Length of the data. + * @return First byte the data was written to. + */ + inline std::size_t append(const std::byte *data, const Page::page_offset length) + { + const auto begin = _header.last_written_index; + _header.last_written_index = begin + length; + overwrite(begin, data, length); + return begin; + } + + /** + * @return True, when this is a persisted page. + */ + operator bool() const + { + return _header.page_id < INVALID_PAGE_ID; + } + + /** + * @param index Index of data. + * @return Pointer to the data. 
+ */ + std::byte *operator[](const Page::page_offset index) + { + return &_data[index]; + } + + private: + Header _header; + std::array _data; +} __attribute__((packed)); +} // namespace beedb::disk \ No newline at end of file diff --git a/src/include/disk/storage_manager.h b/src/include/disk/storage_manager.h new file mode 100644 index 0000000..c385e08 --- /dev/null +++ b/src/include/disk/storage_manager.h @@ -0,0 +1,75 @@ +/*------------------------------------------------------------------------------* + * Architecture & Implementation of DBMS * + *------------------------------------------------------------------------------* + * Copyright 2020 Databases and Information Systems Group TU Dortmund * + * Visit us at * + * http://dbis.cs.tu-dortmund.de/cms/en/home/ * + * * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * + * OTHER DEALINGS IN THE SOFTWARE. * + * * + * Authors (alphabetical list): * + * Maximilian Berens * + * Roland Kühn * + * Jan Mühlig * + *------------------------------------------------------------------------------* + */ + +#pragma once +#include "page.h" +#include +#include +#include + +namespace beedb::disk +{ +/** + * The StorageManager grants access to the data written to the disk. + */ +class StorageManager +{ + public: + StorageManager(const std::string &file_name); + ~StorageManager(); + + /** + * Copy a page from disk to memory. + * + * @param page_id Id of the page. + * @return Data, now copied to memory. + */ + Page read(const Page::page_id page_id); + + /** + * Write the page from memory to disk. 
+ * + * @param page Page to be written. + */ + void write(Page &page); + + /** + * Allocates a new page in the disk file and extends the + * file by the new allocated page. + * + * @return Id of the new allocated page. + */ + Page::page_id allocate(); + + /** + * @return Number of pages stored in the disk file. + */ + inline std::size_t count_pages() const + { + return _count_pages; + } + + private: + std::size_t _count_pages = 0u; + std::fstream _storage_file; +}; +} // namespace beedb::disk \ No newline at end of file diff --git a/src/include/exception/command_exception.h b/src/include/exception/command_exception.h new file mode 100644 index 0000000..a113d28 --- /dev/null +++ b/src/include/exception/command_exception.h @@ -0,0 +1,86 @@ +/*------------------------------------------------------------------------------* + * Architecture & Implementation of DBMS * + *------------------------------------------------------------------------------* + * Copyright 2020 Databases and Information Systems Group TU Dortmund * + * Visit us at * + * http://dbis.cs.tu-dortmund.de/cms/en/home/ * + * * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * + * OTHER DEALINGS IN THE SOFTWARE. 
namespace beedb::exception
{
/**
 * Base class for all command related exceptions.
 * Stores the message and returns it from what().
 */
class CommandException : public std::exception
{
  public:
    /**
     * @param message Text returned by what().
     */
    CommandException(const std::string &message) : _message(message)
    {
    }

    ~CommandException() = default;

    /**
     * @return The message this exception was created with.
     */
    const char *what() const noexcept override
    {
        return _message.c_str();
    }

  private:
    const std::string _message;
};

/**
 * Thrown when a command is not known.
 */
class UnknownCommandException final : public CommandException
{
  public:
    /**
     * @param command The command that was not recognized; it is quoted in the message.
     */
    UnknownCommandException(const std::string &command) : CommandException("Unknown command '" + command + "'.")
    {
    }

    /**
     * Without a command name the plain message is used. Delegating to the
     * constructor above would quote the text a second time and yield
     * "Unknown command 'Unknown command'.".
     */
    UnknownCommandException() : CommandException("Unknown command")
    {
    }

    virtual ~UnknownCommandException() = default;
};

/**
 * Thrown when a command is known, but used with a wrong syntax.
 */
class CommandSyntaxException final : public CommandException
{
  public:
    /**
     * @param command The command that was used wrongly.
     * @param syntax_hint Hint that is appended to the message on a new line.
     */
    CommandSyntaxException(const std::string &command, const std::string &syntax_hint)
        : CommandException("Command syntax exception '" + command + "'.\n" + syntax_hint)
    {
    }

    CommandSyntaxException() : CommandException("Command syntax exception")
    {
    }

    virtual ~CommandSyntaxException() = default;
};

/**
 * Thrown when the input given to a command is not supported.
 */
class UnknownCommandInputException final : public CommandException
{
  public:
    /**
     * @param input The unsupported input; it is quoted in the message.
     */
    UnknownCommandInputException(const std::string &input) : CommandException("Not supported input '" + input + "'")
    {
    }

    virtual ~UnknownCommandInputException() = default;
};
} // namespace beedb::exception
*------------------------------------------------------------------------------* + * Copyright 2020 Databases and Information Systems Group TU Dortmund * + * Visit us at * + * http://dbis.cs.tu-dortmund.de/cms/en/home/ * + * * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * + * OTHER DEALINGS IN THE SOFTWARE. * + * * + * Authors (alphabetical list): * + * Maximilian Berens * + * Roland Kühn * + * Jan Mühlig * + *------------------------------------------------------------------------------* + */ + +#pragma once +#include "exception.h" +#include + +namespace beedb::exception +{ +class ConfigException : public DatabaseException +{ + public: + ConfigException(const std::string &message) : DatabaseException(DatabaseException::Configuration, message) + { + } + virtual ~ConfigException() = default; +}; + +class KeyNotFoundException final : public ConfigException +{ + public: + KeyNotFoundException(const std::string &key) : ConfigException("Option " + key + " not found.") + { + } + + virtual ~KeyNotFoundException() = default; +}; + +class CanNotModifyAtRuntimeException final : public ConfigException +{ + public: + CanNotModifyAtRuntimeException(const std::string &key) + : ConfigException("Option " + key + " can not be changed at runtime.") + { + } + + virtual ~CanNotModifyAtRuntimeException() = default; +}; + +} // namespace beedb::exception \ No newline at end of file diff --git a/src/include/exception/disk_exception.h b/src/include/exception/disk_exception.h new file mode 100644 index 0000000..b519682 --- /dev/null +++ b/src/include/exception/disk_exception.h 
@@ -0,0 +1,79 @@ +/*------------------------------------------------------------------------------* + * Architecture & Implementation of DBMS * + *------------------------------------------------------------------------------* + * Copyright 2020 Databases and Information Systems Group TU Dortmund * + * Visit us at * + * http://dbis.cs.tu-dortmund.de/cms/en/home/ * + * * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * + * OTHER DEALINGS IN THE SOFTWARE. * + * * + * Authors (alphabetical list): * + * Maximilian Berens * + * Roland Kühn * + * Jan Mühlig * + *------------------------------------------------------------------------------* + */ + +#pragma once +#include "exception.h" +#include + +namespace beedb::exception +{ +/** Base class for disk errors; passes DatabaseException::Disk as the layer. */ class DiskException : public DatabaseException +{ + public: + DiskException(const std::string &message) : DatabaseException(DatabaseException::Disk, message) + { + } + virtual ~DiskException() = default; +}; +/** Message is "Can not evict page, frame <frame_index> is pinned." */ +class EvictedPagePinnedException final : public DiskException +{ + public: + EvictedPagePinnedException(const std::uint64_t frame_index) + : DiskException("Can not evict page, frame " + std::to_string(frame_index) + " is pinned.") + { + } + + virtual ~EvictedPagePinnedException() = default; +}; +/** Message is "Page <disk_id> is not pinned, but unpin() called." */ +class PageWasNotPinnedException final : public DiskException +{ + public: + PageWasNotPinnedException(const std::uint64_t disk_id) + : DiskException("Page " + std::to_string(disk_id) + " is not pinned, but unpin() called.") + { + } + + virtual ~PageWasNotPinnedException() = default; +}; +/** Fixed message: "No free frame found for eviction." */ +class NoFreeFrameException final : public
DiskException +{ + public: + NoFreeFrameException() : DiskException("No free frame found for eviction.") + { + } + + virtual ~NoFreeFrameException() = default; +}; +/** Fixed message: "Can not open storage file." */ +class CanNotOpenStorageFile final : public DiskException +{ + public: + CanNotOpenStorageFile() : DiskException("Can not open storage file.") + { + } + + virtual ~CanNotOpenStorageFile() = default; +}; +} // namespace beedb::exception \ No newline at end of file diff --git a/src/include/exception/exception.h b/src/include/exception/exception.h new file mode 100644 index 0000000..25b975a --- /dev/null +++ b/src/include/exception/exception.h @@ -0,0 +1,63 @@ +/*------------------------------------------------------------------------------* + * Architecture & Implementation of DBMS * + *------------------------------------------------------------------------------* + * Copyright 2020 Databases and Information Systems Group TU Dortmund * + * Visit us at * + * http://dbis.cs.tu-dortmund.de/cms/en/home/ * + * * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * + * OTHER DEALINGS IN THE SOFTWARE. * + * * + * Authors (alphabetical list): * + * Maximilian Berens * + * Roland Kühn * + * Jan Mühlig * + *------------------------------------------------------------------------------* + */ + +#pragma once + +#include +#include + +namespace beedb::exception +{ +/** + * Generic exception for plan building and execution. + * Base class of the per-layer exception types; the Layer enum lists the layers.
+ */ +class DatabaseException : public std::exception +{ + public: + enum Layer /* tag that subclasses pass to the protected constructor below */ + { + Parser, + LogicalPlan, + Execution, + Index, + Disk, + OutputHandler, + Configuration + }; + + virtual ~DatabaseException() = default; + + virtual const char *what() const noexcept /* returns the stored message; pointer is valid while this object lives */ + { + return _message.c_str(); + } + + protected: /* constructor is protected, so only subclasses can create instances */ + DatabaseException(const Layer layer, const std::string &message) : _layer(layer), _message(message) + { + } + + private: + const Layer _layer; /* NOTE(review): stored but no accessor is visible in this class - confirm whether one is intended */ + const std::string _message; +}; +} // namespace beedb::exception \ No newline at end of file diff --git a/src/include/exception/execution_exception.h b/src/include/exception/execution_exception.h new file mode 100644 index 0000000..ab87fa6 --- /dev/null +++ b/src/include/exception/execution_exception.h @@ -0,0 +1,48 @@ +/*------------------------------------------------------------------------------* + * Architecture & Implementation of DBMS * + *------------------------------------------------------------------------------* + * Copyright 2020 Databases and Information Systems Group TU Dortmund * + * Visit us at * + * http://dbis.cs.tu-dortmund.de/cms/en/home/ * + * * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * + * OTHER DEALINGS IN THE SOFTWARE.
* + * * + * Authors (alphabetical list): * + * Maximilian Berens * + * Roland Kühn * + * Jan Mühlig * + *------------------------------------------------------------------------------* + */ + +#pragma once +#include "exception.h" +#include + +namespace beedb::exception +{ +/** Base class for execution errors; passes DatabaseException::Execution as the layer. */ class ExecutionException : public DatabaseException +{ + public: + ExecutionException(const std::string &message) : DatabaseException(DatabaseException::Execution, message) + { + } + virtual ~ExecutionException() = default; +}; +/** Message is "Operator <operator_name> is not implemented physically." */ +class NoPhysicalOperatorForNode final : public ExecutionException +{ + public: + NoPhysicalOperatorForNode(const std::string &operator_name) + : ExecutionException("Operator " + operator_name + " is not implemented physically.") + { + } + + virtual ~NoPhysicalOperatorForNode() = default; +}; +} // namespace beedb::exception \ No newline at end of file diff --git a/src/include/exception/logical_exception.h b/src/include/exception/logical_exception.h new file mode 100644 index 0000000..f7e9d86 --- /dev/null +++ b/src/include/exception/logical_exception.h @@ -0,0 +1,213 @@ +/*------------------------------------------------------------------------------* + * Architecture & Implementation of DBMS * + *------------------------------------------------------------------------------* + * Copyright 2020 Databases and Information Systems Group TU Dortmund * + * Visit us at * + * http://dbis.cs.tu-dortmund.de/cms/en/home/ * + * * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * + * OTHER DEALINGS IN THE SOFTWARE.
* + * * + * Authors (alphabetical list): * + * Maximilian Berens * + * Roland Kühn * + * Jan Mühlig * + *------------------------------------------------------------------------------* + */ + +#pragma once +#include "exception.h" +#include + +namespace beedb::exception +{ +/** Base class for logical-plan errors; passes DatabaseException::LogicalPlan as the layer. */ class LogicalException : public DatabaseException +{ + public: + LogicalException(const std::string &message) : DatabaseException(DatabaseException::LogicalPlan, message) + { + } + virtual ~LogicalException() = default; +}; +/** Message is "Value for column <table>.<column> can not be NULL." */ +class ColumnCanNotBeNull final : public LogicalException +{ + public: + ColumnCanNotBeNull(const std::string &table_name, const std::string &column_name) + : LogicalException("Value for column " + table_name + "." + column_name + " can not be NULL.") + { + } + + virtual ~ColumnCanNotBeNull() = default; +}; +/** Message is "Column <name> not found."; the two-argument form joins table and column with a dot first. */ +class ColumnNotFoundException final : public LogicalException +{ + public: + ColumnNotFoundException(const std::string &table_name, const std::string &column_name) + : ColumnNotFoundException(table_name + "." + column_name) + { + } + + ColumnNotFoundException(const std::string &table_and_column_name) + : LogicalException("Column " + table_and_column_name + " not found.") + { + } + + virtual ~ColumnNotFoundException() = default; +}; +/** Message is "Column <table>.<column> is neither aggregated nor grouped." */ +class ColumnNotGroupedException final : public LogicalException +{ + public: + ColumnNotGroupedException(const std::string &table_name, const std::string &column_name) + : LogicalException("Column " + table_name + "." + column_name + " is neither aggregated nor grouped.") + { + } + + virtual ~ColumnNotGroupedException() = default; +}; +/** Message is "Index for <table>.<column> not found." */ +class ColumnNotIndexed final : public LogicalException +{ + public: + ColumnNotIndexed(const std::string &table_name, const std::string &column_name) + : LogicalException("Index for " + table_name + "."
+ column_name + " not found.") + { + } + + virtual ~ColumnNotIndexed() = default; +}; +/** Message is "Index for <table>.<column> already exists." */ +class IndexAlreadyExistsException final : public LogicalException +{ + public: + IndexAlreadyExistsException(const std::string &table_name, const std::string &column_name) + : LogicalException("Index for " + table_name + "." + column_name + " already exists.") + { + } + + virtual ~IndexAlreadyExistsException() = default; +}; +/** Message is "Type <type_name> can not be indexed, Type is unsupported." */ +class IndexUnsupportedTypeException final : public LogicalException +{ + public: + IndexUnsupportedTypeException(const std::string &type_name) + : LogicalException("Type " + type_name + " can not be indexed, Type is unsupported.") + { + } + + virtual ~IndexUnsupportedTypeException() = default; +}; +/** Fixed message: "Only one GROUP BY argument supported!" */ +class MultipleGroupByException final : public LogicalException +{ + public: + MultipleGroupByException() : LogicalException("Only one GROUP BY argument supported!") + { + } + + virtual ~MultipleGroupByException() = default; +}; +/** Message is "Multiple references to table <table_name> without disambiguation." */ +class MultipleTableReferences final : public LogicalException +{ + public: + MultipleTableReferences(const std::string &table_name) + : LogicalException("Multiple references to table " + table_name + " without disambiguation.") + { + } + + virtual ~MultipleTableReferences() = default; +}; +/** Message is "Table <table_name> already exists." */ +class TableAlreadyExists final : public LogicalException +{ + public: + TableAlreadyExists(const std::string &table_name) : LogicalException("Table " + table_name + " already exists.") + { + } + + virtual ~TableAlreadyExists() = default; +}; +/** One-argument form: "Table <table_name> not found."; two-argument form reports an unresolved table reference. */ +class TableNotFoundException final : public LogicalException +{ + public: + TableNotFoundException(const std::string &table_name) : LogicalException("Table " + table_name + " not found.") + { + } + + TableNotFoundException(const std::string &table_name, const std::string &reference_name) /* NOTE(review): reference_name is printed after "in statement" - confirm the parameter name matches what callers pass */ + : LogicalException("Can not resolve table reference " + table_name + " in statement " + reference_name + ".") + { + } + + virtual ~TableNotFoundException() = default; +}; +/** Attribute reference that could not be resolved; the two-argument form also names the statement. */ +class CanNotResolveColumnException final :
public LogicalException +{ + public: + CanNotResolveColumnException(const std::string &column_name, const std::string &statement) + : LogicalException("Can not resolve attribute reference " + column_name + " to a table in statement " + + statement + ".") + { + } + + CanNotResolveColumnException(const std::string &column_name) + : LogicalException("Can not resolve attribute reference " + column_name + ".") + { + } + + virtual ~CanNotResolveColumnException() = default; +}; +/** Attribute name that appears in both table1 and table2; the message names all three. */ +class NoUniqueReferenceException final : public LogicalException +{ + public: + NoUniqueReferenceException(const std::string &attribute_name, const std::string &table1, const std::string &table2) + : LogicalException("Can not uniquely reference attribute " + attribute_name + ". Both table " + table1 + + " and " + table2 + " include this attribute!") + { + } + + virtual ~NoUniqueReferenceException() = default; +}; +/** Fixed message: "Can not create table." */ +class CanNotCreateTableException final : public LogicalException +{ + public: + CanNotCreateTableException() : LogicalException("Can not create table.") + { + } + + virtual ~CanNotCreateTableException() = default; +}; +/** Fixed message: "Can not create index." */ +class CanNotCreateIndexException final : public LogicalException +{ + public: + CanNotCreateIndexException() : LogicalException("Can not create index.") + { + } + + virtual ~CanNotCreateIndexException() = default; +}; +/** Fixed message: "Can not insert." */ +class CanNotInsertException final : public LogicalException +{ + public: + CanNotInsertException() : LogicalException("Can not insert.") + { + } + + virtual ~CanNotInsertException() = default; +}; +} // namespace beedb::exception \ No newline at end of file diff --git a/src/include/exception/parser_exception.h b/src/include/exception/parser_exception.h new file mode 100644 index 0000000..538a43c --- /dev/null +++ b/src/include/exception/parser_exception.h @@ -0,0 +1,89 @@ +/*------------------------------------------------------------------------------* + * Architecture & Implementation of DBMS * +
*------------------------------------------------------------------------------* + * Copyright 2020 Databases and Information Systems Group TU Dortmund * + * Visit us at * + * http://dbis.cs.tu-dortmund.de/cms/en/home/ * + * * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * + * OTHER DEALINGS IN THE SOFTWARE. * + * * + * Authors (alphabetical list): * + * Maximilian Berens * + * Roland Kühn * + * Jan Mühlig * + *------------------------------------------------------------------------------* + */ + +#pragma once +#include "exception.h" +#include + +namespace beedb::exception +{ +/** Base class for parser errors; passes DatabaseException::Parser as the layer. */ class ParserException : public DatabaseException +{ + public: + ParserException(const std::string &message) : DatabaseException(DatabaseException::Parser, message) + { + } + virtual ~ParserException() = default; +}; +/** Message is "<parser_error> in line <line>:<column>." */ +class SqlException final : public ParserException +{ + public: + SqlException(const std::string &parser_error, const std::size_t line, const std::size_t column) + : ParserException(parser_error + " in line " + std::to_string(line) + ":" + std::to_string(column) + ".") + { + } + + virtual ~SqlException() = default; +}; +/** Message is "<statement> is not supported." */ +class UnsupportedStatementException final : public ParserException +{ + public: + UnsupportedStatementException(const std::string &statement) : ParserException(statement + " is not supported.") + { + } + + virtual ~UnsupportedStatementException() = default; +}; +/** Fixed message: "Can not convert nullptr expression." */ +class CanNotConvertNullptrException final : public ParserException +{ + public: + CanNotConvertNullptrException() : ParserException("Can not convert nullptr expression.") /* message previously misspelled nullptr */ + { + } + +
virtual ~CanNotConvertNullptrException() = default; +}; +/** Message is "Unsupported predicate operator: <operator_name>." */ +class UnsupportedOperatorException final : public ParserException +{ + public: + UnsupportedOperatorException(const std::string &operator_name) + : ParserException("Unsupported predicate operator: " + operator_name + ".") + { + } + + virtual ~UnsupportedOperatorException() = default; +}; +/** Fixed message: "Unsupported column type." */ +class UnsupportedColumnType final : public ParserException +{ + public: + UnsupportedColumnType() : ParserException("Unsupported column type.") + { + } + + virtual ~UnsupportedColumnType() = default; +}; +} // namespace beedb::exception \ No newline at end of file diff --git a/src/include/execution/add_to_index_operator.h b/src/include/execution/add_to_index_operator.h new file mode 100644 index 0000000..9eda608 --- /dev/null +++ b/src/include/execution/add_to_index_operator.h @@ -0,0 +1,54 @@ +/*------------------------------------------------------------------------------* + * Architecture & Implementation of DBMS * + *------------------------------------------------------------------------------* + * Copyright 2020 Databases and Information Systems Group TU Dortmund * + * Visit us at * + * http://dbis.cs.tu-dortmund.de/cms/en/home/ * + * * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * + * OTHER DEALINGS IN THE SOFTWARE.
* + * * + * Authors (alphabetical list): * + * Maximilian Berens * + * Roland Kühn * + * Jan Mühlig * + *------------------------------------------------------------------------------* + */ + +#pragma once + +#include "unary_operator.h" +#include +#include + +namespace beedb::execution +{ +/** + * Operator for adding tuples to an existing index. + * Only schema() is defined here; open(), next() and close() are declared and defined elsewhere. + * NOTE(review): include targets and template arguments look stripped in this text + * (e.g. the std::shared_ptr and util::optional declarations) - restore them from the original header. + */ +class AddToIndexOperator final : public UnaryOperator +{ + public: + AddToIndexOperator(const std::uint32_t column_index, const std::shared_ptr index); + virtual ~AddToIndexOperator() = default; + + virtual void open(); + virtual util::optional next(); + virtual void close(); + + virtual const table::Schema &schema() const /* returns this operator's own _schema member */ + { + return _schema; + }; + + private: + table::Schema _schema; + const std::uint32_t _column_index; /* presumably the tuple column whose value is indexed - confirm in the .cpp */ + const std::shared_ptr _index; +}; +} // namespace beedb::execution \ No newline at end of file diff --git a/src/include/execution/aggregate_operator.h b/src/include/execution/aggregate_operator.h new file mode 100644 index 0000000..5efaad5 --- /dev/null +++ b/src/include/execution/aggregate_operator.h @@ -0,0 +1,67 @@ +/*------------------------------------------------------------------------------* + * Architecture & Implementation of DBMS * + *------------------------------------------------------------------------------* + * Copyright 2020 Databases and Information Systems Group TU Dortmund * + * Visit us at * + * http://dbis.cs.tu-dortmund.de/cms/en/home/ * + * * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * + * OTHER DEALINGS IN THE SOFTWARE.
* + * * + * Authors (alphabetical list): * + * Maximilian Berens * + * Roland Kühn * + * Jan Mühlig * + *------------------------------------------------------------------------------* + */ + +//#pragma once +// +//#include "abstract_operator.h" +//#include "operator_at_a_time_interface.h" +//#include +//#include +// +//#include "expression/attribute.h" +// +// namespace beedb::execution { +// class Aggregator +// { +// public: +// void sum(const std::vector& source_tuples, const std::size_t source_index, table::Tuple& +// target_tuple, const std::size_t target_index); void avg(const std::vector& source_tuples, const +// std::size_t source_index, table::Tuple& target_tuple, const std::size_t target_index); void min(const +// std::vector& source_tuples, const std::size_t source_index, table::Tuple& target_tuple, const +// std::size_t target_index); void max(const std::vector& source_tuples, const std::size_t +// source_index, table::Tuple& target_tuple, const std::size_t target_index); void count(const +// std::vector& source_tuples, table::Tuple& target_tuple, const std::size_t target_index); +// }; +// +// class AggregateOperator : public AbstractOperator, public OperatorAtATimeInterface +// { +// public: +// AggregateOperator(const table::Schema& schema, +// const std::vector& groups, +// const std::vector& columns); +// virtual ~AggregateOperator() = default; +// +// virtual void initialize(); +// virtual std::vector execute(); +// private: +// bool _aggregated = false; +// const std::vector& _groups; +// const std::vector& _columns; +// std::unordered_map _schema_index_aggregation_map; +// std::unordered_map _schema_index_map; +// std::vector _aggregation_indices; +// std::vector _group_indices; +// +// table::Type aggregation_column_type(const expression::AttributeOrigin aggregation, const table::Type& +// column_type); std::size_t hash(const std::vector& group_indices, const table::Tuple& tuple); +// }; +//} \ No newline at end of file diff --git 
a/src/include/execution/binary_operator.h b/src/include/execution/binary_operator.h new file mode 100644 index 0000000..1cfb735 --- /dev/null +++ b/src/include/execution/binary_operator.h @@ -0,0 +1,66 @@ +/*------------------------------------------------------------------------------* + * Architecture & Implementation of DBMS * + *------------------------------------------------------------------------------* + * Copyright 2020 Databases and Information Systems Group TU Dortmund * + * Visit us at * + * http://dbis.cs.tu-dortmund.de/cms/en/home/ * + * * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * + * OTHER DEALINGS IN THE SOFTWARE. * + * * + * Authors (alphabetical list): * + * Maximilian Berens * + * Roland Kühn * + * Jan Mühlig * + *------------------------------------------------------------------------------* + */ + +#pragma once +#include "operator_interface.h" +#include +#include
+#include
+ +namespace beedb::execution +{ +/** + * Abstract operator that has two children (left and right); + * base class for JOIN-like operators. + */ +class BinaryOperator : public OperatorInterface +{ + public: + BinaryOperator() = default; + virtual ~BinaryOperator() = default; + + void left_child(std::unique_ptr child) /* setter: takes ownership of the passed child */ + { + _left_child = std::move(child); + } + void right_child(std::unique_ptr child) /* setter: takes ownership of the passed child */ + { + _right_child = std::move(child); + } + + const std::unique_ptr &left_child() const /* getter: the operator keeps ownership */ + { + return _left_child; + } + const std::unique_ptr &right_child() const /* getter: the operator keeps ownership */ + { + return _right_child; + } + + protected: /* combine() is declared only; presumably builds one tuple of new_schema from left and right - confirm in the .cpp */ + table::Tuple combine(const table::Schema &new_schema, const table::Tuple &left, const table::Tuple &right) const; + + private: + std::unique_ptr _left_child; + std::unique_ptr _right_child; +}; +} // namespace beedb::execution \ No newline at end of file diff --git a/src/include/execution/build_index_operator.h b/src/include/execution/build_index_operator.h new file mode 100644 index 0000000..73e601e --- /dev/null +++ b/src/include/execution/build_index_operator.h @@ -0,0 +1,72 @@ +/*------------------------------------------------------------------------------* + * Architecture & Implementation of DBMS * + *------------------------------------------------------------------------------* + * Copyright 2020 Databases and Information Systems Group TU Dortmund * + * Visit us at * + * http://dbis.cs.tu-dortmund.de/cms/en/home/ * + * * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * + * OTHER DEALINGS IN THE SOFTWARE.
* + * * + * Authors (alphabetical list): * + * Maximilian Berens * + * Roland Kühn * + * Jan Mühlig * + *------------------------------------------------------------------------------* + */ + +#pragma once + +#include "binary_operator.h" +#include +#include +#include +#include +#include
+ +namespace beedb::execution +{ +/** + * Fills an existing index with values. + * May have two children: One operator creating the index (left) + * which is called once and one operator (right) which provides the + * data for the index. + * create_index_operator() and data_operator() set the left and right child respectively. + */ +class BuildIndexOperator final : public BinaryOperator +{ + public: + BuildIndexOperator(Database &database, const std::string &table_name, + const table::Schema::ColumnIndexType column_index, const std::string &index_name); + virtual ~BuildIndexOperator() = default; + + virtual void open(); + virtual util::optional next(); + virtual void close(); + + virtual const table::Schema &schema() const /* returns this operator's own _schema member */ + { + return _schema; + } + + void create_index_operator(std::unique_ptr op) /* stored as the left child */ + { + this->left_child(std::move(op)); + } + void data_operator(std::unique_ptr op) /* stored as the right child */ + { + this->right_child(std::move(op)); + } + + private: + const table::Schema _schema; + Database &_database; /* reference member: the Database must outlive this operator */ + const std::string _table_name; + const std::uint32_t _column_index; /* NOTE(review): constructor takes table::Schema::ColumnIndexType - confirm both are the same type */ + const std::string _index_name; +}; +} // namespace beedb::execution \ No newline at end of file diff --git a/src/include/execution/create_index_operator.h b/src/include/execution/create_index_operator.h new file mode 100644 index 0000000..4a40917 --- /dev/null +++ b/src/include/execution/create_index_operator.h @@ -0,0 +1,72 @@ +/*------------------------------------------------------------------------------* + * Architecture & Implementation of DBMS * + *------------------------------------------------------------------------------* + * Copyright 2020 Databases and Information Systems Group TU Dortmund * + * Visit us at * + * http://dbis.cs.tu-dortmund.de/cms/en/home/ * + * * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL * + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * + * OTHER DEALINGS IN THE SOFTWARE. * + * * + * Authors (alphabetical list): * + * Maximilian Berens * + * Roland Kühn * + * Jan Mühlig * + *------------------------------------------------------------------------------* + */ + +#pragma once + +#include "operator_interface.h" +#include +#include +#include +#include +#include +#include
+#include
+#include
+#include +#include + +namespace beedb::execution +{ +/** + * Operator that creates and persists a new index for a given + * column with given index attributes (type, unique or non-unique). + * open() and close() are empty here; next() is declared only and defined elsewhere. + */ +class CreateIndexOperator final : public OperatorInterface +{ + public: + CreateIndexOperator(Database &database, const std::string &table_name, const expression::Attribute &attribute, + const std::string &index_name, const bool is_unique, const index::Type type); + virtual ~CreateIndexOperator() = default; + + virtual void open() /* intentionally empty */ + { + } + virtual util::optional next(); + virtual void close() /* intentionally empty */ + { + } + + virtual const table::Schema &schema() const /* returns this operator's own _schema member */ + { + return _schema; + } + + private: + const table::Schema _schema; + Database &_database; /* reference member: the Database must outlive this operator */ + const std::string _table_name; + const expression::Attribute _attribute; + const std::string _index_name; + const bool _is_unique; + const index::Type _index_type; +}; +} // namespace beedb::execution \ No newline at end of file diff --git a/src/include/execution/create_table_operator.h b/src/include/execution/create_table_operator.h new file mode 100644 index 0000000..baf2731 --- /dev/null +++ b/src/include/execution/create_table_operator.h @@ -0,0 +1,62 @@ +/*------------------------------------------------------------------------------* + * Architecture & Implementation of DBMS * + *------------------------------------------------------------------------------* + * Copyright 2020 Databases and Information Systems Group TU Dortmund * + * Visit us at * + * http://dbis.cs.tu-dortmund.de/cms/en/home/ * + * * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL * + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * + * OTHER DEALINGS IN THE SOFTWARE. * + * * + * Authors (alphabetical list): * + * Maximilian Berens * + * Roland Kühn * + * Jan Mühlig * + *------------------------------------------------------------------------------* + */ + +#pragma once + +#include "operator_interface.h" +#include +#include +#include +#include +#include
+#include
+#include
+#include +#include + +namespace beedb::execution +{ +/** + * Creates and persists a new table. + * open() and close() are empty here; next() is declared only and defined elsewhere. + * Holds two schemas: _schema, returned by schema(), and _schema_to_create. + */ +class CreateTableOperator final : public OperatorInterface +{ + public: + CreateTableOperator(Database &database, const table::Schema schema_to_create); /* schema is taken by value */ + + virtual ~CreateTableOperator() = default; + + virtual void open(){}; + virtual util::optional next(); + virtual void close(){}; + + virtual const table::Schema &schema() const /* returns _schema, not _schema_to_create */ + { + return _schema; + }; + + private: + Database &_database; /* reference member: the Database must outlive this operator */ + const table::Schema _schema; + const table::Schema _schema_to_create; /* presumably the definition of the table to create - confirm in the .cpp */ +}; +} // namespace beedb::execution \ No newline at end of file diff --git a/src/include/execution/cross_product_operator.h b/src/include/execution/cross_product_operator.h new file mode 100644 index 0000000..2648af4 --- /dev/null +++ b/src/include/execution/cross_product_operator.h @@ -0,0 +1,58 @@ +/*------------------------------------------------------------------------------* + * Architecture & Implementation of DBMS * + *------------------------------------------------------------------------------* + * Copyright 2020 Databases and Information Systems Group TU Dortmund * + * Visit us at * + * http://dbis.cs.tu-dortmund.de/cms/en/home/ * + * * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * + * OTHER DEALINGS IN THE SOFTWARE. * + * * + * Authors (alphabetical list): * + * Maximilian Berens * + * Roland Kühn * + * Jan Mühlig * + *------------------------------------------------------------------------------* + */ + +#pragma once + +#include "binary_operator.h" +#include +#include
+#include
+#include
+#include
+#include +#include + +namespace beedb::execution +{ +/** + * Generates a cross product of two sources. + * The two sources are the left and right children inherited from BinaryOperator. + */ +class CrossProductOperator final : public BinaryOperator +{ + public: + CrossProductOperator(const table::Schema &&schema); /* NOTE(review): a const rvalue reference cannot be moved from - confirm this is intended */ + virtual ~CrossProductOperator() = default; + + virtual void open(); + virtual util::optional next(); + virtual void close(); + + virtual const table::Schema &schema() const /* returns this operator's own _schema member */ + { + return _schema; + } + + private: + const table::Schema _schema; + util::optional _next_left_tuple; /* presumably the left tuple currently being paired with right tuples - confirm in the .cpp */ +}; +} // namespace beedb::execution \ No newline at end of file diff --git a/src/include/execution/hash_join_operator.h b/src/include/execution/hash_join_operator.h new file mode 100644 index 0000000..5106603 --- /dev/null +++ b/src/include/execution/hash_join_operator.h @@ -0,0 +1,108 @@ +/*------------------------------------------------------------------------------* + * Architecture & Implementation of DBMS * + *------------------------------------------------------------------------------* + * Copyright 2020 Databases and Information Systems Group TU Dortmund * + * Visit us at * + * http://dbis.cs.tu-dortmund.de/cms/en/home/ * + * * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * + * OTHER DEALINGS IN THE SOFTWARE. * + * * + * Authors (alphabetical list): * + * Maximilian Berens * + * Roland Kühn * + * Jan Mühlig * + *------------------------------------------------------------------------------* + */ + +#pragma once + +#include "binary_operator.h" +#include "tuple_buffer.h" +#include +#include +#include
+#include
+#include
+#include +#include +#include + +namespace beedb::execution +{ +/** + * Hash table for hash join. Maps the value found at column _key_index of each stored tuple to the list of stored tuples sharing that value. + */ +class HashTable +{ + public: + HashTable(const std::uint32_t key_index) : _key_index(key_index) /* key_index: column whose value is used as the hash key in put() */ + { + } + + ~HashTable() = default; + + bool contains(const table::Value &key) /* true if at least one bucket exists for key; NOTE(review): does not modify the table but is not declared const */ + { + return this->_map.find(key) != _map.end(); + } + + void put(const table::Tuple &tuple) /* stores a copy of tuple in the bucket of its key column value */ + { + table::Tuple in_memory_tuple(tuple); + this->_map[tuple.get(_key_index)].push_back(std::move(in_memory_tuple)); + } + + const std::vector &get(const table::Value &key) /* NOTE(review): operator[] inserts an empty bucket when key is absent, so a miss grows the map and contains(key) is true afterwards; check contains() first */ + { + return _map[key]; + } + + private: + const std::uint32_t _key_index; + std::unordered_map> _map; +}; + +/** + * Operator that joins two sources using a hash table over + * the left source. + */ +class HashJoinOperator final : public BinaryOperator +{ + public: + HashJoinOperator(const table::Schema schema, const std::uint32_t left_index, const std::uint32_t right_index); /* schema is passed by value */ + ~HashJoinOperator() = default; + + virtual void open(); + virtual util::optional next(); + virtual void close(); + + virtual const table::Schema &schema() const /* returns the schema stored by value in this operator */ + { + return _schema; + } + + private: + const table::Schema _schema; + const std::uint32_t _left_index; /* presumably the join column index in left tuples -- TODO confirm */ + const std::uint32_t _right_index; /* presumably the join column index in right tuples -- TODO confirm */ + HashTable _hash_table; + bool _is_built = false; /* presumably set once build_hash_table() has run -- TODO confirm */ + TupleBuffer _tuple_buffer; + + /** + * Builds the hash table. + */ + void build_hash_table(); + + /** + * Probes the hash table.
+ */ + util::optional probe_hash_table(); +}; +} // namespace beedb::execution \ No newline at end of file diff --git a/src/include/execution/index_scan_operator.h b/src/include/execution/index_scan_operator.h new file mode 100644 index 0000000..60d41f9 --- /dev/null +++ b/src/include/execution/index_scan_operator.h @@ -0,0 +1,119 @@ +/*------------------------------------------------------------------------------* + * Architecture & Implementation of DBMS * + *------------------------------------------------------------------------------* + * Copyright 2020 Databases and Information Systems Group TU Dortmund * + * Visit us at * + * http://dbis.cs.tu-dortmund.de/cms/en/home/ * + * * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * + * OTHER DEALINGS IN THE SOFTWARE. * + * * + * Authors (alphabetical list): * + * Maximilian Berens * + * Roland Kühn * + * Jan Mühlig * + *------------------------------------------------------------------------------* + */ + +#pragma once + +#include "operator_interface.h" +#include "tuple_buffer.h" +#include +#include +#include +#include +#include +#include +#include
+#include
+#include
+#include + +namespace beedb::execution +{ +/** + * Key (or key range) that has to be looked up in the index. + * May be a range or a single key; a single key is encoded by storing the numeric_limits maximum as upper bound (see is_single_key()). + */ +class KeyRange +{ + public: + KeyRange(const std::int64_t single_key) : _from(single_key), _to(std::numeric_limits::max()) /* single key: _to is set to the sentinel maximum */ + { + } + + KeyRange(const std::int64_t from, const std::int64_t to) : _from(from), _to(to) /* NOTE(review): a range whose upper bound equals the sentinel maximum is reported as a single key by is_single_key() */ + { + } + + ~KeyRange() = default; + + bool is_single_key() const /* true when _to equals the sentinel maximum */ + { + return _to == std::numeric_limits::max(); + } + std::int64_t single_key() const /* same value as from() */ + { + return _from; + } + std::int64_t from() const + { + return _from; + } + std::int64_t to() const /* for a single key this is the sentinel maximum, not the key */ + { + return _to; + } + + bool operator<(const KeyRange &other) const /* orders by _from only; NOTE(review): in an ordered container such as std::set, two ranges with equal from() compare equivalent, so only one of them is kept */ + { + return _from < other._from; + } + + private: + const std::int64_t _from; + const std::int64_t _to; +}; + +/** + * Takes an index and keys to be looked up in the index + * and scans only over pages found in the index instead + * of scanning all pages from the table. + */ +class IndexScanOperator final : public OperatorInterface +{ + public: + IndexScanOperator(const std::uint32_t scan_page_limit, const table::Schema &schema, + disk::BufferManager &buffer_manager, table::TableDiskManager &table_disk_manager, + std::set &key_ranges, std::shared_ptr index); /* key_ranges is taken by non-const reference while the member _key_ranges is a set held by value */ + virtual ~IndexScanOperator() = default; + + virtual void open(); + virtual util::optional next(); + virtual void close(); + + virtual const table::Schema &schema() const /* returns the schema stored by value in this operator */ + { + return _schema; + } + + private: + const std::uint32_t _scan_page_limit; /* presumably the maximum number of pages handled per scan step -- TODO confirm */ + const table::Schema _schema; + disk::BufferManager &_buffer_manager; /* non-owning reference; must outlive this operator */ + table::TableDiskManager &_table_disk_manager; /* non-owning reference; must outlive this operator */ + std::set _key_ranges; + std::shared_ptr _index; /* shared ownership of the index */ + + std::queue _pages_to_scan; + std::vector _pinned_pages; + + TupleBuffer _buffer; +}; +} // namespace beedb::execution \ No newline at end of file diff --git a/src/include/execution/insert_operator.h b/src/include/execution/insert_operator.h new file mode 100644 index 0000000..478c3b3 --- /dev/null +++
b/src/include/execution/insert_operator.h @@ -0,0 +1,63 @@ +/*------------------------------------------------------------------------------* + * Architecture & Implementation of DBMS * + *------------------------------------------------------------------------------* + * Copyright 2020 Databases and Information Systems Group TU Dortmund * + * Visit us at * + * http://dbis.cs.tu-dortmund.de/cms/en/home/ * + * * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * + * OTHER DEALINGS IN THE SOFTWARE. * + * * + * Authors (alphabetical list): * + * Maximilian Berens * + * Roland Kühn * + * Jan Mühlig * + *------------------------------------------------------------------------------* + */ + +#pragma once + +#include "unary_operator.h" +#include +#include +#include +#include
+#include
+#include
+ +namespace beedb::execution +{ +/** + * Inserts all tuples provided by the child operator. + * The child may be a tuple buffer or a subquery. + */ +class InsertOperator final : public UnaryOperator +{ + public: + InsertOperator(disk::BufferManager &buffer_manager, table::TableDiskManager &table_disk_manager, + statistic::SystemStatistics &statistics, table::Table &table); /* all four arguments are kept as references in the members below */ + virtual ~InsertOperator() = default; + + virtual void open(); + virtual util::optional next(); + virtual void close(); + + virtual const table::Schema &schema() const /* returns _schema; the constructor takes no schema argument, so its initialization is not visible in this header */ + { + return _schema; + } + + private: + const table::Schema _schema; + disk::BufferManager &_buffer_manager; /* non-owning reference; must outlive this operator */ + table::TableDiskManager &_table_disk_manager; /* non-owning reference; must outlive this operator */ + statistic::SystemStatistics &_statistics; /* non-owning reference; must outlive this operator */ + table::Table &_table; /* non-owning, mutable reference; must outlive this operator */ + disk::Page::page_id _last_pinned_page = disk::Page::INVALID_PAGE_ID; /* starts as INVALID_PAGE_ID */ +}; +} // namespace beedb::execution \ No newline at end of file diff --git a/src/include/execution/limit_operator.h b/src/include/execution/limit_operator.h new file mode 100644 index 0000000..9657ce0 --- /dev/null +++ b/src/include/execution/limit_operator.h @@ -0,0 +1,59 @@ +/*------------------------------------------------------------------------------* + * Architecture & Implementation of DBMS * + *------------------------------------------------------------------------------* + * Copyright 2020 Databases and Information Systems Group TU Dortmund * + * Visit us at * + * http://dbis.cs.tu-dortmund.de/cms/en/home/ * + * * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * + * OTHER DEALINGS IN THE SOFTWARE.
* + * * + * Authors (alphabetical list): * + * Maximilian Berens * + * Roland Kühn * + * Jan Mühlig * + *------------------------------------------------------------------------------* + */ + +#pragma once + +#include "unary_operator.h" +#include +#include
+#include
+#include + +namespace beedb::execution +{ +/** + * Limits the output result. Constructed with a limit and an offset; open(), next() and close() are defined out of line. + */ +class LimitOperator final : public UnaryOperator +{ + public: + LimitOperator(const table::Schema &schema, const std::uint64_t limit, const std::uint64_t offset); + + virtual ~LimitOperator() = default; + + virtual void open(); + virtual util::optional next(); + virtual void close(); + + virtual const table::Schema &schema() const /* returns the referenced schema; this operator does not own it */ + { + return _schema; + }; + + private: + const table::Schema &_schema; /* NOTE(review): reference member; the referenced schema must outlive this operator. Several other operators in this commit store the schema by value */ + const std::uint64_t _limit; + const std::uint64_t _offset; + bool _has_skipped = false; /* presumably set once the first _offset tuples were skipped -- TODO confirm */ + std::uint64_t _count = 0; /* presumably the number of tuples emitted so far -- TODO confirm */ +}; +} // namespace beedb::execution \ No newline at end of file diff --git a/src/include/execution/nested_loops_join_operator.h b/src/include/execution/nested_loops_join_operator.h new file mode 100644 index 0000000..88b7191 --- /dev/null +++ b/src/include/execution/nested_loops_join_operator.h @@ -0,0 +1,66 @@ +/*------------------------------------------------------------------------------* + * Architecture & Implementation of DBMS * + *------------------------------------------------------------------------------* + * Copyright 2020 Databases and Information Systems Group TU Dortmund * + * Visit us at * + * http://dbis.cs.tu-dortmund.de/cms/en/home/ * + * * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * + * OTHER DEALINGS IN THE SOFTWARE.
* + * * + * Authors (alphabetical list): * + * Maximilian Berens * + * Roland Kühn * + * Jan Mühlig * + *------------------------------------------------------------------------------* + */ + +#pragma once + +#include "binary_operator.h" +#include "predicate_matcher.h" +#include +#include
+#include
+#include
+#include
+#include +#include + +namespace beedb::execution +{ +/** + * Joins two sources by outer-looping over the left + * and inner-looping over the right child's tuples. + */ +class NestedLoopsJoinOperator final : public BinaryOperator +{ + public: + NestedLoopsJoinOperator(const table::Schema &&schema, std::unique_ptr predicate_matcher); /* takes ownership of predicate_matcher; NOTE(review): the const rvalue reference schema cannot be moved from, only copied */ + virtual ~NestedLoopsJoinOperator() = default; + + virtual void open(); + virtual util::optional next(); + virtual void close(); + + virtual const table::Schema &schema() const /* returns the schema stored by value in this operator */ + { + return _schema; + } + + private: + const table::Schema _schema; + std::unique_ptr _predicate_matcher; /* owned join predicate */ + util::optional _next_left_tuple; /* presumably the outer (left) tuple currently being joined -- TODO confirm */ + + bool matches(const table::Tuple &left, const table::Tuple &right) /* forwards to the two-tuple matches() of the predicate matcher; dereferences _predicate_matcher without a null check */ + { + return this->_predicate_matcher->matches(left, right); + } +}; +} // namespace beedb::execution \ No newline at end of file diff --git a/src/include/execution/operator_interface.h b/src/include/execution/operator_interface.h new file mode 100644 index 0000000..d403fa2 --- /dev/null +++ b/src/include/execution/operator_interface.h @@ -0,0 +1,46 @@ +/*------------------------------------------------------------------------------* + * Architecture & Implementation of DBMS * + *------------------------------------------------------------------------------* + * Copyright 2020 Databases and Information Systems Group TU Dortmund * + * Visit us at * + * http://dbis.cs.tu-dortmund.de/cms/en/home/ * + * * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * + * OTHER DEALINGS IN THE SOFTWARE.
* + * * + * Authors (alphabetical list): * + * Maximilian Berens * + * Roland Kühn * + * Jan Mühlig * + *------------------------------------------------------------------------------* + */ + +#pragma once +#include
+#include
+#include + +namespace beedb::execution +{ +/** + * Interface for all physical execution operators. + * The interface is volcano-style (using open, next, and close). + */ +class OperatorInterface +{ + public: + OperatorInterface() = default; + virtual ~OperatorInterface() = default; /* virtual, so operators can be destroyed through an OperatorInterface pointer */ + + virtual void open() = 0; /* pure virtual; every operator must implement it */ + virtual util::optional next() = 0; /* pure virtual; presumably an empty optional signals that no further tuple is available -- TODO confirm */ + virtual void close() = 0; /* pure virtual; every operator must implement it */ + + virtual const table::Schema &schema() const = 0; /* schema of the operator; returned by const reference */ +}; +} // namespace beedb::execution \ No newline at end of file diff --git a/src/include/execution/order_operator.h b/src/include/execution/order_operator.h new file mode 100644 index 0000000..a48652e --- /dev/null +++ b/src/include/execution/order_operator.h @@ -0,0 +1,110 @@ +/*------------------------------------------------------------------------------* + * Architecture & Implementation of DBMS * + *------------------------------------------------------------------------------* + * Copyright 2020 Databases and Information Systems Group TU Dortmund * + * Visit us at * + * http://dbis.cs.tu-dortmund.de/cms/en/home/ * + * * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * + * OTHER DEALINGS IN THE SOFTWARE. * + * * + * Authors (alphabetical list): * + * Maximilian Berens * + * Roland Kühn * + * Jan Mühlig * + *------------------------------------------------------------------------------* + */ + +#pragma once + +#include "unary_operator.h" +#include +#include +#include
+#include
+#include +#include + +namespace beedb::execution +{ +/** + * Comparator for comparing two tuples during + * sort. Walks the (column index, ascending flag) pairs in order; the first column whose values differ decides the result. + */ +class TupleComparator +{ + public: + TupleComparator(const std::vector> &indices) : _indices(indices) /* NOTE(review): only a reference to indices is stored; the vector must outlive the comparator */ + { + } + ~TupleComparator() = default; + + bool operator()(const table::Tuple &left, const table::Tuple &right) const /* true if left has to be ordered before right */ + { + for (auto &[index, is_ascending] : _indices) + { + const auto value_left = left.get(index); + const auto value_right = right.get(index); + if (is_ascending) /* ascending: the smaller value goes first */ + { + if (value_left < value_right) + { + return true; + } + else if (value_left > value_right) + { + return false; + } + } + else /* descending: the larger value goes first */ + { + if (value_right < value_left) + { + return true; + } + else if (value_right > value_left) + { + return false; + } + } + } + + return false; /* all compared columns are equal, so neither tuple is ordered before the other */ + } + + private: + const std::vector> &_indices; +}; + +/** + * Sorts the result provided by a child using quicksort. + */ +class OrderOperator final : public UnaryOperator +{ + public: + OrderOperator(const table::Schema &schema, std::vector> &&order_columns); /* order_columns is taken as an rvalue reference; schema is kept as a reference */ + virtual ~OrderOperator() = default; + + virtual void open(); + + virtual util::optional next(); + + virtual void close(); + + virtual const table::Schema &schema() const /* returns the referenced schema; this operator does not own it */ + { + return _schema; + } + + private: + const table::Schema &_schema; /* NOTE(review): reference member; the referenced schema must outlive this operator */ + const std::vector> _order_columns; /* held by value */ + std::unique_ptr _result_table; /* presumably holds the sorted result -- TODO confirm */ + std::size_t _stack_index = 0u; /* presumably the position of the next tuple to return -- TODO confirm */ +}; +} // namespace beedb::execution \ No newline at end of file diff --git a/src/include/execution/predicate_matcher.h b/src/include/execution/predicate_matcher.h new file mode 100644 index 0000000..a97abb8 --- /dev/null +++ b/src/include/execution/predicate_matcher.h @@ -0,0 +1,268 @@ +/*------------------------------------------------------------------------------* + * Architecture & Implementation of DBMS * + *------------------------------------------------------------------------------* + * Copyright 2020 Databases and Information Systems Group TU Dortmund * + * Visit us at * + *
http://dbis.cs.tu-dortmund.de/cms/en/home/ * + * * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * + * OTHER DEALINGS IN THE SOFTWARE. * + * * + * Authors (alphabetical list): * + * Maximilian Berens * + * Roland Kühn * + * Jan Mühlig * + *------------------------------------------------------------------------------* + */ + +#pragma once +#include + +#include
+#include
+ +namespace beedb::execution +{ + +/** + * Interface for predicate matcher. The predicate matcher + * takes one or two tuples and compares them with a constant + * or a value from another tuple. + */ +class PredicateMatcherInterface +{ + public: + enum Comparison /* comparison kinds; used as the compile-time parameter C of the attribute matchers in this header */ + { + EQ, + LE, + LT, + GE, + GT, + NEQ + }; + virtual ~PredicateMatcherInterface() = default; + virtual bool matches(const table::Tuple &tuple) = 0; /* evaluates the predicate on a single tuple */ + virtual bool matches(const table::Tuple &left, const table::Tuple &right) = 0; /* evaluates the predicate on a pair of tuples */ +}; + +/** + * The given tuple will always match. + */ +class AlwaysTrueMatcher final : public PredicateMatcherInterface +{ + public: + AlwaysTrueMatcher() = default; + + virtual ~AlwaysTrueMatcher() = default; + + virtual bool matches(const table::Tuple &) /* argument is ignored */ + { + return true; + } + + virtual bool matches(const table::Tuple &, const table::Tuple &) /* arguments are ignored */ + { + return true; + } +}; + +/** + * Takes two predicate matchers p1 and p2 and connects them using AND. + * Tuple matches when p1 AND p2 matches. + */ +class AndMatcher final : public PredicateMatcherInterface +{ + public: + AndMatcher(std::unique_ptr left, std::unique_ptr right) /* takes ownership of both sub-matchers */ + : _left(std::move(left)), _right(std::move(right)) + { + } + + virtual ~AndMatcher() = default; + + virtual bool matches(const table::Tuple &tuple) /* short-circuits: _right is not evaluated when _left does not match */ + { + return _left->matches(tuple) && _right->matches(tuple); + } + + virtual bool matches(const table::Tuple &left, const table::Tuple &right) /* short-circuits: _right is not evaluated when _left does not match */ + { + return _left->matches(left, right) && _right->matches(left, right); + } + + private: + std::unique_ptr _left; + std::unique_ptr _right; +}; + +/** + * Takes two predicate matchers p1 and p2 and connects them using OR. + * Tuple matches when p1 OR p2 matches.
+ */ +class OrMatcher final : public PredicateMatcherInterface +{ + public: + OrMatcher(std::unique_ptr left, std::unique_ptr right) /* takes ownership of both sub-matchers */ + : _left(std::move(left)), _right(std::move(right)) + { + } + + virtual ~OrMatcher() = default; + + virtual bool matches(const table::Tuple &tuple) /* short-circuits: _right is not evaluated when _left already matches */ + { + return _left->matches(tuple) || _right->matches(tuple); + } + + virtual bool matches(const table::Tuple &left, const table::Tuple &right) /* short-circuits: _right is not evaluated when _left already matches */ + { + return _left->matches(left, right) || _right->matches(left, right); + } + + private: + std::unique_ptr _left; + std::unique_ptr _right; +}; + +/** + * Compares a specific column in a tuple with a constant. The comparison kind C is selected at compile time via if constexpr. + */ +template class AttributeValueMatcher final : public PredicateMatcherInterface +{ + public: + AttributeValueMatcher(const table::Schema::ColumnIndexType schema_index, const table::Value value) /* schema_index: column to read from the tuple; value: constant to compare against (stored as a copy) */ + : _schema_index(schema_index), _value(value) + { + } + + ~AttributeValueMatcher() = default; + + virtual bool matches(const table::Tuple &tuple) /* compares tuple.get(_schema_index) with _value using comparison C */ + { + if constexpr (C == EQ) + { + return tuple.get(_schema_index) == _value; + } + else if constexpr (C == LE) + { + return tuple.get(_schema_index) <= _value; + } + else if constexpr (C == LT) + { + return tuple.get(_schema_index) < _value; + } + else if constexpr (C == GE) + { + return tuple.get(_schema_index) >= _value; + } + else if constexpr (C == GT) + { + return tuple.get(_schema_index) > _value; + } + else /* remaining case: NEQ, or any value of C not listed above */ + { + return tuple.get(_schema_index) != _value; + } + } + + virtual bool matches(const table::Tuple &, const table::Tuple &) /* NOTE(review): the two-tuple form ignores its arguments and never matches */ + { + return false; + } + + protected: + const table::Schema::ColumnIndexType _schema_index; + const table::Value _value; +}; + +/** + * Compares two columns in a tuple or one column of two tuples.
+ */ +template class AttributeMatcher final : public PredicateMatcherInterface +{ + public: + AttributeMatcher(const table::Schema::ColumnIndexType schema_index_left, + const table::Schema::ColumnIndexType schema_index_right) /* column indices of the left-hand and right-hand operand */ + : _schema_index_left(schema_index_left), _schema_index_right(schema_index_right) + { + } + + ~AttributeMatcher() = default; + + virtual bool matches(const table::Tuple &tuple) /* single-tuple form: both operands are read from the same tuple */ + { + if constexpr (C == EQ) + { + return tuple.get(_schema_index_left) == tuple.get(_schema_index_right); + } + else if constexpr (C == LE) + { + return tuple.get(_schema_index_left) <= tuple.get(_schema_index_right); + } + else if constexpr (C == LT) + { + return tuple.get(_schema_index_left) < tuple.get(_schema_index_right); + } + else if constexpr (C == GE) + { + return tuple.get(_schema_index_left) >= tuple.get(_schema_index_right); + } + else if constexpr (C == GT) + { + return tuple.get(_schema_index_left) > tuple.get(_schema_index_right); + } + else /* remaining case: NEQ, or any value of C not listed above */ + { + return tuple.get(_schema_index_left) != tuple.get(_schema_index_right); + } + } + + virtual bool matches(const table::Tuple &left, const table::Tuple &right) /* two-tuple form: _schema_index_left is read from left, _schema_index_right from right */ + { + if constexpr (C == EQ) + { + return left.get(_schema_index_left) == right.get(_schema_index_right); + } + else if constexpr (C == LE) + { + return left.get(_schema_index_left) <= right.get(_schema_index_right); + } + else if constexpr (C == LT) + { + return left.get(_schema_index_left) < right.get(_schema_index_right); + } + else if constexpr (C == GE) + { + return left.get(_schema_index_left) >= right.get(_schema_index_right); + } + else if constexpr (C == GT) + { + return left.get(_schema_index_left) > right.get(_schema_index_right); + } + else /* remaining case: NEQ, or any value of C not listed above */ + { + return left.get(_schema_index_left) != right.get(_schema_index_right); + } + } + + table::Schema::ColumnIndexType left_index() const /* column index of the left-hand operand */ + { + return _schema_index_left; + } + + table::Schema::ColumnIndexType right_index() const /* column index of the right-hand operand */ + { + return _schema_index_right; + } + + protected: + const
table::Schema::ColumnIndexType _schema_index_left; + const table::Schema::ColumnIndexType _schema_index_right; +}; +} // namespace beedb::execution \ No newline at end of file diff --git a/src/include/execution/projection_operator.h b/src/include/execution/projection_operator.h new file mode 100644 index 0000000..6b253f2 --- /dev/null +++ b/src/include/execution/projection_operator.h @@ -0,0 +1,56 @@ +/*------------------------------------------------------------------------------* + * Architecture & Implementation of DBMS * + *------------------------------------------------------------------------------* + * Copyright 2020 Databases and Information Systems Group TU Dortmund * + * Visit us at * + * http://dbis.cs.tu-dortmund.de/cms/en/home/ * + * * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * + * OTHER DEALINGS IN THE SOFTWARE. * + * * + * Authors (alphabetical list): * + * Maximilian Berens * + * Roland Kühn * + * Jan Mühlig * + *------------------------------------------------------------------------------* + */ + +#pragma once + +#include "unary_operator.h" +#include +#include +#include
+#include +#include + +namespace beedb::execution +{ +/** + * Applies a new schema to all tuples provided by the child. + * May hide or re-order columns. + */ +class ProjectionOperator final : public UnaryOperator +{ + public: + ProjectionOperator(const table::Schema schema); /* schema is passed by value */ + virtual ~ProjectionOperator() = default; + + virtual void open(); + virtual util::optional next(); + virtual void close(); + + virtual const table::Schema &schema() const /* returns the schema stored by value in this operator */ + { + return _schema; + } + + private: + const table::Schema _schema; +}; +} // namespace beedb::execution \ No newline at end of file diff --git a/src/include/execution/selection_operator.h b/src/include/execution/selection_operator.h new file mode 100644 index 0000000..77c8f43 --- /dev/null +++ b/src/include/execution/selection_operator.h @@ -0,0 +1,67 @@ +/*------------------------------------------------------------------------------* + * Architecture & Implementation of DBMS * + *------------------------------------------------------------------------------* + * Copyright 2020 Databases and Information Systems Group TU Dortmund * + * Visit us at * + * http://dbis.cs.tu-dortmund.de/cms/en/home/ * + * * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * + * OTHER DEALINGS IN THE SOFTWARE. * + * * + * Authors (alphabetical list): * + * Maximilian Berens * + * Roland Kühn * + * Jan Mühlig * + *------------------------------------------------------------------------------* + */ + +#pragma once + +#include "predicate_matcher.h" +#include "unary_operator.h" +#include +#include +#include +#include
+#include
+#include
+ +namespace beedb::execution +{ +/** + * Selects tuples matching a given predicate. + * Tuples that do not match are filtered out. + */ +class SelectionOperator final : public UnaryOperator +{ + public: + SelectionOperator(const table::Schema &schema, std::unique_ptr predicate_matcher); /* takes ownership of predicate_matcher; schema is kept as a reference */ + + virtual ~SelectionOperator() = default; + + virtual void open(); + + virtual util::optional next(); + + virtual void close(); + + virtual const table::Schema &schema() const /* returns the referenced schema; this operator does not own it */ + { + return _schema; + } + + private: + const table::Schema &_schema; /* NOTE(review): reference member; the referenced schema must outlive this operator */ + std::unique_ptr _predicate_matcher; /* owned selection predicate */ + + bool matches(const table::Tuple &tuple) /* forwards to the single-tuple matches() of the predicate matcher; dereferences _predicate_matcher without a null check */ + { + return this->_predicate_matcher->matches(tuple); + } +}; +} // namespace beedb::execution \ No newline at end of file diff --git a/src/include/execution/sequential_scan_operator.h b/src/include/execution/sequential_scan_operator.h new file mode 100644 index 0000000..750ff6d --- /dev/null +++ b/src/include/execution/sequential_scan_operator.h @@ -0,0 +1,68 @@ +/*------------------------------------------------------------------------------* + * Architecture & Implementation of DBMS * + *------------------------------------------------------------------------------* + * Copyright 2020 Databases and Information Systems Group TU Dortmund * + * Visit us at * + * http://dbis.cs.tu-dortmund.de/cms/en/home/ * + * * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * + * OTHER DEALINGS IN THE SOFTWARE.
* + * * + * Authors (alphabetical list): * + * Maximilian Berens * + * Roland Kühn * + * Jan Mühlig * + *------------------------------------------------------------------------------* + */ + +#pragma once + +#include "tuple_buffer.h" +#include "unary_operator.h" +#include +#include +#include
+#include
+#include
+#include + +namespace beedb::execution +{ +/** + * Scans all pages of a given table and returns all tuples. + */ +class SequentialScanOperator final : public UnaryOperator +{ + public: + SequentialScanOperator(const std::uint32_t scan_page_limit, const table::Schema &schema, + disk::BufferManager &buffer_manager, table::TableDiskManager &table_disk_manager, + const table::Table &table); /* schema is stored as a copy; the managers and the table are kept as references */ + virtual ~SequentialScanOperator() = default; + + virtual void open(); + virtual util::optional next(); + virtual void close(); + + virtual const table::Schema &schema() const /* returns the schema stored by value in this operator */ + { + return _schema; + } + + private: + const std::uint32_t _scan_page_limit; /* presumably the maximum number of pages handled per scan step -- TODO confirm */ + const table::Schema _schema; + disk::BufferManager &_buffer_manager; /* non-owning reference; must outlive this operator */ + table::TableDiskManager &_table_disk_manager; /* non-owning reference; must outlive this operator */ + const table::Table &_table; /* non-owning, read-only reference; must outlive this operator */ + + disk::Page::page_id _next_page_id_to_scan; /* NOTE(review): no in-class initializer; must be set by the constructor or open(), which are not visible here */ + std::vector _pinned_pages; + + TupleBuffer _buffer; +}; +} // namespace beedb::execution \ No newline at end of file diff --git a/src/include/execution/tuple_buffer.h b/src/include/execution/tuple_buffer.h new file mode 100644 index 0000000..d8e6707 --- /dev/null +++ b/src/include/execution/tuple_buffer.h @@ -0,0 +1,65 @@ +/*------------------------------------------------------------------------------* + * Architecture & Implementation of DBMS * + *------------------------------------------------------------------------------* + * Copyright 2020 Databases and Information Systems Group TU Dortmund * + * Visit us at * + * http://dbis.cs.tu-dortmund.de/cms/en/home/ * + * * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL * + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * + * OTHER DEALINGS IN THE SOFTWARE. * + * * + * Authors (alphabetical list): * + * Maximilian Berens * + * Roland Kühn * + * Jan Mühlig * + *------------------------------------------------------------------------------* + */ + +#pragma once + +#include
+#include + +namespace beedb::execution +{ +/** + * Buffers tuples; needed by blocking operators. Tuples are appended with add() and read front to back with pop(); _head is the index of the next tuple to read. + */ +class TupleBuffer +{ + public: + TupleBuffer() = default; + ~TupleBuffer() = default; + void add(table::Tuple &tuple) /* moves from tuple; the caller's object is left in a moved-from state */ + { + _buffer.push_back(std::move(tuple)); + } + void add(std::vector &tuples) /* moves every element into the buffer; tuples keeps its size but its elements are moved-from */ + { + std::move(tuples.begin(), tuples.end(), std::back_inserter(_buffer)); + } + + bool empty() const /* true when no unread tuple is left; the _buffer.empty() check comes first so that size() - 1 cannot wrap around */ + { + return _buffer.empty() || _head > _buffer.size() - 1; + } + table::Tuple &&pop() /* returns the tuple at _head as an rvalue reference and advances _head; no bounds check, so call empty() first. The slot stays in _buffer until clear() */ + { + return std::move(_buffer[_head++]); + } + void clear() /* drops all tuples and resets the read position */ + { + _buffer.clear(); + _head = 0u; + } + + private: + std::vector _buffer; + std::size_t _head = 0u; +}; +} // namespace beedb::execution \ No newline at end of file diff --git a/src/include/execution/tuple_buffer_operator.h b/src/include/execution/tuple_buffer_operator.h new file mode 100644 index 0000000..f379220 --- /dev/null +++ b/src/include/execution/tuple_buffer_operator.h @@ -0,0 +1,68 @@ +/*------------------------------------------------------------------------------* + * Architecture & Implementation of DBMS * + *------------------------------------------------------------------------------* + * Copyright 2020 Databases and Information Systems Group TU Dortmund * + * Visit us at * + * http://dbis.cs.tu-dortmund.de/cms/en/home/ * + * * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * + * OTHER DEALINGS IN THE SOFTWARE.
* + * * + * Authors (alphabetical list): * + * Maximilian Berens * + * Roland Kühn * + * Jan Mühlig * + *------------------------------------------------------------------------------* + */ + +#pragma once + +#include "operator_interface.h" +#include "tuple_buffer.h" +#include
+#include
+#include + +namespace beedb::execution +{ +/** + * Buffers tuples and pops the on next(). + */ +class TupleBufferOperator final : public OperatorInterface +{ + public: + TupleBufferOperator(const table::Schema schema); + ~TupleBufferOperator() = default; + + virtual void open() + { + } + virtual util::optional next(); + virtual void close() + { + } + + virtual const table::Schema &schema() const + { + return _schema; + } + + void add(table::Tuple &tuple) + { + _tuple_buffer.add(tuple); + } + void add(std::vector &tuples) + { + _tuple_buffer.add(tuples); + } + + private: + const table::Schema _schema; + TupleBuffer _tuple_buffer; +}; +} // namespace beedb::execution \ No newline at end of file diff --git a/src/include/execution/unary_operator.h b/src/include/execution/unary_operator.h new file mode 100644 index 0000000..927b9b5 --- /dev/null +++ b/src/include/execution/unary_operator.h @@ -0,0 +1,50 @@ +/*------------------------------------------------------------------------------* + * Architecture & Implementation of DBMS * + *------------------------------------------------------------------------------* + * Copyright 2020 Databases and Information Systems Group TU Dortmund * + * Visit us at * + * http://dbis.cs.tu-dortmund.de/cms/en/home/ * + * * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * + * OTHER DEALINGS IN THE SOFTWARE. 
* + * * + * Authors (alphabetical list): * + * Maximilian Berens * + * Roland Kühn * + * Jan Mühlig * + *------------------------------------------------------------------------------* + */ + +#pragma once +#include "operator_interface.h" +#include + +namespace beedb::execution +{ +/** + * Interface for operators providing a single child. + */ +class UnaryOperator : public OperatorInterface +{ + public: + UnaryOperator() = default; + virtual ~UnaryOperator() = default; + + void child(std::unique_ptr child) + { + _child = std::move(child); + } + const std::unique_ptr &child() const + { + return _child; + } + + private: + std::unique_ptr _child; +}; +} // namespace beedb::execution \ No newline at end of file diff --git a/src/include/execution/update_operator.h b/src/include/execution/update_operator.h new file mode 100644 index 0000000..7b4b752 --- /dev/null +++ b/src/include/execution/update_operator.h @@ -0,0 +1,57 @@ +/*------------------------------------------------------------------------------* + * Architecture & Implementation of DBMS * + *------------------------------------------------------------------------------* + * Copyright 2020 Databases and Information Systems Group TU Dortmund * + * Visit us at * + * http://dbis.cs.tu-dortmund.de/cms/en/home/ * + * * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * + * OTHER DEALINGS IN THE SOFTWARE. 
* + * * + * Authors (alphabetical list): * + * Maximilian Berens * + * Roland Kühn * + * Jan Mühlig * + *------------------------------------------------------------------------------* + */ + +#pragma once +#include "unary_operator.h" +#include +#include
+#include
+#include +#include + +namespace beedb::execution +{ +class UpdateOperator : public UnaryOperator +{ + public: + UpdateOperator(table::TableDiskManager &table_disk_manager, + std::vector> &values) + : _table_disk_manager(table_disk_manager), _new_column_values(std::move(values)) + { + } + virtual ~UpdateOperator() = default; + + virtual void open(); + virtual util::optional next(); + virtual void close(); + + virtual const table::Schema &schema() const + { + return _schema; + } + + private: + table::Schema _schema; + table::TableDiskManager _table_disk_manager; + std::vector> _new_column_values; +}; +} // namespace beedb::execution \ No newline at end of file diff --git a/src/include/expression/attribute.h b/src/include/expression/attribute.h new file mode 100644 index 0000000..6299a6c --- /dev/null +++ b/src/include/expression/attribute.h @@ -0,0 +1,212 @@ +/*------------------------------------------------------------------------------* + * Architecture & Implementation of DBMS * + *------------------------------------------------------------------------------* + * Copyright 2020 Databases and Information Systems Group TU Dortmund * + * Visit us at * + * http://dbis.cs.tu-dortmund.de/cms/en/home/ * + * * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * + * OTHER DEALINGS IN THE SOFTWARE. 
// * + * * + * Authors (alphabetical list): * + * Maximilian Berens * + * Roland Kühn * + * Jan Mühlig * + *------------------------------------------------------------------------------* + */

#pragma once

#include <optional>
#include <ostream>
#include <string>
#include <vector>

namespace beedb::expression
{

using AttributeName = std::string;
using TableName = std::optional<std::string>;
using Alias = std::optional<std::string>;
using InAscendingOrder = std::optional<bool>;

/**
 * Describes where the values of an attribute come from.
 *
 * Every enumerator with an underlying value greater than MIXED denotes a
 * computed attribute; Attribute::combined_name wraps those in
 * "<to_string(origin)>(...)".
 */
enum class AttributeOrigin : int
{
    PHYSICAL = 0, // the "default"; a physical column
    MIXED = 1,    // used for Asterisk
    // aggregations:
    AGG_SUM,
    AGG_COUNT,
    AGG_AVG,
    AGG_MIN,
    AGG_MAX,
    // arithmetic:
    ARI_ADD,
    ARI_MUL,
    ARI_DIV,
    ARI_SUB
};

/**
 * @return the textual name of an origin. Only PHYSICAL and the aggregations
 * have a name; everything else maps to "UNSUPPORTED_FUNCTION".
 */
static inline std::string to_string(AttributeOrigin o)
{
    switch (o)
    {
    case AttributeOrigin::AGG_AVG:
        return "AVG";
    case AttributeOrigin::AGG_SUM:
        return "SUM";
    case AttributeOrigin::AGG_COUNT:
        return "COUNT";
    case AttributeOrigin::AGG_MIN:
        return "MIN";
    case AttributeOrigin::AGG_MAX:
        return "MAX";
    case AttributeOrigin::PHYSICAL:
        return "PHYSICAL";
    default:
        return "UNSUPPORTED_FUNCTION";
    }
}

/**
 * The actual type that holds attribute information.
 *
 * This is an aggregate and is brace-initialized in member order
 * (name, table, alias, origin, order); do not add constructors or reorder
 * the members. All members are const, so an Attribute cannot be assigned to.
 */
struct Attribute
{
    friend std::ostream &operator<<(std::ostream &stream, const Attribute &attribute);

    /// MEMBERS:
    const AttributeName name;

    const TableName table = std::nullopt;

    // Note: An alias has no meaning for optimization or consistency
    const Alias alias = {};

    // Where do values from this attribute come from, a table or an operator?
    const AttributeOrigin origin = AttributeOrigin::PHYSICAL;

    // Is this attribute (supposed to be) ordered in ascending order?
    // false => desc. order, std::nullopt => no specified order
    const InAscendingOrder order = std::nullopt;

    // String representation of an attribute. The alias is preferred, if available.
    // Otherwise it is "[table.]name", wrapped in "ORIGIN(...)" for computed origins.
    // combined_name drives the ordering operators (<, >, <=, >=) below.
    const std::string combined_name = {alias.value_or(
        (static_cast<int>(origin) > 1) ? to_string(origin) + "(" + (table ? table.value() + "." : "") + name + ")"
                                       : (table ? table.value() + "." : "") + name)};

    /// FUNCTIONS:
    operator std::string() const
    {
        return combined_name;
    }

    // convenience constructor: copies `other`, but with a different table
    static Attribute create(const Attribute &other, const TableName table_)
    {
        return Attribute{other.name, table_, other.alias, other.origin, other.order};
    }

    // operators to utilize stl-classes templated with Attribute

    // Ordering: lexicographic comparison of the combined name.
    bool operator<(const Attribute &rhs) const
    {
        return combined_name < rhs.combined_name;
    }

    bool operator>(const Attribute &rhs) const
    {
        return combined_name > rhs.combined_name;
    }

    bool operator<=(const Attribute &rhs) const
    {
        return combined_name <= rhs.combined_name;
    }

    bool operator>=(const Attribute &rhs) const
    {
        return combined_name >= rhs.combined_name;
    }

    // Equality: same name, table and origin. Alias and order are ignored, so
    // two attributes can be equal although their combined names differ.
    bool operator==(const Attribute &rhs) const
    {
        return (name == rhs.name) && (table == rhs.table) && (origin == rhs.origin);
    }

    // Always the exact negation of operator==.
    bool operator!=(const Attribute &rhs) const
    {
        return !(*this == rhs);
    }

    bool isAsterisk() const
    {
        // gives true for "*" and "T.*"
        return name == "*" && !alias && origin == AttributeOrigin::MIXED && !order;
    }
};

/**
 * @brief The Asterisk struct is a special instance of Attribute, used for user convenience.
 * It will be resolved to a set of Attributes at logical plan creation time.
 * E.g. "SELECT * FROM ..." or "SELECT T.* FROM T, ..."
 */
struct Asterisk : Attribute
{
    Asterisk(const TableName &table_ = std::nullopt)
        : Attribute{"*", table_, std::nullopt, AttributeOrigin::MIXED, std::nullopt}
    {
    }
};

/**
 * Attributes is a collection of fully qualified attributes.
 *
 * "Fully qualified" implies that each attribute also holds its source.
 * This can either be a table-name or a result of a computation
 * (i.e. aggregation). Table-origin is encoded via string, computations
 * via AttributeOrigin.
 *
 * Newly produced columns hold a generated name (such as "SUM(T.A)").
 *
 * Note: the collection is a std::vector; it keeps insertion order and does
 * not enforce uniqueness by itself.
 */
using Attributes = std::vector<Attribute>;

/**
 * Copies every attribute of other_attr, replacing its table with table_name.
 */
static inline Attributes recreateAttributes(const Attributes &other_attr, const std::string &table_name)
{
    Attributes attributes;
    attributes.reserve(other_attr.size());

    for (const auto &attr : other_attr)
    {
        attributes.push_back(Attribute::create(attr, table_name));
    }

    return attributes;
}

/**
 * Concatenates the combined names of all attributes, each preceded by a blank.
 */
static inline std::string to_string(const Attributes &attributes)
{
    std::string ret;
    for (const auto &attr : attributes)
    {
        ret += " ";
        ret += attr.combined_name;
    }
    return ret;
}
} // namespace beedb::expression
// \ No newline at end of file diff --git a/src/include/expression/operator.h b/src/include/expression/operator.h new file mode 100644 index 0000000..2570dd3 --- /dev/null +++ b/src/include/expression/operator.h @@ -0,0 +1,684 @@ +/*------------------------------------------------------------------------------* + * Architecture & Implementation of DBMS * + *------------------------------------------------------------------------------* + * Copyright 2020 Databases and Information Systems Group TU Dortmund * + * Visit us at * + * http://dbis.cs.tu-dortmund.de/cms/en/home/ * + * * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO
THE WARRANTIES OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * + * OTHER DEALINGS IN THE SOFTWARE. * + * * + * Authors (alphabetical list): * + * Maximilian Berens * + * Roland Kühn * + * Jan Mühlig * + *------------------------------------------------------------------------------* + */ + +#pragma once +#include "expression/attribute.h" +#include "expression/predicate.h" +#include +#include +#include +#include
+ +namespace beedb::expression +{ +using namespace std::string_literals; + +/** + * A RequiredSchema is a set of required attributes. Requirements of scan operators are checked differently. + */ +using RequiredSchema = Attributes; + +/** + * This is the schema produced by an operator. It is represented as an edge "weight"/data in the graph. + */ +using OutgoingSchema = Attributes; + +/** + * This is the schema/attributes that are produced by an operator, e.g. by aggregations. + */ +using AttributeAdditions = Attributes; + +/** + * @brief The Operator struct represents operators as nodes in the graph. + * + * Depending on the type, operators can provide additional data, aside from + * "required schema" and "working schema": + * + * Logically relevant operators: + * CROSSPRODUCT: / + * JOIN: predicate + * SELECTION: predicate + * PROJECTION: / + * AGGREGATION: / + * GROUPBY: / + * QUERY: logical plan + * ARITHMETIC: an arithmetic constant (optional) + * + * + */ +struct Operator +{ + Operator(const RequiredSchema requirements_, const AttributeAdditions additional_attributes_, + const bool forwards_schema_) + : requirements(requirements_), additional_attributes(additional_attributes_), forwards_schema(forwards_schema_) + { + } + + virtual ~Operator() = default; + + RequiredSchema requirements; // attributes an operator requires + AttributeAdditions additional_attributes; // the attributes an operator works on + + // This attribute indicates that an operator type forwards incoming + // attributes or not. This is subclass-dependet information. + // It is used when infering outgoing schemas of an operator. This + // information can not be derived by the operator itself, because it + // depends on it's position in the plan. + const bool forwards_schema; + + /** + * @brief inferPredicateRequirements is a utility function, that + * extracts attributes from a predicate tree. + * + * Uses extractAttributes() as a helper function. + * + * Used in JOIN/SELECTION subclasses. 
+ * + * @param pred the predicate, that contains attributes + * @return the extracted attributes + */ + static RequiredSchema inferPredicateRequirements(const Predicate &pred) + { + return PredicateAnalyzer::attributes(pred); + } + + /** + * @brief inferAggregationRequirements is a utility function, that + * extracts required attributes. this is implemented by replacing + * the origin (field within an attribute) with a physical origin. + * + * This can be interpreted as " Attribute 'SUM(A)' requires attribute + * 'A' to be computable." + * + * Used in AGGREGATION/GROUPBY subclasses. + * + * @param agg_attributes the attributes that might contain aggregations + * @return attributes with origin set to physical + */ + static RequiredSchema inferStatementRequirements(const Attributes &agg_attributes) + { + Attributes requirements; + // simply clone attributes without aggregation-origin (and physical origin instead): + for (const auto &attr : agg_attributes) + { + requirements.push_back({attr.name, attr.table, attr.alias, AttributeOrigin::PHYSICAL}); + } + return requirements; + } + + /** + * @brief operator std::string gives a representation of the operator. + */ + operator std::string() const + { + return to_string(); + } + + /** + * @brief clone creates an new instance of an operator. + * @return an exact copy of this + */ + virtual std::unique_ptr clone() const = 0; + + protected: + virtual std::string to_string() const = 0; +}; + +/** + * @brief The TABLE struct produces (physically existing) attributes of a + * single, specific table. 
+ */ +struct TableOperator : Operator +{ + TableOperator(const AttributeAdditions table_attributes, const std::string &table_name) + : Operator{{/*no requirements*/}, table_attributes, false}, _table_name(table_name), _table_alias{std::nullopt} + { + // consistency check: we may only produce attributes from a single + // table + std::set tablenames; + for (const auto &attribute : table_attributes) + { + + assert(attribute.origin == AttributeOrigin::PHYSICAL); + assert(attribute.table.has_value()); + + tablenames.insert(attribute.table.value()); + } + + assert(tablenames.size() == 1); // table name/alias has to be the same for all attributes of this operator + + if (table_attributes.back().table.value() != _table_name) + { + _table_alias = table_attributes.back().table.value(); + } + } + + virtual ~TableOperator() = default; + + std::string to_string() const override + { + return "TABLE( " + _table_name + " )"; + } + + std::unique_ptr clone() const override + { + return std::make_unique(this->additional_attributes, this->_table_name); + } + + const std::string &physical_table_name() const + { + return _table_name; + } + const std::optional table_alias() const + { + return _table_alias; + } + + private: + const std::string _table_name; + std::optional _table_alias; +}; + +struct IndexScanOperator : Operator +{ + IndexScanOperator(const AttributeAdditions table_attributes, const std::string &table_name_, Predicate &&predicate_, + expression::Attribute indexed_attribute_) + : Operator{inferPredicateRequirements(predicate_), table_attributes, false}, table_name(table_name_), + predicate(std::move(predicate_)), indexed_attribute(indexed_attribute_) + { + } + + virtual ~IndexScanOperator() = default; + + std::string to_string() const override + { + return "INDEX_SCAN( " + table_name + " )"; + } + + std::unique_ptr clone() const override + { + return std::make_unique(this->additional_attributes, this->table_name, + expression::clone_predicate(predicate), 
indexed_attribute); + } + + const std::string table_name; + Predicate predicate; + expression::Attribute indexed_attribute; +}; + +/** + * @brief The CROSSPRODUCT struct is just a union of incoming attributes + * and therefore solely defined by its placement in the graph + */ +struct CrossProductOperator : public Operator +{ + + CrossProductOperator() : Operator{{}, {}, true} + { + } + + virtual ~CrossProductOperator() = default; + + std::string to_string() const override + { + return "CROSSPRODUCT"s; + } + + std::unique_ptr clone() const override + { + return std::make_unique(); + } +}; + +/** + * @brief The JOIN struct is defined by it's predicate (in + * addition to its placement) + */ +struct JoinOperator : public Operator +{ + enum Type + { + NestedLoopsJoin, + HashJoin + }; + + JoinOperator(Predicate &&join_predicate, const Type type_) + : Operator{inferPredicateRequirements(join_predicate), {/* No additions*/}, true}, + predicate(std::move(join_predicate)), type(type_) + { + } + + virtual ~JoinOperator() = default; + + Predicate predicate; + const Type type; + + std::string to_string() const override + { + std::string pred_str; + std::visit(expression::LogicalConnective::TermStringifier{pred_str}, predicate); + std::string str; + str += (type == NestedLoopsJoin) ? "NESTEDLOOPSJOIN" : "HASHJOIN"; + str += "( "; + str += pred_str; + str += " )"; + return str; + } + std::unique_ptr clone() const override + { + return std::make_unique(expression::clone_predicate(this->predicate), type); + } +}; +/** + * @brief The SELECTION struct represents a simple filter operator. + * Requirements are derived from the specified predicate. 
+ */ +struct SelectionOperator : public Operator +{ + // we can only select from attributes that we get => requirements: + SelectionOperator(Predicate &&selection_predicate) + : Operator{inferPredicateRequirements(selection_predicate), {/* No additions*/}, true}, + predicate(std::move(selection_predicate)) + { + } + + virtual ~SelectionOperator() = default; + + Predicate predicate; + + std::string to_string() const override + { + std::string pred_str; + std::visit(expression::LogicalConnective::TermStringifier{pred_str}, predicate); + return "SELECTION( " + pred_str + " )"; + } + + std::unique_ptr clone() const override + { + return std::make_unique(expression::clone_predicate(this->predicate)); + } +}; +/** + * @brief The PROJECTION struct reduces the incoming attributes to the + * specified set. + */ +struct ProjectionOperator : public Operator +{ + // we can only project what is coming in => requirements: + ProjectionOperator(const RequiredSchema projected_attributes) + : Operator{projected_attributes, {/* No additions */}, false} + { + } + + virtual ~ProjectionOperator() = default; + + std::string to_string() const override + { + std::string str("PROJECTION( "); + for (const auto &attr : this->additional_attributes) + { + str += static_cast(attr) + " "; + } + str += ")"; + return str; + } + + std::unique_ptr clone() const override + { + return std::make_unique(this->requirements); + } +}; + +/** + * @brief The AGGREGATION struct performs an aggregation operation and + * creates a new column. + * + * Also, it forwards all other incoming columns, + * including the one that this operation is based on. + * + * I.e. 
{"A"} x {"A*4"} -> {"A","A*4"} + */ +struct AggregationOperator : public Operator +{ + AggregationOperator(const AttributeAdditions resulting_attributes) + : Operator{inferStatementRequirements(resulting_attributes), resulting_attributes, false} + { + } + + virtual ~AggregationOperator() = default; + + std::string to_string() const override + { + return "AGGREGATION"s; + } + + std::unique_ptr clone() const override + { + return std::make_unique(this->additional_attributes); + } +}; + +/** + * @brief The GROUPBY struct is similar to aggregation + * + */ +struct GroupByOperator : public AggregationOperator +{ + // GROUPBY is an AGGREGATION + GroupByOperator(const AttributeAdditions resulting_attributes) : AggregationOperator{resulting_attributes} + { + } + + virtual ~GroupByOperator() = default; + + std::string to_string() const override + { + std::string str("GROUPBY( "); + for (const auto &attr : this->additional_attributes) + { + str += attr; + str += " "; + } + str += ")"; + return str; + } + + std::unique_ptr clone() const override + { + return std::make_unique(this->additional_attributes); + } +}; + +struct OrderByOperator : public Operator +{ + OrderByOperator(const AttributeAdditions sorted_attributes) + : Operator{sorted_attributes, {/* No additions */}, true} + { + for (const auto &attribute : sorted_attributes) + { + assert(attribute.order.has_value()); // we can only make use of attributes that have an order specified + } + } + + virtual ~OrderByOperator() = default; + + std::string to_string() const override + { + std::string str("ORDERBY( "); + for (const auto &attr : this->additional_attributes) + { + str += static_cast(attr) + "("; + str += (attr.order.value() ? "ASC"s : "DESC"s); + str += ") "; + } + str += ")"; + return str; + } + + std::unique_ptr clone() const override + { + return std::make_unique(this->requirements); + } +}; + +/** + * @brief The ARITHMETIC struct is a binary arithmetic operation between + * values from columns or constants. 
+ * + * The type of operation itself is encoded in the specified attributes! + */ +struct ArithmeticOperator : public Operator +{ + using ArithmeticConstant = std::optional>; + // An arithmetic operation is defined by the + ArithmeticOperator(const AttributeAdditions resulting_attributes, const ArithmeticConstant constant_ = {}) + : Operator{inferStatementRequirements(resulting_attributes), resulting_attributes, true}, constant(constant_) + { + } + + virtual ~ArithmeticOperator() = default; + + ArithmeticConstant constant; + + std::string to_string() const override + { + return "ARITHMETIC"s; + } + + std::unique_ptr clone() const override + { + return std::make_unique(this->additional_attributes); + } +}; + +struct LimitOperator : public Operator +{ + LimitOperator(const std::uint64_t limit_, const std::uint64_t offset_) + : Operator({}, {}, true), limit(limit_), offset(offset_) + { + } + + virtual ~LimitOperator() = default; + + const std::uint64_t limit; + const std::uint64_t offset; + + std::string to_string() const override + { + return "LIMIT( " + std::to_string(offset) + "," + std::to_string(limit) + " )"; + } + + std::unique_ptr clone() const override + { + return std::make_unique(limit, offset); + } +}; + +/** + * @brief The QUERY struct embeds a subquery into this plan. + */ +struct QueryOperator : public Operator +{ + // QUERY is another plan, nested into the current plan. 
+ QueryOperator(const RequiredSchema input_schema, const AttributeAdditions output_schema + // , const LogicalPlan& nested_plan + ) + : Operator{input_schema, output_schema, false} + { + } + + virtual ~QueryOperator() = default; + // LogicalPlan nested_plan; // TODO: move operator defintions + + std::string to_string() const override + { + return "SUBQUERY"s; + } + + std::unique_ptr clone() const override + { + return std::make_unique(this->requirements, this->additional_attributes); + } +}; + +struct CreateTableOperator : public Operator +{ + CreateTableOperator(const std::string &table_name_, std::vector &&column_names_, + std::vector &&column_types_, std::vector &&column_is_nullables_) + : Operator({}, {}, false), table_name(table_name_), column_names(column_names_), column_types(column_types_), + column_is_nullables(column_is_nullables_) + { + } + + virtual ~CreateTableOperator() = default; + + const std::string table_name; + const std::vector column_names; + const std::vector column_types; + const std::vector column_is_nullables; + + std::string to_string() const override + { + std::stringstream stream; + stream << "CREATE TABLE " << table_name << " ("; + for (auto i = 0u; i < column_names.size(); i++) + { + stream << column_names[i] << " (" << column_types[i].name() << " "; + if (column_is_nullables[i]) + { + stream << "NULL"; + } + else + { + stream << "NOT NULL"; + } + stream << ")"; + if (i < column_names.size() - 1) + { + stream << ","; + } + } + stream << " )"; + return stream.str(); + } + + std::unique_ptr clone() const override + { + return std::make_unique(table_name, std::vector(column_names), + std::vector(column_types), + std::vector(column_is_nullables)); + } +}; + +struct CreateIndexOperator : Operator +{ + CreateIndexOperator(Attribute &&indexed_attribute, const std::string &index_name_, const bool is_unique_, + const index::Type index_type) + : Operator({}, {}, false), column(indexed_attribute), index_name(index_name_), is_unique(is_unique_), + 
type(index_type) + { + } + + virtual ~CreateIndexOperator() = default; + + const Attribute column; + const std::string index_name; + const bool is_unique; + const index::Type type; + + std::string to_string() const override + { + return "CREATE INDEX " + index_name + " ON " + column.combined_name; + } + + std::unique_ptr clone() const override + { + return std::make_unique(expression::Attribute{column}, index_name, is_unique, type); + } +}; + +struct InsertOperator : Operator +{ + InsertOperator(const std::string &table_name_, const Attributes columns, + const std::vector>> &value_lists_) + : Operator({}, columns, false), table_name(table_name_), values_lists(value_lists_) + { + } + + ~InsertOperator() = default; + + const std::string table_name; + const std::vector>> values_lists; + + std::string to_string() const override + { + std::stringstream stream; + stream << "INSERT INTO " << table_name << " ("; + for (auto i = 0u; i < this->additional_attributes.size(); i++) + { + if (i > 0u) + { + stream << ","; + } + stream << this->additional_attributes[i].combined_name; + } + stream << ") VALUES "; + + for (auto i = 0u; i < values_lists.size(); i++) + { + if (i > 0u) + { + stream << ","; + } + stream << "("; + const auto &tuple = values_lists[i]; + for (auto j = 0u; j < tuple.size(); j++) + { + if (j > 0u) + { + stream << ","; + } + if (tuple[j]) + { + std::visit([&stream](const auto &value) { stream << value; }, tuple[j].value()); + } + else + { + stream << "NULL"; + } + } + stream << ")"; + } + + stream << std::flush; + + return stream.str(); + } + + std::unique_ptr clone() const override + { + return std::make_unique(table_name, additional_attributes, values_lists); + } +}; + +struct UpdateOperator : public Operator +{ + UpdateOperator(const std::vector> &updates_) + : Operator({}, {}, false), updates(updates_) + { + } + + virtual ~UpdateOperator() = default; + + std::vector> updates; + + std::string to_string() const override + { + return "UPDATE"; + } + + 
std::unique_ptr clone() const override + { + return std::make_unique(updates); + } +}; +} // namespace beedb::expression \ No newline at end of file diff --git a/src/include/expression/predicate.h b/src/include/expression/predicate.h new file mode 100644 index 0000000..63b68b5 --- /dev/null +++ b/src/include/expression/predicate.h @@ -0,0 +1,493 @@ +/*------------------------------------------------------------------------------* + * Architecture & Implementation of DBMS * + *------------------------------------------------------------------------------* + * Copyright 2020 Databases and Information Systems Group TU Dortmund * + * Visit us at * + * http://dbis.cs.tu-dortmund.de/cms/en/home/ * + * * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * + * OTHER DEALINGS IN THE SOFTWARE. * + * * + * Authors (alphabetical list): * + * Maximilian Berens * + * Roland Kühn * + * Jan Mühlig * + *------------------------------------------------------------------------------* + */ + +#pragma once + +#include "expression/attribute.h" +#include +#include +#include +#include +#include +#include + +namespace beedb::expression +{ + +/** + * This represents the result of a predicate. We need ternary logic, since all + * predicates, that involve attributes, can not be resolved statically. + * Therefore, only constants can evaluate to either true or false, everything + * else will result in std::nullopt, i.e. no result. + * + * This can be used to statically evaluate literal predicates at plan-creation + * time. + * + * TODO: finish implementation, when this feature is needed. 
+ */ +using TernaryValue = std::optional; + +/** + * Operand of a predicate, type is defined by the parser. + * + * Note: Unequal types always compare to False. I.e. "1.0f == 1l" gives False + */ +using Operand = std::variant; + +struct Atom +{ + const Operand left; + const Operand right; + + Atom(const Operand left_, const Operand right_) : left(left_), right(right_) + { + } + virtual ~Atom() = default; + + virtual std::unique_ptr clone(const Operand &left, const Operand &right) const = 0; + + std::unique_ptr clone() + { + return this->clone(left, right); + } + + virtual operator TernaryValue() const = 0; + + operator std::string() const + { + std::string left_string; + std::visit(OperandStringifier{left_string}, left); + std::string right_string; + std::visit(OperandStringifier{right_string}, right); + return left_string + " " + operatorString() + " " + right_string; + } + + protected: + virtual std::string operatorString() const = 0; + + bool semantic_check() const + { + if (std::holds_alternative(left) || std::holds_alternative(right)) + { + return false; + } + else if (left.index() != right.index()) + { + throw exception::LogicalException("No support for operands of different literal types!"); + } + return true; + } + + struct OperandStringifier + { + // helper struct for visitor pattern. 
+ // implements string conversion of atoms by calling the + // proper conversion operation on the operands + std::string &rep; + + // depending on the variant type, + // c++ picks the appropriate call operator + + // NOTE: 'overload' might be of use here + // https://www.bfilipek.com/2018/06/variant.html#overload + void operator()(std::int64_t const &val) + { + rep = std::to_string(val); // converts operand to std::string + } + void operator()(float const &val) + { + rep = std::to_string(val); // converts operand to std::string + } + void operator()(bool const &val) + { + rep = std::to_string(std::int32_t(val)); // converts operand to std::string + } + void operator()(std::string const &val) + { + rep = "\'" + val + "\'"; // converts operand to std::string + } + void operator()(Attribute const &val) + { + rep = val; // casts to string + } + }; +}; + +struct EQ : public Atom +{ + EQ(const Operand left_, const Operand right_) : Atom{left_, right_} + { + } + + std::unique_ptr clone(const Operand &left_, const Operand &right_) const override + { + return std::make_unique(left_, right_); + } + + operator TernaryValue() const override + { + return semantic_check() ? std::optional(left == right) : std::nullopt; + } + std::string operatorString() const override + { + return "="; + } +}; + +struct NEQ : public Atom +{ + NEQ(const Operand left_, const Operand right_) : Atom{left_, right_} + { + } + + std::unique_ptr clone(const Operand &left_, const Operand &right_) const override + { + return std::make_unique(left_, right_); + } + + operator TernaryValue() const override + { + return semantic_check() ? 
std::optional(left != right) : std::nullopt; + } + std::string operatorString() const override + { + return "!="; + } +}; + +struct LT : public Atom +{ + LT(const Operand left_, const Operand right_) : Atom{left_, right_} + { + } + + std::unique_ptr clone(const Operand &left_, const Operand &right_) const override + { + return std::make_unique(left_, right_); + } + + operator TernaryValue() const override + { + return semantic_check() ? std::optional(left < right) : std::nullopt; + } + std::string operatorString() const override + { + return "<"; + } +}; + +struct GT : public Atom +{ + GT(const Operand left_, const Operand right_) : Atom{left_, right_} + { + } + + std::unique_ptr clone(const Operand &left_, const Operand &right_) const override + { + return std::make_unique(left_, right_); + } + + operator TernaryValue() const override + { + return semantic_check() ? std::optional(left > right) : std::nullopt; + } + std::string operatorString() const override + { + return ">"; + } +}; + +struct LE : public Atom +{ + LE(const Operand left_, const Operand right_) : Atom{left_, right_} + { + } + + std::unique_ptr clone(const Operand &left_, const Operand &right_) const override + { + return std::make_unique(left_, right_); + } + + operator TernaryValue() const override + { + return semantic_check() ? std::optional(left <= right) : std::nullopt; + } + std::string operatorString() const override + { + return "<="; + } +}; + +struct GE : public Atom +{ + GE(const Operand left_, const Operand right_) : Atom{left_, right_} + { + } + + std::unique_ptr clone(const Operand &left_, const Operand &right_) const override + { + return std::make_unique(left_, right_); + } + + operator TernaryValue() const override + { + return semantic_check() ? std::optional(left >= right) : std::nullopt; + } + + std::string operatorString() const override + { + return ">="; + } +}; + +/** + * Recursive definition of a predicate. 
Can either be an atom or a two-term + * combination of other predicates: + */ +struct LogicalConnective; + +using Predicate = std::variant, std::unique_ptr, bool>; + +/** + * @brief The LogicalConnective struct defines the logical combination of two + * Predicate objects. + */ +struct LogicalConnective +{ + protected: + LogicalConnective(Predicate &&left_, Predicate &&right_) : left(std::move(left_)), right(std::move(right_)) + { + } + + public: + virtual ~LogicalConnective() = default; + + virtual std::unique_ptr clone() const = 0; + + Predicate left; + Predicate right; + + /** + * @brief operator TernaryValue makes evaluation of expressions possible. + */ + operator TernaryValue() const + { + TernaryValue left_result; + std::visit(TermEvaluator{left_result}, left); + TernaryValue right_result; + std::visit(TermEvaluator{right_result}, right); + + return apply(left_result, right_result); + } + + operator std::string() const + { + std::string left_result; + std::visit(TermStringifier{left_result}, left); + std::string right_result; + std::visit(TermStringifier{right_result}, right); + return "(" + left_result + " " + connector() + " " + right_result + ")"; + } + + struct TermEvaluator + { + // helper struct for visitor pattern + // implements ternary evaluation of expressions by calling the + // proper conversion operation (conversion to TernaryValue) + TernaryValue &result; + + // depending on the variant type, + // c++ picks the appropriate call operator: + void operator()(const std::unique_ptr &atom) + { + result = static_cast(*atom); // converts atom to TernaryValue + } + void operator()(const std::unique_ptr &expression) + { + result = static_cast(*expression); + } + + void operator()(const bool truth_value) + { + result = truth_value; + } + }; + struct TermStringifier + { + // helper struct for visitor pattern + // implements string conversion of expressions by calling the + // proper conversion operation (conversion to std::string) + std::string &rep; + + // depending on the variant type, + // 
c++ picks the appropriate call operator: + void operator()(const std::unique_ptr &atom) + { + rep = static_cast(*atom); // converts atom to std::string + } + void operator()(const std::unique_ptr &expression) + { + rep = static_cast(*expression); + } + + void operator()(const bool truth_value) + { + rep = truth_value ? "True" : "False"; + } + }; + + protected: + virtual TernaryValue apply(TernaryValue, TernaryValue) const = 0; + virtual std::string connector() const = 0; +}; + +static inline std::string to_string(const Predicate &predicate) +{ + std::string str; + std::visit(LogicalConnective::TermStringifier{str}, predicate); + return str; +} + +static inline TernaryValue statically_evaluate(const Predicate &predicate) +{ + TernaryValue result; + std::visit(LogicalConnective::TermEvaluator{result}, predicate); + return result; +} + +static inline Predicate clone_predicate(const Predicate &predicate) +{ + if (std::holds_alternative>(predicate)) + { + return std::get>(predicate)->clone(); + } + else if (std::holds_alternative>(predicate)) + { + return std::get>(predicate)->clone(); + } + else if (std::holds_alternative(predicate)) + { + return std::get(predicate); + } + + assert(false && "Could not clone predicate"); + return {}; +} + +struct AND : public LogicalConnective +{ + AND(Predicate &&left_, Predicate &&right_) : LogicalConnective(std::move(left_), std::move(right_)) + { + } + + std::unique_ptr clone() const override + { + auto left_clone = clone_predicate(left); + auto right_clone = clone_predicate(right); + return std::make_unique(std::move(left_clone), std::move(right_clone)); + } + + TernaryValue apply(TernaryValue left_, TernaryValue right_) const override + { + if (left_.has_value()) + { + if (!left_.value()) + { // if the lhs expression is False... + // ... the whole statement is always False! + return false; + } + return right_; // .. 
if true, AND is a neutral/redundant operation + } + else if (right_.has_value()) + { + if (!right_.value()) + { // if the rhs expression is False... + // ... the whole statement is always False! + return false; + } + return left_; // .. if true, AND is a neutral/redundant operation + } + else + { + // when both values are std::nullopt, then.... + return std::nullopt; // ..this statement is not statically evaluable! + } + } + std::string connector() const override + { + return "&&"; + } +}; + +struct OR : public LogicalConnective +{ + OR(Predicate &&left_, Predicate &&right_) : LogicalConnective(std::move(left_), std::move(right_)) + { + } + + std::unique_ptr clone() const override + { + auto left_clone = clone_predicate(left); + auto right_clone = clone_predicate(right); + return std::make_unique(std::move(left_clone), std::move(right_clone)); + } + + TernaryValue apply(TernaryValue left_, TernaryValue right_) const override + { + if (left_.has_value()) + { + if (left_.value()) + { // if the lhs expression is True... + // ... the whole statement is always True! + return true; + } + return right_; // .. if False, OR is a neutral/redundant operation + } + else if (right_.has_value()) + { + if (right_.value()) + { // if the rhs expression is True... + // ... the whole statement is always true! + return true; + } + return left_; // .. if False, OR is a neutral/redundant operation + } + else + { + // when both values are std::nullopt, then.... + return std::nullopt; // ..this statement is not statically evaluable! 
+ } + } + std::string connector() const override + { + return "||"; + } +}; + +class PredicateAnalyzer +{ + public: + static expression::Attributes attributes(const Predicate &predicate); + static bool contains_range_predicate(const Predicate &predicate); + static bool contains_not_equals_predicate(const Predicate &predicate); +}; +} // namespace beedb::expression \ No newline at end of file diff --git a/src/include/index/b_plus_tree/b_plus_tree.h b/src/include/index/b_plus_tree/b_plus_tree.h new file mode 100644 index 0000000..ebf7d40 --- /dev/null +++ b/src/include/index/b_plus_tree/b_plus_tree.h @@ -0,0 +1,285 @@ +/*------------------------------------------------------------------------------* + * Architecture & Implementation of DBMS * + *------------------------------------------------------------------------------* + * Copyright 2020 Databases and Information Systems Group TU Dortmund * + * Visit us at * + * http://dbis.cs.tu-dortmund.de/cms/en/home/ * + * * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * + * OTHER DEALINGS IN THE SOFTWARE. 
* + * * + * Authors (alphabetical list): * + * Maximilian Berens * + * Roland Kühn * + * Jan Mühlig * + *------------------------------------------------------------------------------* + */ + +#pragma once +#include "b_plus_tree_node.h" +#include "index/non_unique_index_interface.h" +#include "index/return_value.h" +#include +#include +#include +#include +#include +#include + +namespace beedb::index::bplustree +{ +template class BPlusTree +{ + public: + using Node = BPlusTreeNode; + + BPlusTree() : _root(new Node(true)) + { + } + ~BPlusTree() + { + delete _root; + } + + /** + * Inserts the given key-value-pair into the tree. + * + * @param key + * @param value + */ + void put(const K key, V value); + + /** + * Finds the value by the given key. + * + * @param key + * @return The found value. + */ + std::optional::type> get(const K key) const; + + std::optional> get(const K key_from, const K key_to) const; + + inline Node *root() const + { + return _root; + } + inline size_type height() const + { + return _height; + } + + private: + Node *_root; + size_type _height = 1; + + /** + * Locates a leaf node for a given key. + * + * @param key + * @param node_path + * @return + */ + Node *locate_leaf(const K key, std::vector *node_path = nullptr) const; + + /** + * Inserts the given key-value-tuple into the given leaf node. + * + * @param leaf_node + * @param key + * @param value + * @return + */ + Node *insert_into_leaf(Node *leaf_node, const K key, const V value); + + /** + * Inserts the given key and separator into the given inner node. + * + * @param inner_node + * @param key + * @param separator + * @return + */ + std::pair insert_into_inner(Node *inner_node, const K key, Node *separator); + + /** + * Creates a new root with pointer to the two given new child nodes. 
+ * + * @param left + * @param right + * @param key + */ + void install_new_root_node(Node *left, Node *right, const K key); + + /** + * Splits the given inner node and returns the new node and a key, + * that has to be inserted into the parent node. + * + * @param inner_node + * @param key + * @param separator + * @return + */ + std::pair split_inner_node(Node *inner_node, const K key, Node *separator); + + /** + * Splits the given leaf node and returns the new node. + * + * @param leaf_node + * @return + */ + Node *split_leaf_node(Node *leaf_node); + + friend std::ostream &operator<<(std::ostream &stream, const BPlusTree &tree) + { + Node *root = tree.root(); + if (root == nullptr) + { + return stream; + } + + const auto items = tree.root()->size_include_children(); + const auto nodes = tree.root()->count_children(); + + return stream << "Height = " << tree.height() << "\n" + << "Key-Value-Pairs = " << items.second << "\n" + << "Inner-Nodes = " << nodes.first << "\n" + << "Leaf-Nodes = " << nodes.second << "\n" + << "Memory = " + << ((nodes.first + nodes.second) * Config::b_plus_tree_page_size) / 1024 / 1024 << " MB\n"; + } +}; + +template void BPlusTree::put(const K key, V value) +{ + // Path for traversal. All nodes from root excluding the leaf node will be stored. + std::vector path; + path.reserve(6); + + // Locate the possible leaf. + Node *leaf = this->locate_leaf(key, &path); + + // Insert into leaf + K up_key; + Node *new_node = this->insert_into_leaf(leaf, key, value); + if (new_node != nullptr) + { + up_key = new_node->leaf_key(0u); + } + + // Propagate up. 
+ while (new_node != nullptr && path.empty() == false) + { + Node *parent = path.back(); + path.pop_back(); + auto [n, u] = this->insert_into_inner(parent, up_key, new_node); + new_node = n; + up_key = u; + } + + // Create new root + if (new_node != nullptr) + { + this->install_new_root_node(_root, new_node, up_key); + } +} + +template +std::pair *, K> BPlusTree::insert_into_inner(BPlusTree::Node *inner_node, + const K key, + BPlusTree::Node *separator) +{ + if (inner_node->is_full() == false) + { + const size_type index = inner_node->index(key); + inner_node->insert_separator(index, separator, key); + return {static_cast(nullptr), 0}; + } + else + { + return this->split_inner_node(inner_node, key, separator); + } +} + +template +void BPlusTree::install_new_root_node(BPlusTree::Node *left, BPlusTree::Node *right, + const K key) +{ + Node *new_root = new Node(false); + new_root->separator(0, left); + new_root->insert_separator(0, right, key); + _height++; + _root = new_root; +} + +template +std::pair *, K> BPlusTree::split_inner_node(BPlusTree::Node *inner_node, + const K key, + BPlusTree::Node *separator) +{ + constexpr size_type left_size = BPlusTreeInnerNode::max_keys / 2; + constexpr size_type right_size = BPlusTreeInnerNode::max_keys - left_size; + + K key_up; + Node *new_inner_node = new Node(false); + new_inner_node->right(inner_node->right()); + inner_node->right(new_inner_node); + + if (key < inner_node->inner_key(left_size - 1)) + { + inner_node->copy(new_inner_node, left_size, right_size); + new_inner_node->separator(0, inner_node->separator(left_size)); + new_inner_node->size(right_size); + + key_up = inner_node->inner_key(left_size - 1); + inner_node->size(left_size - 1); + + const size_type index = inner_node->index(key); + inner_node->insert_separator(index, separator, key); + } + else if (key < inner_node->inner_key(left_size)) + { + inner_node->copy(new_inner_node, left_size, right_size); + new_inner_node->separator(0, separator); + key_up = key; + 
inner_node->size(left_size); + new_inner_node->size(right_size); + } + else + { + inner_node->copy(new_inner_node, left_size + 1, right_size - 1); + new_inner_node->separator(0, inner_node->separator(left_size + 1)); + inner_node->size(left_size); + new_inner_node->size(right_size - 1); + key_up = inner_node->inner_key(left_size); + + const size_type index = new_inner_node->index(key); + new_inner_node->insert_separator(index, separator, key); + } + + return {new_inner_node, key_up}; +} + +template +BPlusTreeNode *BPlusTree::split_leaf_node(BPlusTree::Node *leaf_node) +{ + constexpr size_type left_size = BPlusTreeLeafNode::max_items / 2; + constexpr size_type right_size = BPlusTreeLeafNode::max_items - left_size; + + Node *new_leaf_node = new Node(true); + new_leaf_node->right(leaf_node->right()); + leaf_node->right(new_leaf_node); + + leaf_node->copy(new_leaf_node, left_size, right_size); + new_leaf_node->size(right_size); + leaf_node->size(left_size); + + return new_leaf_node; +} +} // namespace beedb::index::bplustree + +#include "b_plus_tree.hpp" \ No newline at end of file diff --git a/src/include/index/b_plus_tree/b_plus_tree.hpp b/src/include/index/b_plus_tree/b_plus_tree.hpp new file mode 100644 index 0000000..39d348e --- /dev/null +++ b/src/include/index/b_plus_tree/b_plus_tree.hpp @@ -0,0 +1,200 @@ +/*------------------------------------------------------------------------------* + * Architecture & Implementation of DBMS * + *------------------------------------------------------------------------------* + * Copyright 2020 Databases and Information Systems Group TU Dortmund * + * Visit us at * + * http://dbis.cs.tu-dortmund.de/cms/en/home/ * + * * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL * + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * + * OTHER DEALINGS IN THE SOFTWARE. * + * * + * Authors (alphabetical list): * + * Maximilian Berens * + * Roland Kühn * + * Jan Mühlig * + *------------------------------------------------------------------------------* + */ + +#include "index/return_value.h" +#include +#include +#include +#include +#include + +namespace beedb::index::bplustree +{ +template +BPlusTreeNode *BPlusTree::locate_leaf(const K key, + std::vector *> *node_path) const +{ + /** + * Assignment (2): Implement a B+-Tree + * + * The B+-Tree is used for indexing files. Using the index + * for bigger data sets will reduce the amount of scanned disk pages + * in case the query wants to filter the data. + * + * This method is used to traverse the tree and locate a leaf that may + * contain the wanted key. During the traversal for inserts, all visited nodes + * are stored in the node_path container. For lookups, the container is null. + * + * Hints for implementation: + * - "this->_root" stores the root node of the tree, where every + * lookup for a leaf is started. + * - Every inner node (also the root) has a "child(k)" method, which returns + * the next node on the way for the leaf, that may contain the key "k". + * - To check a node whether it is a leaf or inner node, use the "is_leaf()" + * method on a node, which returns true if the node is a leaf. + * - If "node_path" is not a "nullptr", push all nodes during traversal + * to that vector (also the root). + * + * + * Procedure: + * - Start the traversal at the root node. + * - Get the next node using "current_node->child(key)" while "current_node" + * is not a leaf. + * - Push every node to the "node_path", if "node_path != nullptr". + * - Return the leaf you found during traversal. 
+ */ + + Node *current_node = this->_root; + + + return current_node; +} + +template +std::optional::type> BPlusTree::get(const K key) const +{ + /** + * Assignment (2): Implement a B+-Tree + * + * This method tries to find the value for a given key. + * The tree is a very generic data structure which can hold + * one value per key or multiple values per key. The specific + * variant is given by the template parameter U which is a bool + * and stands for Unique. True means: Return one value of type V; + * false means: Return a set of values of type V. + * + * Hints for implementation: + * - You have already implemented "locate_leaf(k)" which returns the + * leaf that may contain the searched key-value pair. + * - Every leaf node provides a method "index(k)" which returns the index + * of the key "k". + * - Every leaf node provides a method "leaf_key(i)" which returns the + * key at index "i". + * - Every leaf node provides a method "value(i)" which returns the value + * at index "i". The "value(i)" method will automatically pick the correct + * return type, depending on the U-template-parameter. + * + * Procedure: + * - Locate the leaf node that may contain the wanted key. + * - Check the leaf node: Is the wanted key available? + * - If yes: return the value of the key. + * - Otherwise return an empty result, using "return { };". + */ + + + return std::nullopt; +} + +template +std::optional> BPlusTree::get(const K key_from, const K key_to) const +{ + /** + * Assignment (2): Implement a B+-Tree + * + * This method tries to find one or multiple values for a given + * range of keys. + * The tree is a very generic data structure which can hold + * one value per key or multiple values per key. The specific + * variant is given by the template parameter U which is a bool + * and stands for Unique. True means: Return one value of type V; + * false means: Return a set of values of type V. 
+ * + * Hints for implementation: + * - You have already implemented "locate_leaf(k)" which returns the + * leaf that may contain the searched key-value pair. + * - Every node provides a method "right()" which returns a pointer + * to the right neighbour node. + * - Every node provides a method "size()" which returns the number of + * items that are stored in the node. + * - Every leaf node provides a method "index(k)" which returns the index + * of the key "k". + * - Every leaf node provides a method "leaf_key(i)" which returns the + * key at index "i". + * - Every leaf node provides a method "value(i)" which returns the value + * at index "i". The "value(i)" method will automatically pick the correct + * return type, depending on the U-template-parameter. + * - You can test whether it is a unique or non-unique tree, using + * "if constexpr(U) { code for unique... } else { code for non-unique... }. + * - Both containers std::set (https://en.cppreference.com/w/cpp/container/set) + * and std::optional (https://en.cppreference.com/w/cpp/utility/optional) may + * be helpful on compiler errors. + * + * Procedure: + * - Locate the leaf node that may contain the wanted key. + * - Add the values of all keys that are greater than or equal to the key "key_from" + * and less than or equal to the key "key_to" to a set of values. + * - When the last key of the node matches that predicate, also + * take a look to the right neighbour using the "right()" method + * (and also the right's right, ...). + */ + + std::set values; + + + return values; +} + +template +BPlusTreeNode *BPlusTree::insert_into_leaf(BPlusTreeNode *leaf_node, const K key, + const V value) +{ + /** + * Assignment (2): Implement a B+-Tree + * + * This method adds a value to a leaf node. The correct leaf node, key and + * value are all given. When inserting results in splitting the leaf, + * the pointer to the new created node is returned. 
+ * + * Hints for implementation: + * - Every node provides a method "is_full()" which returns true, if there + * is no more place for a new item. + * - Every leaf node provides a method "index(k)" which returns the index + * of the key "k". + * - Every leaf node provides a method "leaf_key(i)" which returns the + * key at index "i". + * - Every leaf node provides a method "insert_value(i, v, k)" which adds + * a key-value pair (k,v) to the leaf at index i. + * - The tree has a method "this->split_leaf_node(l)" which splits the leaf + * node l and returns a pointer to the new node. + * - You can test whether it is a unique or non-unique tree, using + * "if constexpr(U) { code for unique... } else { code for non-unique... }. + * + * Procedure: + * - Check if the leaf node already contains the key + * - If yes and the tree is non-unique: add the value to the list of values + * in the node and return a "nullptr". + * - If yes and the tree is unique: Just return a "nullptr". + * - If the key is not in the node, check for space for a new (key,value) pair. + * - If the node is not full, insert the new pair and return a "nullptr" + * - Otherwise, we have to split the node. Splitting will create a new leaf node, + * the new right neighbour of the given leaf node. + * - After splitting, we have enough space to insert the pair. Check whether the key + * should take place in the given leaf or the new leaf, created on splitting: + * When the key is lower than the first key of the new leaf, the key should be inserted + * into the given leaf, otherwise in the new leaf. + * - After splitting, return the pointer to the new leaf. 
+ */ + + + return nullptr; +} +} // namespace beedb::index::bplustree \ No newline at end of file diff --git a/src/include/index/b_plus_tree/b_plus_tree_node.h b/src/include/index/b_plus_tree/b_plus_tree_node.h new file mode 100644 index 0000000..5fcf7b3 --- /dev/null +++ b/src/include/index/b_plus_tree/b_plus_tree_node.h @@ -0,0 +1,294 @@ +/*------------------------------------------------------------------------------* + * Architecture & Implementation of DBMS * + *------------------------------------------------------------------------------* + * Copyright 2020 Databases and Information Systems Group TU Dortmund * + * Visit us at * + * http://dbis.cs.tu-dortmund.de/cms/en/home/ * + * * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * + * OTHER DEALINGS IN THE SOFTWARE. 
* + * * + * Authors (alphabetical list): * + * Maximilian Berens * + * Roland Kühn * + * Jan Mühlig * + *------------------------------------------------------------------------------* + */ + +#pragma once +#include "index/return_value.h" +#include +#include +#include +#include +#include + +namespace beedb::index::bplustree +{ +using size_type = std::size_t; + +template class BPlusTreeNode; + +template struct BPlusTreeNodeHeader +{ + size_type size = 0; + bool is_leaf; + BPlusTreeNode *right = nullptr; + + BPlusTreeNodeHeader(const bool is_leaf_) : is_leaf(is_leaf_) + { + } +}; + +template struct BPlusTreeLeafNode +{ + static constexpr size_type max_items = (Config::b_plus_tree_page_size - sizeof(BPlusTreeNodeHeader)) / + (sizeof(K) + sizeof(typename ReturnValue::type)); + + std::array keys; + std::array::type, BPlusTreeLeafNode::max_items> values; +}; + +template struct BPlusTreeInnerNode +{ + static constexpr size_type max_keys = + (Config::b_plus_tree_page_size - sizeof(BPlusTreeNodeHeader) - sizeof(BPlusTreeNode *)) / + (sizeof(K) + sizeof(BPlusTreeInnerNode *)); + static constexpr size_type max_separators = max_keys + 1; + + std::array keys; + std::array *, BPlusTreeInnerNode::max_separators> separators; +}; + +template class BPlusTreeNode +{ + public: + BPlusTreeNode(const bool is_leaf) : _header(is_leaf) + { + } + ~BPlusTreeNode(); + + inline bool is_leaf() const + { + return _header.is_leaf; + } + inline bool is_inner() const + { + return is_leaf() == false; + } + inline size_type size() const + { + return _header.size; + } + inline void size(const size_type size) + { + _header.size = size; + } + inline BPlusTreeNode *right() + { + return _header.right; + } + inline bool has_right() const + { + return _header.right != nullptr; + } + inline void right(BPlusTreeNode *right) + { + _header.right = right; + } + + inline typename ReturnValue::type &value(const size_type index) + { + return _leaf_node.values[index]; + } + inline BPlusTreeNode *separator(const 
size_type index) + { + return _inner_node.separators[index]; + } + inline void separator(const size_type index, BPlusTreeNode *separator) + { + _inner_node.separators[index] = separator; + } + + inline K leaf_key(const size_type index) + { + return _leaf_node.keys[index]; + } + inline K inner_key(const size_type index) + { + return _inner_node.keys[index]; + } + + inline bool is_full() const + { + const size_type max_size = + is_leaf() ? BPlusTreeLeafNode::max_items : BPlusTreeInnerNode::max_keys; + return size() >= max_size; + } + + size_type index(const K key); + BPlusTreeNode *child(const K key); + + void insert_separator(const size_type index, BPlusTreeNode *separator, const K key); + void insert_value(const size_type index, const V value, const K key); + void copy(BPlusTreeNode *other, const size_type from_index, const size_type count); + + std::pair size_include_children(); + std::pair count_children(); + + private: + BPlusTreeNodeHeader _header; + + union { + BPlusTreeInnerNode _inner_node; + BPlusTreeLeafNode _leaf_node; + }; +}; + +template BPlusTreeNode::~BPlusTreeNode() +{ + if (is_leaf() == false) + { + for (size_type i = 0; i <= size(); i++) // an inner node with size() keys owns size() + 1 children + { + delete _inner_node.separators[i]; + } + } +} + +template size_type BPlusTreeNode::index(const K key) +{ + auto keys = is_leaf() ? 
_leaf_node.keys.begin() : _inner_node.keys.begin(); + auto iterator = std::lower_bound(keys, keys + size(), key); + + return std::distance(keys, iterator); +} + +template BPlusTreeNode *BPlusTreeNode::child(const K key) +{ + std::int32_t low = 0, high = size() - 1; + while (low <= high) + { + const std::int32_t mid = (low + high) / 2; + if (_inner_node.keys[mid] <= key) + { + low = mid + 1; + } + else + { + high = mid - 1; + } + } + + return _inner_node.separators[high + 1]; +} + +template +void BPlusTreeNode::insert_separator(const size_type index, BPlusTreeNode *separator, const K key) +{ + if (index < size()) + { + const size_type offset = size() - index; + std::memmove(&_inner_node.keys[index + 1], &_inner_node.keys[index], offset * sizeof(K)); + std::memmove(&_inner_node.separators[index + 2], &_inner_node.separators[index + 1], + offset * sizeof(BPlusTreeNode *)); + } + + _inner_node.keys[index] = key; + _inner_node.separators[index + 1] = separator; + _header.size++; +} + +template +void BPlusTreeNode::insert_value(const size_type index, const V value, const K key) +{ + if (index < size()) + { + const size_type offset = size() - index; + std::memmove(&_leaf_node.keys[index + 1], &_leaf_node.keys[index], offset * sizeof(K)); + std::memmove(static_cast(&_leaf_node.values[index + 1]), &_leaf_node.values[index], + offset * sizeof(typename ReturnValue::type)); + } + + _leaf_node.keys[index] = key; + + if constexpr (U) + { + _leaf_node.values[index] = value; + } + else + { + new (&_leaf_node.values[index]) typename ReturnValue::type(); + _leaf_node.values[index].insert(value); + } + + _header.size++; +} + +template +void BPlusTreeNode::copy(BPlusTreeNode *other, const size_type from_index, const size_type count) +{ + if (is_leaf()) + { + std::memcpy(&other->_leaf_node.keys[0], &_leaf_node.keys[from_index], count * sizeof(K)); + std::memcpy(static_cast(&other->_leaf_node.values[0]), &_leaf_node.values[from_index], + count * sizeof(typename ReturnValue::type)); + } 
+ else + { + std::memcpy(&other->_inner_node.keys[0], &_inner_node.keys[from_index], count * sizeof(K)); + std::memcpy(&other->_inner_node.separators[1], &_inner_node.separators[from_index + 1], + count * sizeof(BPlusTreeNode *)); + } +} + +template +std::pair BPlusTreeNode::size_include_children() +{ + if (is_leaf()) + { + return {0u, size()}; + } + + std::size_t leaf_sizes = 0, inner_sizes = 0; + for (auto i = 0u; i <= size(); i++) + { + BPlusTreeNode *child = _inner_node.separators[i]; + const auto child_size = child->size_include_children(); + inner_sizes += child_size.first; + leaf_sizes += child_size.second; + } + + return {inner_sizes, leaf_sizes}; +} + +template std::pair BPlusTreeNode::count_children() +{ + if (is_leaf()) + { + return {0u, 0u}; + } + + if (_inner_node.separators[0]->is_leaf()) + { + return {0u, size() + 1u}; + } + + std::size_t leaf_children = 0, inner_children = 0; + for (auto i = 0u; i <= size(); i++) + { + BPlusTreeNode *child = _inner_node.separators[i]; + + const auto child_size = child->count_children(); + inner_children += child_size.first; + leaf_children += child_size.second; + } + + return {inner_children + size(), leaf_children}; +} +} // namespace beedb::index::bplustree \ No newline at end of file diff --git a/src/include/index/b_plus_tree/non_unique_b_plus_tree_index.h b/src/include/index/b_plus_tree/non_unique_b_plus_tree_index.h new file mode 100644 index 0000000..98f7d1d --- /dev/null +++ b/src/include/index/b_plus_tree/non_unique_b_plus_tree_index.h @@ -0,0 +1,72 @@ +/*------------------------------------------------------------------------------* + * Architecture & Implementation of DBMS * + *------------------------------------------------------------------------------* + * Copyright 2020 Databases and Information Systems Group TU Dortmund * + * Visit us at * + * http://dbis.cs.tu-dortmund.de/cms/en/home/ * + * * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * + * OR IMPLIED, INCLUDING BUT 
NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR *
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, *
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR *
+ * OTHER DEALINGS IN THE SOFTWARE. *
+ * *
+ * Authors (alphabetical list): *
+ * Maximilian Berens *
+ * Roland Kühn *
+ * Jan Mühlig *
+ *------------------------------------------------------------------------------*
+ */
+
+#pragma once
+
+#include "b_plus_tree.h"
+#include
+#include
+#include
+
+namespace beedb::index::bplustree
+{
+/**
+ * B+-Tree implementation for non-unique (key,value) pairs.
+ * Can store multiple values for each key.
+ * Supports range queries.
+ */
+class NonUniqueBPlusTreeIndex : public IndexInterface, public NonUniqueIndexInterface, public RangeIndexInterface
+{
+  public:
+    NonUniqueBPlusTreeIndex(const std::string &name) : IndexInterface(name) // name is kept by IndexInterface
+    {
+    }
+    virtual ~NonUniqueBPlusTreeIndex() = default;
+
+    virtual bool supports_range() const override // always true for this index
+    {
+        return true;
+    }
+    virtual bool is_unique() const override // always false for this index
+    {
+        return false;
+    }
+
+    virtual void put(const std::int64_t key, disk::Page::page_id page_pointer) override // forwards to the tree
+    {
+        _tree.put(key, page_pointer);
+    }
+
+    virtual std::optional> get(const std::int64_t key) const override // single-key lookup, forwards to the tree
+    {
+        return _tree.get(key);
+    }
+
+    virtual std::optional> get(const std::int64_t key_from, const std::int64_t key_to) override // range lookup
+    {
+        return _tree.get(key_from, key_to);
+    }
+
+  private:
+    BPlusTree _tree; // the tree that holds all entries of this index
+};
+} // namespace beedb::index::bplustree
\ No newline at end of file
diff --git a/src/include/index/b_plus_tree/unique_b_plus_tree_index.h b/src/include/index/b_plus_tree/unique_b_plus_tree_index.h
new file mode 100644
index 0000000..153c017
--- /dev/null
+++ b/src/include/index/b_plus_tree/unique_b_plus_tree_index.h
@@ -0,0 +1,71 @@
+/*------------------------------------------------------------------------------* + * Architecture & Implementation of DBMS * + *------------------------------------------------------------------------------* + * Copyright 2020 Databases and Information Systems Group TU Dortmund * + * Visit us at * + * http://dbis.cs.tu-dortmund.de/cms/en/home/ * + * * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * + * OTHER DEALINGS IN THE SOFTWARE. * + * * + * Authors (alphabetical list): * + * Maximilian Berens * + * Roland Kühn * + * Jan Mühlig * + *------------------------------------------------------------------------------* + */ + +#pragma once + +#include "b_plus_tree.h" +#include +#include +#include + +namespace beedb::index::bplustree +{ +/** + * B+-Tree implementation for unique (key,value) pairs. + * Supports range queries. 
+ */
+class UniqueBPlusTreeIndex : public IndexInterface, public UniqueIndexInterface, public RangeIndexInterface
+{
+  public:
+    UniqueBPlusTreeIndex(const std::string &name) : IndexInterface(name) // name is kept by IndexInterface
+    {
+    }
+    virtual ~UniqueBPlusTreeIndex() = default;
+
+    virtual bool supports_range() const override // always true for this index
+    {
+        return true;
+    }
+    virtual bool is_unique() const override // always true for this index
+    {
+        return true;
+    }
+
+    virtual void put(const std::int64_t key, disk::Page::page_id page_pointer) override // forwards to the tree
+    {
+        _tree.put(key, page_pointer);
+    }
+
+    virtual std::optional get(const std::int64_t key) const override // single-key lookup, forwards to the tree
+    {
+        return _tree.get(key);
+    }
+
+    virtual std::optional> get(const std::int64_t key_from, const std::int64_t key_to) override // range lookup
+    {
+        return _tree.get(key_from, key_to);
+    }
+
+  private:
+    BPlusTree _tree; // the tree that holds all entries of this index
+};
+} // namespace beedb::index::bplustree
\ No newline at end of file
diff --git a/src/include/index/index_factory.h b/src/include/index/index_factory.h
new file mode 100644
index 0000000..2cc9202
--- /dev/null
+++ b/src/include/index/index_factory.h
@@ -0,0 +1,60 @@
+/*------------------------------------------------------------------------------*
+ * Architecture & Implementation of DBMS *
+ *------------------------------------------------------------------------------*
+ * Copyright 2020 Databases and Information Systems Group TU Dortmund *
+ * Visit us at *
+ * http://dbis.cs.tu-dortmund.de/cms/en/home/ *
+ * *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS *
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR *
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, *
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR *
+ * OTHER DEALINGS IN THE SOFTWARE.
*
+ * *
+ * Authors (alphabetical list): *
+ * Maximilian Berens *
+ * Roland Kühn *
+ * Jan Mühlig *
+ *------------------------------------------------------------------------------*
+ */
+
+#pragma once
+#include "b_plus_tree/non_unique_b_plus_tree_index.h"
+#include "b_plus_tree/unique_b_plus_tree_index.h"
+#include "index_interface.h"
+#include "type.h"
+#include
+
+namespace beedb::index
+{
+class IndexFactory
+{
+  public:
+    /**
+     * Builds an empty index.
+     *
+     * @param name Name for the index.
+     * @param type The index type identifies the underlying data structure like BTree, Hashtable, ...
+     * @param is_unique True to build an index with one value per key, false if a key may hold multiple values.
+     * @return Pointer to the in-memory index structure; an empty pointer for every type other than BTree.
+     */
+    static std::shared_ptr new_index(const std::string &name, const Type type, const bool is_unique)
+    {
+        if (type == Type::BTree) // BTree is the only type that is built here
+        {
+            if (is_unique)
+            {
+                return std::make_shared(name);
+            }
+            else
+            {
+                return std::make_shared(name);
+            }
+        }
+
+        return {}; // any other type: empty pointer, callers have to check the result
+    }
+};
+} // namespace beedb::index
\ No newline at end of file
diff --git a/src/include/index/index_interface.h b/src/include/index/index_interface.h
new file mode 100644
index 0000000..b0f5e71
--- /dev/null
+++ b/src/include/index/index_interface.h
@@ -0,0 +1,70 @@
+/*------------------------------------------------------------------------------*
+ * Architecture & Implementation of DBMS *
+ *------------------------------------------------------------------------------*
+ * Copyright 2020 Databases and Information Systems Group TU Dortmund *
+ * Visit us at *
+ * http://dbis.cs.tu-dortmund.de/cms/en/home/ *
+ * *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS *
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL *
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR *
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, *
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR *
+ * OTHER DEALINGS IN THE SOFTWARE. *
+ * *
+ * Authors (alphabetical list): *
+ * Maximilian Berens *
+ * Roland Kühn *
+ * Jan Mühlig *
+ *------------------------------------------------------------------------------*
+ */
+
+#pragma once
+
+#include
+
+namespace beedb::index
+{
+/**
+ * The index interface offers a common API for all indices, whether or not
+ * they are range-indices or support multiple values for one key.
+ */
+class IndexInterface
+{
+  public:
+    IndexInterface(const std::string &name) : _name(name) // the name is copied and fixed for the index' lifetime
+    {
+    }
+    virtual ~IndexInterface() = default;
+
+    /**
+     * @return True, if the index supports just one value per key.
+     */
+    virtual bool is_unique() const = 0;
+
+    /**
+     * @return True, if the index supports range queries (like B+Trees).
+     */
+    virtual bool supports_range() const = 0;
+
+    /**
+     * Stores a (Key,Page) pair in the index.
+     *
+     * @param key Key for lookups.
+     * @param page_id Id of the page that is stored as the value for the key.
+     */
+    virtual void put(const std::int64_t key, disk::Page::page_id page_id) = 0;
+
+    /**
+     * @return Name of the index.
+     */
+    const std::string &name() const
+    {
+        return _name;
+    }
+
+  private:
+    const std::string _name;
+};
+} // namespace beedb::index
\ No newline at end of file
diff --git a/src/include/index/non_unique_index_interface.h b/src/include/index/non_unique_index_interface.h
new file mode 100644
index 0000000..961d759
--- /dev/null
+++ b/src/include/index/non_unique_index_interface.h
@@ -0,0 +1,49 @@
+/*------------------------------------------------------------------------------*
+ * Architecture & Implementation of DBMS *
+ *------------------------------------------------------------------------------*
+ * Copyright 2020 Databases and Information Systems Group TU Dortmund *
+ * Visit us at *
+ * http://dbis.cs.tu-dortmund.de/cms/en/home/ *
+ * *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS *
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR *
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, *
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR *
+ * OTHER DEALINGS IN THE SOFTWARE. *
+ * *
+ * Authors (alphabetical list): *
+ * Maximilian Berens *
+ * Roland Kühn *
+ * Jan Mühlig *
+ *------------------------------------------------------------------------------*
+ */
+
+#pragma once
+#include
+#include
+#include
+#include
+
+namespace beedb::index
+{
+/**
+ * Interface for non-unique indices.
+ * The index can store multiple values per key.
+ */
+class NonUniqueIndexInterface
+{
+  public:
+    NonUniqueIndexInterface() = default;
+    virtual ~NonUniqueIndexInterface() = default;
+
+    /**
+     * Looks up all values stored for a key.
+     *
+     * @param key Key to lookup.
+     * @return All stored values for the key.
+     */
+    virtual std::optional> get(const std::int64_t key) const = 0;
+};
+} // namespace beedb::index
\ No newline at end of file
diff --git a/src/include/index/range_index_interface.h b/src/include/index/range_index_interface.h
new file mode 100644
index 0000000..9cad951
--- /dev/null
+++ b/src/include/index/range_index_interface.h
@@ -0,0 +1,51 @@
+/*------------------------------------------------------------------------------*
+ * Architecture & Implementation of DBMS *
+ *------------------------------------------------------------------------------*
+ * Copyright 2020 Databases and Information Systems Group TU Dortmund *
+ * Visit us at *
+ * http://dbis.cs.tu-dortmund.de/cms/en/home/ *
+ * *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS *
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR *
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, *
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR *
+ * OTHER DEALINGS IN THE SOFTWARE. *
+ * *
+ * Authors (alphabetical list): *
+ * Maximilian Berens *
+ * Roland Kühn *
+ * Jan Mühlig *
+ *------------------------------------------------------------------------------*
+ */
+
+#pragma once
+
+#include
+#include
+#include
+#include
+
+namespace beedb::index
+{
+/**
+ * Interface for range indices.
+ * The index can lookup a range of keys for range-queries.
+ */
+class RangeIndexInterface
+{
+  public:
+    virtual ~RangeIndexInterface() = default; // polymorphic interface: keep destruction through this type well-defined
+
+    /**
+     * Lookup of a key-range.
+     *
+     * @param key_from From key.
+     * @param key_to To key.
+     * @return All values for the given key-range.
+     */
+    virtual std::optional> get(const std::int64_t key_from,
+                                const std::int64_t key_to) = 0; // NOTE(review): bound inclusiveness not stated
+};
+} // namespace beedb::index
\ No newline at end of file
diff --git a/src/include/index/return_value.h b/src/include/index/return_value.h
new file mode 100644
index 0000000..a440cce
--- /dev/null
+++ b/src/include/index/return_value.h
@@ -0,0 +1,38 @@
+/*------------------------------------------------------------------------------*
+ * Architecture & Implementation of DBMS *
+ *------------------------------------------------------------------------------*
+ * Copyright 2020 Databases and Information Systems Group TU Dortmund *
+ * Visit us at *
+ * http://dbis.cs.tu-dortmund.de/cms/en/home/ *
+ * *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS *
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR *
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, *
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR *
+ * OTHER DEALINGS IN THE SOFTWARE.
*
+ * *
+ * Authors (alphabetical list): *
+ * Maximilian Berens *
+ * Roland Kühn *
+ * Jan Mühlig *
+ *------------------------------------------------------------------------------*
+ */
+
+#pragma once
+
+#include
+#include
+
+namespace beedb::index
+{
+template struct ReturnValue // type trait: `type` is what a lookup hands back; here the Value itself
+{
+    using type = Value;
+};
+template struct ReturnValue // NOTE(review): presumably the non-unique case, yielding a set of values - confirm
+{
+    using type = std::set;
+};
+} // namespace beedb::index
\ No newline at end of file
diff --git a/src/include/index/type.h b/src/include/index/type.h
new file mode 100644
index 0000000..65a4eb9
--- /dev/null
+++ b/src/include/index/type.h
@@ -0,0 +1,39 @@
+/*------------------------------------------------------------------------------*
+ * Architecture & Implementation of DBMS *
+ *------------------------------------------------------------------------------*
+ * Copyright 2020 Databases and Information Systems Group TU Dortmund *
+ * Visit us at *
+ * http://dbis.cs.tu-dortmund.de/cms/en/home/ *
+ * *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS *
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR *
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, *
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR *
+ * OTHER DEALINGS IN THE SOFTWARE. *
+ * *
+ * Authors (alphabetical list): *
+ * Maximilian Berens *
+ * Roland Kühn *
+ * Jan Mühlig *
+ *------------------------------------------------------------------------------*
+ */
+
+#pragma once
+#include
+
+namespace beedb::index
+{
+/**
+ * Possible index types.
+ */
+enum Type // unscoped enum: the enumerators are also visible directly in beedb::index
+{
+    None,
+    BTree,
+    Hashtable,
+    Skiplist,
+    Bitmap /*, further index types can be appended here
*/
+};
+} // namespace beedb::index
\ No newline at end of file
diff --git a/src/include/index/unique_index_interface.h b/src/include/index/unique_index_interface.h
new file mode 100644
index 0000000..ee391b1
--- /dev/null
+++ b/src/include/index/unique_index_interface.h
@@ -0,0 +1,48 @@
+/*------------------------------------------------------------------------------*
+ * Architecture & Implementation of DBMS *
+ *------------------------------------------------------------------------------*
+ * Copyright 2020 Databases and Information Systems Group TU Dortmund *
+ * Visit us at *
+ * http://dbis.cs.tu-dortmund.de/cms/en/home/ *
+ * *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS *
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR *
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, *
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR *
+ * OTHER DEALINGS IN THE SOFTWARE. *
+ * *
+ * Authors (alphabetical list): *
+ * Maximilian Berens *
+ * Roland Kühn *
+ * Jan Mühlig *
+ *------------------------------------------------------------------------------*
+ */
+
+#pragma once
+#include
+#include
+#include
+
+namespace beedb::index
+{
+/**
+ * Interface for unique indices.
+ * The index can store only one value per key.
+ */
+class UniqueIndexInterface
+{
+  public:
+    UniqueIndexInterface() = default;
+    virtual ~UniqueIndexInterface() = default;
+
+    /**
+     * Looks up the value stored for a given key.
+     *
+     * @param key Key to lookup.
+     * @return The stored value or an empty optional.
+     */
+    virtual std::optional get(const std::int64_t key) const = 0;
+};
+} // namespace beedb::index
\ No newline at end of file
diff --git a/src/include/io/command/commander.h b/src/include/io/command/commander.h
new file mode 100644
index 0000000..d09cd88
--- /dev/null
+++ b/src/include/io/command/commander.h
@@ -0,0 +1,49 @@
+/*------------------------------------------------------------------------------*
+ * Architecture & Implementation of DBMS *
+ *------------------------------------------------------------------------------*
+ * Copyright 2020 Databases and Information Systems Group TU Dortmund *
+ * Visit us at *
+ * http://dbis.cs.tu-dortmund.de/cms/en/home/ *
+ * *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS *
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR *
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, *
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR *
+ * OTHER DEALINGS IN THE SOFTWARE.
*
+ * *
+ * Authors (alphabetical list): *
+ * Maximilian Berens *
+ * Roland Kühn *
+ * Jan Mühlig *
+ *------------------------------------------------------------------------------*
+ */
+#pragma once
+#include "custom_command_interface.h"
+#include
+#include
+#include
+#include
+
+namespace beedb::io::command
+{
+class Commander // keeps the registered custom commands, looked up by name
+{
+  public:
+    Commander() = default;
+    ~Commander() = default;
+
+    bool has_command_prefix(const std::string &input); // defined out of line
+
+    std::optional create_query(const std::string &input); // defined out of line; the result is optional
+
+    void register_command(const std::string &name, std::unique_ptr &&command) // takes ownership of the command
+    {
+        _registered_commands[name] = std::move(command); // replaces a command registered under the same name
+    }
+
+  private:
+    std::unordered_map> _registered_commands;
+};
+} // namespace beedb::io::command
\ No newline at end of file
diff --git a/src/include/io/command/custom_command_interface.h b/src/include/io/command/custom_command_interface.h
new file mode 100644
index 0000000..f4fb21b
--- /dev/null
+++ b/src/include/io/command/custom_command_interface.h
@@ -0,0 +1,38 @@
+/*------------------------------------------------------------------------------*
+ * Architecture & Implementation of DBMS *
+ *------------------------------------------------------------------------------*
+ * Copyright 2020 Databases and Information Systems Group TU Dortmund *
+ * Visit us at *
+ * http://dbis.cs.tu-dortmund.de/cms/en/home/ *
+ * *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS *
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR *
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, *
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR *
+ * OTHER DEALINGS IN THE SOFTWARE.
*
+ * *
+ * Authors (alphabetical list): *
+ * Maximilian Berens *
+ * Roland Kühn *
+ * Jan Mühlig *
+ *------------------------------------------------------------------------------*
+ */
+
+#pragma once
+#include "io/executor.h"
+#include
+#include
+
+namespace beedb::io::command
+{
+class CustomCommandInterface // abstract base of every custom command
+{
+  public:
+    virtual ~CustomCommandInterface() = default;
+    virtual std::optional execute(const std::string &input) = 0; // runs the command; the result is optional
+
+    virtual std::string help_str() = 0; // help text of the command
+};
+} // namespace beedb::io::command
\ No newline at end of file
diff --git a/src/include/io/command/custom_commands.h b/src/include/io/command/custom_commands.h
new file mode 100644
index 0000000..f875d3f
--- /dev/null
+++ b/src/include/io/command/custom_commands.h
@@ -0,0 +1,97 @@
+/*------------------------------------------------------------------------------*
+ * Architecture & Implementation of DBMS *
+ *------------------------------------------------------------------------------*
+ * Copyright 2020 Databases and Information Systems Group TU Dortmund *
+ * Visit us at *
+ * http://dbis.cs.tu-dortmund.de/cms/en/home/ *
+ * *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS *
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR *
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, *
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR *
+ * OTHER DEALINGS IN THE SOFTWARE.
*
+ * *
+ * Authors (alphabetical list): *
+ * Maximilian Berens *
+ * Roland Kühn *
+ * Jan Mühlig *
+ *------------------------------------------------------------------------------*
+ */
+
+#pragma once
+#include "custom_command_interface.h"
+
+namespace beedb::io::command
+{
+class ShowCommand final : public CustomCommandInterface // the ":show" command
+{
+  public:
+    virtual ~ShowCommand() = default;
+    virtual std::optional execute(const std::string &parameters) override;
+    virtual std::string help_str() override // returns a copy of the syntax hint below
+    {
+        return std::string(_help_str);
+    };
+
+  private:
+    static constexpr auto _help_str = "Syntax> :show [tables,indices,columns]{1}";
+};
+
+class ExplainCommand final : public CustomCommandInterface // the ":explain" command
+{
+  public:
+    virtual ~ExplainCommand() = default;
+    virtual std::optional execute(const std::string &input) override;
+    virtual std::string help_str() override // returns a copy of the syntax hint below
+    {
+        return std::string(_help_str);
+    };
+
+  private:
+    static constexpr auto _help_str = "Syntax> :explain [plan,graph]? ";
+};
+
+class SetCommand final : public CustomCommandInterface // the ":set" command
+{
+  public:
+    SetCommand(Config &config) : _config(config) // only a reference is stored: config has to outlive the command
+    {
+    }
+    virtual ~SetCommand() = default;
+    virtual std::optional execute(const std::string &input) override;
+
+    virtual std::string help_str() override // returns a copy of the syntax hint below
+    {
+        return std::string(_help_str);
+    };
+
+  private:
+    static constexpr auto _help_str = "Syntax> :set ";
+
+  private:
+    Config &_config;
+};
+
+class GetCommand final : public CustomCommandInterface // the ":get" command
+{
+  public:
+    GetCommand(Config &config) : _config(config) // only a reference is stored: config has to outlive the command
+    {
+    }
+    virtual ~GetCommand() = default;
+    virtual std::optional execute(const std::string &input) override;
+
+    virtual std::string help_str() override // returns a copy of the syntax hint below
+    {
+        return std::string(_help_str);
+    };
+
+  private:
+    static constexpr auto _help_str = "Syntax> :get []?";
+
+  private:
+    Config &_config;
+};
+} // namespace beedb::io::command
\ No newline at end of file
diff --git a/src/include/io/executor.h b/src/include/io/executor.h
new file mode 100644
index
0000000..80f0341
--- /dev/null
+++ b/src/include/io/executor.h
@@ -0,0 +1,136 @@
+/*------------------------------------------------------------------------------*
+ * Architecture & Implementation of DBMS *
+ *------------------------------------------------------------------------------*
+ * Copyright 2020 Databases and Information Systems Group TU Dortmund *
+ * Visit us at *
+ * http://dbis.cs.tu-dortmund.de/cms/en/home/ *
+ * *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS *
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR *
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, *
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR *
+ * OTHER DEALINGS IN THE SOFTWARE. *
+ * *
+ * Authors (alphabetical list): *
+ * Maximilian Berens *
+ * Roland Kühn *
+ * Jan Mühlig *
+ *------------------------------------------------------------------------------*
+ */
+
+#pragma once
+#include
+#include
+#include
+#include
+
+namespace beedb::io
+{
+/**
+ * @brief The Query struct contains the query-string and some parameters
+ * that influence the execution of a query, e.g. printing the logical plan.
+ */
+struct Query
+{
+    enum ExplainLevel // unscoped enum nested in Query, e.g. Query::Plan
+    {
+        None,
+        Plan,
+        Graph
+    };
+
+    const std::string query_string; // const member: a Query can be copied, but not assigned to
+    ExplainLevel explain = None;    // no explain output unless set otherwise
+};
+
+/**
+ * Wrapper for time and performance statistics, measured
+ * during query execution.
+ */
+class ExecutionResult
+{
+  public:
+    ExecutionResult() : _is_successful(false) // describes a failed execution; all statistics stay zero
+    {
+    }
+
+    ExecutionResult(const std::chrono::milliseconds build_time, const std::chrono::milliseconds execution_time,
+                    const std::size_t evicted_pages) // describes a successful execution with its statistics
+        : _is_successful(true), _build_ms(build_time), _execution_ms(execution_time), _evicted_pages(evicted_pages)
+    {
+    }
+
+    ~ExecutionResult() = default;
+
+    std::chrono::milliseconds build_time() const
+    {
+        return _build_ms;
+    }
+
+    std::chrono::milliseconds execution_time() const
+    {
+        return _execution_ms;
+    }
+
+    std::size_t evicted_pages() const
+    {
+        return _evicted_pages;
+    }
+
+    bool is_successful() const
+    {
+        return _is_successful;
+    }
+
+  private:
+    const bool _is_successful = false; // the members are const: a result can be copied, but not assigned to
+    const std::chrono::milliseconds _build_ms{};
+    const std::chrono::milliseconds _execution_ms{};
+    const std::size_t _evicted_pages = 0u;
+};
+
+/**
+ * Executes queries and query plans.
+ */
+class Executor
+{
+  public:
+    Executor(Database &database) : _database(database) // only a reference is stored: database has to outlive this
+    {
+    }
+    ~Executor() = default;
+
+    ExecutionResult execute(const Query &query, std::function schema_callback,
+                            std::function tuple_callback); // defined out of line
+
+    ExecutionResult execute(const Query &query, std::function tuple_callback) // same, with a no-op schema callback
+    {
+        return execute(
+            query, [](const table::Schema &) {}, tuple_callback);
+    }
+
+    ExecutionResult execute(const Query &query) // same, with a no-op tuple callback as well
+    {
+        return execute(query, [](const table::Tuple &) {});
+    }
+
+    ExecutionResult execute(plan::physical::Plan &plan, std::function schema_callback,
+                            std::function tuple_callback); // defined out of line
+
+    ExecutionResult execute(plan::physical::Plan &plan, std::function tuple_callback) // no-op schema callback
+    {
+        return execute(
+            plan, [](const table::Schema &) {}, tuple_callback);
+    }
+
+    ExecutionResult execute(plan::physical::Plan &plan) // no-op schema and tuple callbacks
+    {
+        return execute(plan, [](const table::Tuple &) {});
+    }
+
+  protected:
+    Database &_database;
+};
+} // namespace beedb::io
\ No newline at end of file
diff --git a/src/include/io/file_executor.h
b/src/include/io/file_executor.h new file mode 100644 index 0000000..88875e1 --- /dev/null +++ b/src/include/io/file_executor.h @@ -0,0 +1,42 @@ +/*------------------------------------------------------------------------------* + * Architecture & Implementation of DBMS * + *------------------------------------------------------------------------------* + * Copyright 2020 Databases and Information Systems Group TU Dortmund * + * Visit us at * + * http://dbis.cs.tu-dortmund.de/cms/en/home/ * + * * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * + * OTHER DEALINGS IN THE SOFTWARE. * + * * + * Authors (alphabetical list): * + * Maximilian Berens * + * Roland Kühn * + * Jan Mühlig * + *------------------------------------------------------------------------------* + */ + +#pragma once + +#include "executor.h" + +namespace beedb::io +{ +/** + * Parses files for SQL-queries and executes them. 
+ */
+class FileExecutor : private Executor // private base: the execute() overloads are not part of the public API
+{
+  public:
+    FileExecutor(Database &database) : Executor(database) // the database reference is passed on to Executor
+    {
+    }
+    virtual ~FileExecutor() = default;
+
+    void import_file(const std::string &file_name); // defined out of line
+};
+} // namespace beedb::io
\ No newline at end of file
diff --git a/src/include/io/printing_executor.h b/src/include/io/printing_executor.h
new file mode 100644
index 0000000..d26def4
--- /dev/null
+++ b/src/include/io/printing_executor.h
@@ -0,0 +1,42 @@
+/*------------------------------------------------------------------------------*
+ * Architecture & Implementation of DBMS *
+ *------------------------------------------------------------------------------*
+ * Copyright 2020 Databases and Information Systems Group TU Dortmund *
+ * Visit us at *
+ * http://dbis.cs.tu-dortmund.de/cms/en/home/ *
+ * *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS *
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR *
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, *
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR *
+ * OTHER DEALINGS IN THE SOFTWARE. *
+ * *
+ * Authors (alphabetical list): *
+ * Maximilian Berens *
+ * Roland Kühn *
+ * Jan Mühlig *
+ *------------------------------------------------------------------------------*
+ */
+
+#pragma once
+
+#include "executor.h"
+
+namespace beedb::io
+{
+/**
+ * Executes SQL-queries and prints the result to the console.
+ */
+class PrintingExecutor : protected Executor // protected base: Executor's API is only visible to derived classes
+{
+  public:
+    PrintingExecutor(Database &database) : Executor(database) // the database reference is passed on to Executor
+    {
+    }
+    virtual ~PrintingExecutor() = default;
+
+    void execute(const Query &query); // hides the Executor::execute overloads and returns nothing
+};
+} // namespace beedb::io
\ No newline at end of file
diff --git a/src/include/io/result_output_formatter.h b/src/include/io/result_output_formatter.h
new file mode 100644
index 0000000..fa66428
--- /dev/null
+++ b/src/include/io/result_output_formatter.h
@@ -0,0 +1,89 @@
+/*------------------------------------------------------------------------------*
+ * Architecture & Implementation of DBMS *
+ *------------------------------------------------------------------------------*
+ * Copyright 2020 Databases and Information Systems Group TU Dortmund *
+ * Visit us at *
+ * http://dbis.cs.tu-dortmund.de/cms/en/home/ *
+ * *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS *
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR *
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, *
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR *
+ * OTHER DEALINGS IN THE SOFTWARE. *
+ * *
+ * Authors (alphabetical list): *
+ * Maximilian Berens *
+ * Roland Kühn *
+ * Jan Mühlig *
+ *------------------------------------------------------------------------------*
+ */
+
+#pragma once
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+namespace beedb::io
+{
+/**
+ * Formats the result of a query, given by schema and set of tuples.
+ */
+class ResultOutputFormatter
+{
+    friend std::ostream &operator<<(std::ostream &stream, const ResultOutputFormatter &result_output_formatter);
+
+  public:
+    /**
+     * Set the header for the output.
+     *
+     * @param schema Schema for the header.
+     */
+    void header(const table::Schema &schema);
+
+    /**
+     * Add a set of tuples to the result.
+     * @param tuples Set of tuples.
+     */
+    void push_back(const std::vector &tuples);
+
+    /**
+     * Add a single tuple to the result.
+     * @param tuple Tuple to add.
+     */
+    void push_back(const table::Tuple &tuple);
+
+    /**
+     * Clear the formatter.
+     */
+    inline void clear()
+    {
+        _table.clear(); // NOTE(review): _count_tuples is not reset here - confirm that count() should survive clear()
+    }
+
+    /**
+     * @return True, when no tuple was added.
+     */
+    inline bool empty() const
+    {
+        return _table.empty(); // answered by the underlying text table
+    }
+
+    /**
+     * @return Number of added tuples.
+     */
+    inline std::size_t count() const
+    {
+        return _count_tuples;
+    }
+
+  private:
+    util::TextTable _table;
+    std::size_t _count_tuples = 0u;
+};
+} // namespace beedb::io
\ No newline at end of file
diff --git a/src/include/io/user_console.h b/src/include/io/user_console.h
new file mode 100644
index 0000000..bd06924
--- /dev/null
+++ b/src/include/io/user_console.h
@@ -0,0 +1,46 @@
+/*------------------------------------------------------------------------------*
+ * Architecture & Implementation of DBMS *
+ *------------------------------------------------------------------------------*
+ * Copyright 2020 Databases and Information Systems Group TU Dortmund *
+ * Visit us at *
+ * http://dbis.cs.tu-dortmund.de/cms/en/home/ *
+ * *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS *
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL * + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * + * OTHER DEALINGS IN THE SOFTWARE. * + * * + * Authors (alphabetical list): * + * Maximilian Berens * + * Roland Kühn * + * Jan Mühlig * + *------------------------------------------------------------------------------* + */ + +#pragma once +#include "printing_executor.h" + +namespace beedb::io +{ +/** + * Provides a console to the user. + * The console is blocking and takes every input + * from the keyboard. + */ +class UserConsole : protected PrintingExecutor +{ + public: + UserConsole(Database &database) : PrintingExecutor(database) + { + } + virtual ~UserConsole() = default; + + /** + * Waits and executes user input from keyboard. + */ + void wait_for_input(); +}; +} // namespace beedb::io \ No newline at end of file diff --git a/src/include/parser/hsql_parser.h b/src/include/parser/hsql_parser.h new file mode 100644 index 0000000..53e3d0e --- /dev/null +++ b/src/include/parser/hsql_parser.h @@ -0,0 +1,94 @@ +/*------------------------------------------------------------------------------* + * Architecture & Implementation of DBMS * + *------------------------------------------------------------------------------* + * Copyright 2020 Databases and Information Systems Group TU Dortmund * + * Visit us at * + * http://dbis.cs.tu-dortmund.de/cms/en/home/ * + * * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL * + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * + * OTHER DEALINGS IN THE SOFTWARE. * + * * + * Authors (alphabetical list): * + * Maximilian Berens * + * Roland Kühn * + * Jan Mühlig * + *------------------------------------------------------------------------------* + */ + +#pragma once + +#include "parser/query_parser.h" +#include + +// hsql: +#include + +namespace beedb::parser +{ +class HSQLParser : public QueryParser +{ + public: + HSQLParser(const std::string &query); + virtual ~HSQLParser() = default; + + protected: + private: + // implementation of parser functionality (see QueryParser class): + const SelectExpression extractSELECT_impl() const override; + const std::optional extractFROM_impl() const override; + const std::optional extractWHERE_impl() const override; + const std::optional extractGROUPBY_impl() const override; + const std::optional extractORDERBY_impl() const override; + const std::optional extractLIMIT_impl() const override; + + virtual const std::optional extractCREATE_TABLE_impl() const override; + virtual const std::optional extractCREATE_INDEX_impl() const override; + + virtual const std::optional extractINSERT_impl() const override; + + virtual const UpdateExpression extractUPDATE_impl() const override; + + /// HELPER FUNCTIONS: + /** + * @brief extract_From_impl evaluates a TableRef expression. + * Due to how joins are represented, this has to be done recursively. + * + * This function has side-effects (on fexp param). + * @param fexp holds the result (and more) + * @param from_expr the expression to evaluate + */ + std::unique_ptr extractFROM_impl(const hsql::TableRef *table_ref) const; + + /** + * @brief extract_predicate converts a predicate hsql::Expr* to a predicate in the database internal representation. 
+ * Is called recursively for logical connected sub-predicates. Makes use of the build_operand helper, defined below. + * @param expression + * @return a std::variant<> of pointers + */ + expression::Predicate extract_predicate(const hsql::Expr *expression) const; + /** + * @brief build_operand extracts operands of an atom + * @param expression + * @return a std::variant<> of values + */ + const expression::Operand build_operand(const hsql::Expr *expression) const; + + /** + * @brief build_attribute is a helper that builds a expression::Attribute. + * This is done by checking for existence of alias, table references etc. + * @param h_expr + * @return + */ + expression::Attribute build_attribute(const hsql::Expr *h_expr) const; + expression::AttributeOrigin determine_function_type(const std::string &function_name) const; + + // hsql specific members: + hsql::SQLParserResult _hsql_result; + const hsql::SQLStatement *_hsql_statement; +}; +} // namespace beedb::parser \ No newline at end of file diff --git a/src/include/parser/query_parser.h b/src/include/parser/query_parser.h new file mode 100644 index 0000000..1e40b8e --- /dev/null +++ b/src/include/parser/query_parser.h @@ -0,0 +1,325 @@ +/*------------------------------------------------------------------------------* + * Architecture & Implementation of DBMS * + *------------------------------------------------------------------------------* + * Copyright 2020 Databases and Information Systems Group TU Dortmund * + * Visit us at * + * http://dbis.cs.tu-dortmund.de/cms/en/home/ * + * * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL * + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * + * OTHER DEALINGS IN THE SOFTWARE. * + * * + * Authors (alphabetical list): * + * Maximilian Berens * + * Roland Kühn * + * Jan Mühlig * + *------------------------------------------------------------------------------* + */ + +#pragma once +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "expression/predicate.h" +#include +#include +#include
// NOTE(review): several template argument lists in this section appear to have
// been stripped (e.g. `std::optional` / `std::pair` / `std::variant` without
// arguments). They are left untouched here; restore them from the upstream
// header before compiling.

/// Expression type of the SELECT clause: the attribute list.
using SelectExpression = beedb::expression::Attributes;

/**
 * Building blocks used to describe the FROM clause (tables, joins and
 * cross products; subqueries are an open question in the original note).
 */
using Alias = std::optional;
using TableDescr = std::pair;

struct FromDescr;
// A join: left side, right side and the join predicate (tuple index 0, 1, 2).
using JoinDescr = std::tuple, std::unique_ptr, expression::Predicate>;
// A cross product: list of owned descriptions.
using CrossProductDescr = std::vector>;
// struct SELECTDescr;

/**
 * One node of the FROM description. print_structure() below relies on the
 * alternative order: index 0 = TableDescr, 1 = JoinDescr, 2 = CrossProductDescr.
 */
struct FromDescr
{
    std::variant descr;
};

/**
 * Debug helper: writes the structure of a FROM description to std::cout.
 *
 * For a join the stringified predicate is printed and both sides are visited
 * recursively; for a cross product every element is visited recursively.
 *
 * @param from description to print
 * @param pfx  prefix that grows by one tab per recursion level. It is
 *             currently not printed (the output statement is commented out).
 */
static inline void print_structure(const std::variant &from,
                                   std::string pfx = "")
{
    // std::cout << pfx;
    switch (from.index())
    {
    case 0:
        std::cout << "TableDescr" << std::endl;
        break;
    case 1: {
        std::cout << "JoinDescr with ";
        const auto &pred = std::get(std::get(from));
        std::string pred_str;
        std::visit(expression::LogicalConnective::TermStringifier{pred_str}, pred);
        std::cout << pred_str << std::endl;
        print_structure(std::get<0>(std::get(from))->descr, pfx + "\t");
        print_structure(std::get<1>(std::get(from))->descr, pfx + "\t");
    }
    break;
    case 2:
        std::cout << "CrossProductDescr" << std::endl;
        for (const auto &el : std::get(from))
        {
            print_structure(el->descr, pfx + "\t");
        }
        // NOTE(review): no `break` here; control falls through into `default`,
        // which only returns.
    default:
        return;
    }
}

/**
 * FROM clause of a query: owns the root of the description tree and offers
 * two flattening views on it (join predicates and tables).
 */
struct FromExpression
{

    std::unique_ptr from_description;

    /**
     * @return the predicates of all joins found in the description tree,
     *         collected by PredicateExtractor.
     */
    std::vector> predicates() const
    {
        auto predicates = std::vector>();
        std::visit(PredicateExtractor{predicates}, from_description->descr);
        return predicates;
    }

    /**
     * @return the table descriptions found in the description tree,
     *         collected by TableExtractor.
     */
    std::vector> tables() const
    {
        auto attributes = std::vector>();
        std::visit(TableExtractor{attributes}, from_description->descr);
        return attributes;
    }

    struct PredicateExtractor
    {
        // Visitor: appends every join predicate to the referenced vector.
        std::vector> &predicates;

        // Depending on the alternative held by the variant,
        // std::visit picks the appropriate call operator:
        void operator()(TableDescr &)
        { /* a plain table carries no predicate */
        }
        void operator()(JoinDescr &join_description)
        {
            // The join's own predicate first, then both sides recursively.
            predicates.push_back(std::get<2>(join_description));
            std::visit(PredicateExtractor{predicates}, std::get<0>(join_description)->descr);
            std::visit(PredicateExtractor{predicates}, std::get<1>(join_description)->descr);
        }
        void operator()(CrossProductDescr &descriptions)
        {
            for (const auto &description : descriptions)
            {
                std::visit(PredicateExtractor{predicates}, description->descr);
            }
        }
    };

    struct TableExtractor
    {
        // Visitor: appends every table description to the referenced vector.
        std::vector> &attributes;

        // Depending on the alternative held by the variant,
        // std::visit picks the appropriate call operator:
        void operator()(TableDescr &table_description)
        {
            attributes.push_back(table_description);
        }
        void operator()(JoinDescr &join_description)
        {
            // Left side first, then right side.
            std::visit(TableExtractor{attributes}, std::get<0>(join_description)->descr);
            std::visit(TableExtractor{attributes}, std::get<1>(join_description)->descr);
        }
        void operator()(CrossProductDescr &cross_product_description)
        {
            for (const auto &description : cross_product_description)
            {
                std::visit(TableExtractor{attributes}, description->descr);
            }
        }
    };
};

/// WHERE clause: a single predicate.
using WhereExpression = expression::Predicate;

/// GROUP BY clause: attribute list.
using GroupByExpression = expression::Attributes;

/// ORDER BY clause: attribute list.
using OrderByExpression = expression::Attributes;

/// LIMIT clause with its offset.
struct LimitExpression
{
    std::uint64_t limit;
    std::uint64_t offset;
};

/// One column of a CREATE TABLE statement.
struct ColumnExpression
{
    std::string column_name;
    table::Type::Id type_id;
    std::uint16_t length;
    bool is_null;
};

/// CREATE TABLE statement: table name, IF NOT EXISTS flag and columns.
struct CreateTableExpression
{
    std::string table_name;
    bool if_not_exists;
    std::vector column_expressions;
};

/// CREATE INDEX statement.
struct CreateIndexExpression
{
    std::string table_name;
    std::string column_name;
    std::string index_name;
    bool is_unique;
    index::Type type;
    bool if_not_exists;
};

/// INSERT statement: target table, column names and the rows of values.
struct InsertExpression
{
    std::string table_name;
    std::vector column_names;
    std::vector>> values_rows;
};

/// UPDATE statement: optional source, the updates and an optional filter.
struct UpdateExpression
{
    std::optional from;
    std::vector> updates;
    std::optional where;
};
+ +/** + * @brief The QueryParser class offers methods that extract information for logical plan creation. + * This interface can be subclassed for different types of parsers. + * Creation of the logical plan is solely based on this interface and therefore be done without knowing the + * frontend. + */ +class QueryParser +{ + + public: + QueryParser(const std::string &query); // simply stores the query string + virtual ~QueryParser() = default; + + // this enum provides information about the type of query + enum class QueryType + { + SELECT = 0, + UNION, + CREATE_TABLE, + CREATE_INDEX, + UPDATE, + INSERT, + DELETE, + DROP, // => "type of statements that we support" + UNSUPPORTED // => "a given query is a statement we do not support" + }; + + QueryType type() const; + + const SelectExpression extractSELECT() const + { + assert(_query_type == QueryType::SELECT); + // the implementation is guarded to prevent calling extractSelect on non-selection queries. + return extractSELECT_impl(); + } + + const std::optional extractFROM() const + { + assert(_query_type == QueryType::SELECT); + return extractFROM_impl(); + } + + const std::optional extractWHERE() const + { + assert(_query_type == QueryType::SELECT); + return extractWHERE_impl(); + } + + const std::optional extractGROUPBY() const + { + assert(_query_type == QueryType::SELECT); + return extractGROUPBY_impl(); + } + + const std::optional extractORDERBY() const + { + assert(_query_type == QueryType::SELECT); + return extractORDERBY_impl(); + } + + const std::optional extractLIMIT() const + { + assert(_query_type == QueryType::SELECT); + return extractLIMIT_impl(); + } + + const std::optional extractCREATE_TABLE() const + { + assert(_query_type == QueryType::CREATE_TABLE); + return extractCREATE_TABLE_impl(); + } + + const std::optional extractCREATE_INDEX() const + { + assert(_query_type == QueryType::CREATE_INDEX); + return extractCREATE_INDEX_impl(); + } + + const std::optional extractINSERT() const + { + 
assert(_query_type == QueryType::INSERT); + return extractINSERT_impl(); + } + + const UpdateExpression extractUPDATE() const + { + assert(_query_type == QueryType::UPDATE); + return extractUPDATE_impl(); + } + + protected: + static bool operands_should_swap(const beedb::expression::Operand &lhs, const beedb::expression::Operand &rhs); + + virtual const SelectExpression extractSELECT_impl() const = 0; + virtual const std::optional extractFROM_impl() const = 0; + virtual const std::optional extractWHERE_impl() const = 0; + virtual const std::optional extractGROUPBY_impl() const = 0; + virtual const std::optional extractORDERBY_impl() const = 0; + virtual const std::optional extractLIMIT_impl() const = 0; + + virtual const std::optional extractCREATE_TABLE_impl() const = 0; + virtual const std::optional extractCREATE_INDEX_impl() const = 0; + + virtual const std::optional extractINSERT_impl() const = 0; + + virtual const UpdateExpression extractUPDATE_impl() const = 0; + + /// Members: + protected: + const std::string &_query; + std::optional _query_type = std::nullopt; // supposed to be filled in the constructor of sub-classes +}; + +} // namespace beedb::parser \ No newline at end of file diff --git a/src/include/plan/graph/basic_graph.h b/src/include/plan/graph/basic_graph.h new file mode 100644 index 0000000..60982e6 --- /dev/null +++ b/src/include/plan/graph/basic_graph.h @@ -0,0 +1,288 @@ +/*------------------------------------------------------------------------------* + * Architecture & Implementation of DBMS * + *------------------------------------------------------------------------------* + * Copyright 2020 Databases and Information Systems Group TU Dortmund * + * Visit us at * + * http://dbis.cs.tu-dortmund.de/cms/en/home/ * + * * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL * + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * + * OTHER DEALINGS IN THE SOFTWARE. * + * * + * Authors (alphabetical list): * + * Maximilian Berens * + * Roland Kühn * + * Jan Mühlig * + *------------------------------------------------------------------------------* + */ + +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +#include + +namespace beedb::util +{ +/** + * @brief The Graph class represents a directed graph with "weighted edges". + * + * Individual node store objects of type NodeData and have a unique id of type NodeIDType. + * Edges are directional and carry objects of type EdgeData. + * There can only be one edge from node A to node B. + * + * This class makes extensive use of the subscript operator[]. Edges and nodes can be created and accessed by simply + * using the operator. For example: `graph[edge] = edge_value`, `NodeDataType value = graph[node]` or + * `graph[edge].remove()`. + */ +template class Graph +{ + public: + using EdgeID = std::pair; // edge is a pair: + using NodeIDs = std::vector; // a collection of node id's + + Graph() = default; + Graph(Graph &&) = default; + Graph(const Graph &) = default; + + protected: + using NodeData = std::map>; // holds actual node data + using EdgeData = std::map; // holds actual edge data + using NodeIDMap = std::map; // used for aux. data structures + + /** + * @brief The Edges struct Proxy data structure that helps implement + * the subscript-assignement operators for edges. This is more complicated + * (compared to node data), because we make use of auxillary data + * structures for edges (incoming and outgoing edges). 
+ */ + class Edge + { + public: + Edge(Graph *ref, const EdgeID &edge) : _ref(ref), _edge(edge) + { + } + /** + * @brief operator = + * This gets called, when a call similar to + * `graph[{A,B}] = edge_value;` (clone-assignement) is made. + * + * This structure is required to keep auxillary structures (incomin- and outgoing nodes) consistent! + * + * @param data + */ + void operator=(const EdgeDataType &data) + { + // check graph consistency: we can not add an edge to nodes that dont exist: + if (_ref->_node_data.find(_edge.first) == _ref->_node_data.end() || + _ref->_node_data.find(_edge.second) == _ref->_node_data.end()) + { + + std::cerr << "ERROR: Trying to add edge, but nodes do not exist in graph!" + << " Edge is < " << _edge.first << " , " << _edge.second << " >" << std::endl; + + return; + } + + // the assertions test, if all auxillary structures are properly initialized: + assert(_ref->_incoming_node_ids.find(_edge.first) != _ref->_incoming_node_ids.end()); + assert(_ref->_incoming_node_ids.find(_edge.second) != _ref->_incoming_node_ids.end()); + assert(_ref->_outgoing_node_ids.find(_edge.first) != _ref->_outgoing_node_ids.end()); + assert(_ref->_outgoing_node_ids.find(_edge.second) != _ref->_outgoing_node_ids.end()); + + // create edge: + _ref->_outgoing_node_ids[_edge.first].push_back(_edge.second); + _ref->_incoming_node_ids[_edge.second].push_back(_edge.first); + _ref->_edge_data.insert({_edge, data}); + } + /** + * @brief operator EdgeData & conversion operator, that enables the + * subscript operator to be used as a simple getter for the data behind + * this proxy object. + */ + operator const EdgeDataType &() const + { + return _ref->_edge_data[_edge]; + } + /** + * @brief remove just removes this edge and its data. + * + * We also remove it from auxillary data structures ("incoming" and "outgoing" nodes). 
+ */ + void remove() + { + // first, remove edge and its data + _ref->_edge_data.erase(_edge); + // then remove id from auxillary edge-structures: + std::vector &out_nodes = _ref->_outgoing_node_ids[_edge.first]; + out_nodes.erase(std::remove(out_nodes.begin(), out_nodes.end(), _edge.second), out_nodes.end()); + + NodeIDs &in_nodes = _ref->_incoming_node_ids[_edge.second]; + in_nodes.erase(std::remove(in_nodes.begin(), in_nodes.end(), _edge.second), in_nodes.end()); + } + + private: + Graph *_ref; + const EdgeID &_edge; + }; + + class Node + { + public: + Node(Graph *ref, const NodeIDType &nid) : _ref(ref), _nid(nid) + { + } + /** + * @brief operator = + * This gets called, when a call similar to + * `graph[node_id] = node_data;` is made. + * + * This structure is required to keep auxillary structures (incomin- and outgoing nodes) consistent! + * + * @param data + */ + void operator=(std::unique_ptr data) + { + _ref->insert({_nid, std::move(data)}); + } + /** + * @brief operator NodeDataType & conversion operator, that enables the + * subscript operator to be used as a simple getter for the data behind + * this proxy object. + */ + operator std::unique_ptr &() + { + return _ref->_node_data[_nid]; + } + + operator const NodeDataType &() const + { + return **(_ref->_node_data[_nid]); + } + + const NodeIDs &incomingNodes() const + { + return _ref->_incoming_node_ids[_nid]; + } + const NodeIDs &outgoingNodes() const + { + return _ref->_outgoing_node_ids[_nid]; + } + /** + * @brief remove removes this node and all connected edges. 
+ */ + void remove() + { + // remove node data: + _ref->_node_data.erase(_nid); + + // remove incoming edges: + for (auto &other_node_id : _ref->_incoming_node_ids[_nid]) + { + _ref->_edge_data.erase({other_node_id, _nid}); // actual edge + + // remove edges to this node from _outgoing_node_ids of other_node: + NodeIDs &out_nodes = _ref->_outgoing_node_ids[other_node_id]; + out_nodes.erase(std::remove(out_nodes.begin(), out_nodes.end(), _nid), out_nodes.end()); + } + // remove outgoing edges: + for (auto &other_node_id : _ref->_outgoing_node_ids[_nid]) + { + _ref->_edge_data.erase({_nid, other_node_id}); // actual edge + + // remove edges to this node from _incoming_node_ids of other_node: + NodeIDs &in_nodes = _ref->_incoming_node_ids[other_node_id]; + in_nodes.erase(std::remove(in_nodes.begin(), in_nodes.end(), _nid), in_nodes.end()); + } + // remove auxillary entries for this node: + _ref->_incoming_node_ids.erase(_nid); + _ref->_outgoing_node_ids.erase(_nid); + } + + private: + Graph *_ref; + const NodeIDType &_nid; + }; + + public: + // getter and setter for nodes/their data. can change node's data + Node operator[](const NodeIDType &nid) + { + return Node(this, nid); + } + + const NodeIDType &insert( + typename NodeData::value_type &&map_pair) // parameter type is a std::pair + { + const NodeIDType &id = _node_data.insert({map_pair.first, std::move(map_pair.second)}).first->first; + _incoming_node_ids.insert({id, {}}); + _outgoing_node_ids.insert({id, {}}); + return id; + } + + // simple read-only getter for node-data + const std::unique_ptr &operator[](const NodeIDType &nid) const + { + return _node_data.at(nid); + } + + // getter for mutable edge-data. Access via pair + Edge operator[](const EdgeID &edge) + { + return Edge(this, edge); + } + // simple read-only getter for edge-data. 
Access via pair + const EdgeDataType &operator[](const EdgeID &edge_id) const + { + return _edge_data[edge_id]; + } + /** + * @brief toConsole exhaustive dump of all node- and edge ids to console, without data. + */ + void toConsole() const + { + std::cout << "Nodes:" << std::endl; + for (const auto &node : _node_data) + { + std::cout << "\t{" << node.first << ": " << static_cast(*(node.second)) << "}" << std::endl; + } + std::cout << "Edges:" << std::endl; + for (auto const &edge : _edge_data) + { + std::cout << "\t<" << edge.first.first << "," << edge.first.second << ">" << std::endl; + } + } + + const NodeIDs &outgoing_nodes(const NodeIDType &nid) const + { + return _outgoing_node_ids.at(nid); + } + + const NodeIDs &incoming_nodes(const NodeIDType &nid) const + { + return _incoming_node_ids.at(nid); + } + + bool is_empty() const + { + return _node_data.empty(); + } + + protected: // actual graph data should be available to daughter classes + NodeData _node_data; // contains node data. key is a NodeIDType + EdgeData _edge_data; // contains edge data. 
key is a pair + NodeIDMap _outgoing_node_ids; // for efficient connectivity lookup + NodeIDMap _incoming_node_ids; // for efficient connectivity lookup +}; + +} // namespace beedb::util \ No newline at end of file diff --git a/src/include/plan/logical/builder.h b/src/include/plan/logical/builder.h new file mode 100644 index 0000000..3bb8e3b --- /dev/null +++ b/src/include/plan/logical/builder.h @@ -0,0 +1,102 @@ +/*------------------------------------------------------------------------------* + * Architecture & Implementation of DBMS * + *------------------------------------------------------------------------------* + * Copyright 2020 Databases and Information Systems Group TU Dortmund * + * Visit us at * + * http://dbis.cs.tu-dortmund.de/cms/en/home/ * + * * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * + * OTHER DEALINGS IN THE SOFTWARE. * + * * + * Authors (alphabetical list): * + * Maximilian Berens * + * Roland Kühn * + * Jan Mühlig * + *------------------------------------------------------------------------------* + */ + +#pragma once +#include "database.h" +#include "parser/query_parser.h" +#include "plan.h" + +namespace beedb::plan::logical +{ + +/** + * Used as a dictionary to resolve alias-definitions in a query. + * Usable for both attribute- and table alias definitions. 
// NOTE(review): the template parameter/argument lists of the three alias
// declarations below (and of two parameters further down) appear to have been
// stripped; they are left untouched here and have to be restored from the
// upstream header.

/**
 * Used as a dictionary to resolve alias-definitions in a query.
 * Usable for both attribute- and table alias definitions.
 *
 * key = alias
 * value = actual name
 */
template using AliasMapping = std::unordered_map;

using TableAliasMapping = AliasMapping>;
using AttributeToTableNameMapping = AliasMapping;

/**
 * Static helpers that turn the information offered by a parser::QueryParser
 * into a logical Plan.
 */
class Builder
{
  public:
    // Entry point; NOTE(review): presumably dispatches to one of the
    // statement-specific builders below -- confirm in the implementation.
    static const Plan build(Database &database, const parser::QueryParser &parser);
    static const Plan buildSelectPlan(Database &database, const parser::QueryParser &parser);
    static const Plan buildCreateTablePlan(Database &database, const parser::QueryParser &parser);
    static const Plan buildCreateIndexPlan(Database &database, const parser::QueryParser &parser);
    static const Plan buildInsertPlan(Database &database, const parser::QueryParser &parser);
    static const Plan buildUpdatePlan(Database &database, const parser::QueryParser &parser);

  private:
    /**
     * @brief populateAttributeToTableMappingAndCheck adds a table reference for a given attribute, if this attribute
     * has no reference yet. This action is checked and only available, if the attribute can be uniquely associated
     * with a table. For example, an exception is thrown, if the user does not disambiguate the tables in a self-join.
     * @param attribute_mapping This structure is modified/populated
     * @param table_references contains references to all tables that participate in this query.
     * @param attr The attribute to be checked
     */
    static void populateAttributeToTableMappingAndCheck(AttributeToTableNameMapping &attribute_mapping,
                                                        const TableAliasMapping &table_references,
                                                        const expression::Attribute &attr);
    /**
     * @brief resolveTableReferences sets missing table references of attribute operands in the given predicate. This
     * allows the user to omit table references when ambiguities are resolvable (via table alias in self
     * joins or single-table queries, for example).
     * @param attribute_mapping an already populated map with attribute-to-table references.
     * @param pred a predicate with (potentially) missing table references
     * @return the given predicate with proper table references in attribute operands.
     */
    static expression::Predicate resolveTableReferences(const AttributeToTableNameMapping &attribute_mapping,
                                                        const expression::Predicate &pred);
    /**
     * @brief resolveTableReferences looks up the table for a given attribute and creates it anew.
     * @param attribute_mapping map, that is supposed to include a table name for this attribute
     * @param attribute the table-less attribute (that has to be recreated)
     * @return an identical attribute with a valid table reference
     */
    static const expression::Attribute resolveTableReferences(const AttributeToTableNameMapping &attribute_mapping,
                                                              const expression::Attribute &attribute);

    // The helpers below each take the plan under construction and return a
    // node id (addProjection returns nothing).
    // NOTE(review): presumably the id of the node they added -- confirm.
    static const NodeIDType addFromNodes(
        const Database &db, Plan &plan,
        std::variant &tables,
        const AttributeToTableNameMapping &attribute_mapping);
    static const NodeIDType addSelection(Plan &plan, const expression::Predicate &predicate,
                                         const NodeIDType &parent_id);
    static const NodeIDType addGroupBy(const AttributeToTableNameMapping &attribute_mapping,
                                       const beedb::parser::GroupByExpression &groupbyexpr, Plan &plan,
                                       const NodeIDType &parent_id);
    static const NodeIDType addOrderBy(const AttributeToTableNameMapping &attribute_mapping,
                                       const parser::OrderByExpression &orderbyexpr, Plan &plan,
                                       const NodeIDType &parent_id);
    static const NodeIDType addLimit(const parser::LimitExpression &limit_expression, Plan &plan,
                                     const NodeIDType &parent_id);

    static void addProjection(logical::Plan &plan, const std::vector &attributes,
                              const AttributeToTableNameMapping &attribute_mapping, const NodeIDType &parent_id);
};
-0,0 +1,280 @@ +/*------------------------------------------------------------------------------* + * Architecture & Implementation of DBMS * + *------------------------------------------------------------------------------* + * Copyright 2020 Databases and Information Systems Group TU Dortmund * + * Visit us at * + * http://dbis.cs.tu-dortmund.de/cms/en/home/ * + * * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * + * OTHER DEALINGS IN THE SOFTWARE. * + * * + * Authors (alphabetical list): * + * Maximilian Berens * + * Roland Kühn * + * Jan Mühlig * + *------------------------------------------------------------------------------* + */ + +#pragma once + +#include "expression/operator.h" +#include "plan/graph/basic_graph.h" + +#include + +namespace beedb::plan::logical +{ + +/** + * Type of node identifiers. Is supposed to be unique for every operator instance within a plan. + * This type must be usable as a key in std::map + */ +using NodeIDType = std::string; + +/** + * @brief The LogicalPlan class This is a (directed) graph of logical operators with schema- and data flow indicated as + * edges. + * + * Nodes are (logical) Operators and Edges contain the schema produced by an operator. Nodes contain a dependency, that + * has to be satisfied by incoming edges. + * + * It is created by consuming a query object and producing the canonical (and probably inefficient) query plan. + * + * In a canonical plan, all occuring relations are first combined via cross-product, or, if explicitly defined by the + * user, as an inner join. 
Aggregations/Group by, selections and projections are defined "on top" of this cross-product. + * The resulting (unoptimized) graph is supposed to be semantically identical to the query. This is enforced by the + * notion of 'required schemas'. I.e. a selection, say, "A < 5", expects to find Attribute A in the Attributes of + * incoming edges. Operators can not reduce (but extend or replace) the list of input-attributes in order to produce the + * output schema, except for projection-operations. + * + * The plan is a graph and implements semantical consistency check's (see 'required schemas'). + * Semantic consistency is ensured, when all operator requirements are 'producable' by child operators, over the + * attributes set on incoming edges. + * + * Operators can check for additional conditions, for example the Scan operators requires a given table to be present in + * the database. + * + * + * Note that Operator's and it's components are all read only, i.e. const. The plan itself is created operator by + * operator. + * + * When performing plan optimizations, a plan is supposed to be treated as const and has to be recreated (based on an + * unoptimized one). + */ +class Plan : public beedb::util::Graph +{ + public: + Plan() = default; + + /** + * @brief producedSchema Generate schema that the result of this generator has. + * + * This is the union of attributes produced by operator with the id nid and all incoming attributes. + * @param nid + * @return the union of incoming attributes and additions of this operator + */ + expression::OutgoingSchema produced_attributes(const NodeIDType &nid) const; + + bool requirements_satisfied(const NodeIDType node_id) const; // TODO: make use of phantom types + + // convenience function. This can be done directly via "plan[Edge]" + // operator, but notation is then a little cluttered... 
    /**
     * @brief connect creates the edge <source_id, sink_id> and stores the attributes produced by the source
     * node (see produced_attributes) as the edge data.
     * @param source_id node the edge starts at
     * @param sink_id node the edge points to
     */
    void connect(const NodeIDType source_id, const NodeIDType sink_id)
    {
        (*this)[EdgeID{source_id, sink_id}] = this->produced_attributes(source_id);
    }

    /**
     * @brief descend_and_apply basic "descend" function, that calls a function on a node and recursively on all
     * its children. Used to retrieve information from an existing plan, without altering its state.
     *
     * Children are the nodes listed as incoming for a node. The lookup uses at(), so an unknown node id
     * throws std::out_of_range.
     *
     * @param starting_node where to start the descent
     * @param fkt this is called on the starting_node and all its children. Is expected to have state and
     * side-effects outside of logical::Plan.
     */
    void descend_and_apply(const NodeIDType &starting_node, std::function fkt) const
    {
        fkt(starting_node); // parent first, then its children
        for (const auto &child : _incoming_node_ids.at(starting_node))
        {
            descend_and_apply(child, fkt);
        }
    }

    // typedefs for better readability:
    // NOTE(review): the template arguments of std::function (here and above) appear stripped.
    using NodeMapContent = NodeData::value_type; // the pair type stored in the node map
    using NodeRecreationFunction = std::function;

    /**
     * @brief descend_and_recreate rebuilds the sub-graph of another plan inside this plan.
     *
     * @param other plan that is traversed
     * @param others_node node of `other` to start at
     * @param recreate called with a node id of `other`; its result is inserted into this plan
     * @return id of the node that was inserted for others_node
     */
    const NodeIDType descend_and_recreate(const Plan &other, const NodeIDType &others_node,
                                          NodeRecreationFunction recreate)
    {
        // recreate others_node and insert it into this plan. The id of the new node is stored in "node"
        const NodeIDType &node = this->insert(recreate(others_node));

        // traverse other_node's children:
        for (const auto &others_child : other._incoming_node_ids.at(others_node))
        {
            // recursively recreate all nodes of other:
            const auto &new_child = descend_and_recreate(other, others_child, recreate);
            // all of the children's children should be created by now, making correct schema inference possible:
            this->connect(new_child, node);
        }

        return node;
    }
Edge information is + * recalculated + * @param new_child + * @param parent + */ + void insert_below(const NodeIDType new_child, const NodeIDType parent) + { + // note: children send data to their parents, from leaves to the root + + if (_incoming_node_ids[parent].size() > 1) + { + std::cerr << "ERROR(logical::Plan): I am supposed to replace more then one child with a single new child (" + << new_child << "). This is undefined behaviour, no action is performed! (Parent node is " + << parent << ")" << std::endl; + return; + } + else if (_incoming_node_ids[parent].size() == 1) + { + // in case there already exists a child below the parent, + // we insert the new child between the parent and the old child! + + // first, we remove the existing edge from the parent's old child to the parent: + auto &old_child = _incoming_node_ids[parent][0]; + (*this)[EdgeID{old_child, parent}].remove(); + + // connect old child to new child (aka. replacing the removed edge): + connect(old_child, new_child); + } + + connect(new_child, parent); // in any valid case, we connect the new child to the parent + } + + // void generateDotFile(std::string& filepath); // TODO + + inline NodeIDType get_new_operator_ID(const std::string &name = "operator") + { + _last_operator_id = std::to_string(++_operator_count) + "_" + name; + return _last_operator_id; + } + + inline NodeIDType last_added_node() const + { + return _last_operator_id; + } + + void print_table() const; + + /** + * @brief findRoot returns the root node, if one can be determined + * @return is std::nullopt, if no root is found + */ + std::optional find_root() const + { + assert(_incoming_node_ids.size() == _node_data.size()); + assert(_node_data.size() == _outgoing_node_ids.size()); + + if (_node_data.size() == 1) + { + return _node_data.begin()->first; + } + + // the root is supposed to be the only node, that has only incoming nodes. 
+ std::optional root = std::nullopt; + for (const auto &[node_id, _] : _node_data) + { + assert(_incoming_node_ids.find(node_id) != _incoming_node_ids.end()); + assert(_outgoing_node_ids.find(node_id) != _outgoing_node_ids.end()); + if (_incoming_node_ids.at(node_id).size() > 0 && _outgoing_node_ids.at(node_id).size() == 0) + { + if (root.has_value()) + { + // there is another node that satisfies this criterion, we can not determine the root uniquely! + return std::nullopt; + } + root = node_id; + } + } + // root remains empty, if no node fulfilling the criterion is found + return root; + } + + /** + * @brief is_proper_plan_graph Consistency check, that determines, if this (directed) plan-graph is fully connected + * by a single path. + * + * It traverses the graph "backwards", by starting at the root and "marking" all visited nodes. + * If all nodes are visited in backwards-direction from the node, the graph is "proper". + * + * Note that this has nothing to do with operators, just the graph topology is considered. 
+ * + * @return true, if graph is "proper" + */ + bool is_proper_plan_graph() const + { + /// We traverse the graph "backwards", by starting at the root and "marking" all visited nodes + /// Begin traversal: + /// For all incoming nodes, do: + /// * remove id from "unvisited_nodes" + /// * call this procedure on children + /// Note: + /// * If we try to remove an id twice, we detected a cycle + /// * if not all id's are consumed from "unvisited nodes", we have + /// "islands" of nodes that are are not connected to the root + /// In both cases, the graph is "improper" + + auto root = find_root(); // makes a pass over all registered nodes/operators + if (!root.has_value()) + { + return false; // a plan without a well defined root is not "proper" + } + + // put all node id's of the plan into a set: + std::set unvisited_nodes; + std::transform(_node_data.begin(), _node_data.end(), std::inserter(unvisited_nodes, unvisited_nodes.end()), + std::bind(&NodeData::value_type::first, std::placeholders::_1)); + + bool is_cyclic = false; + + // create helper, that removes visited nodes from "unvisited_nodes" + auto visit_node = [&](const NodeIDType &node) { + if (unvisited_nodes.find(node) == unvisited_nodes.end()) + { + is_cyclic = true; + } + else + { + unvisited_nodes.erase(node); + } + }; + + descend_and_apply(root.value(), visit_node); + + return !is_cyclic && unvisited_nodes.empty(); + } + + // /** + // * @brief clone + // * @return note that the returned plan-copy is mutable/not const! 
+ // */ + // Plan clone() const { + // return {}; + // } + private: + unsigned _operator_count = 0; + + NodeIDType _last_operator_id; +}; +} // namespace beedb::plan::logical \ No newline at end of file diff --git a/src/include/plan/optimizer/optimizer.h b/src/include/plan/optimizer/optimizer.h new file mode 100644 index 0000000..f3cafb6 --- /dev/null +++ b/src/include/plan/optimizer/optimizer.h @@ -0,0 +1,70 @@ +/*------------------------------------------------------------------------------* + * Architecture & Implementation of DBMS * + *------------------------------------------------------------------------------* + * Copyright 2020 Databases and Information Systems Group TU Dortmund * + * Visit us at * + * http://dbis.cs.tu-dortmund.de/cms/en/home/ * + * * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * + * OTHER DEALINGS IN THE SOFTWARE. 
*
+ * *
+ * Authors (alphabetical list): *
+ * Maximilian Berens *
+ * Roland Kühn *
+ * Jan Mühlig *
+ *------------------------------------------------------------------------------*
+ */
+
+#pragma once
+
+#include
+
+#include "database.h"
+#include "plan/logical/plan.h"
+
+namespace beedb::plan::logical
+{
+
+/**
+ * Declares the optimizer for logical plans: it is constructed from a database and an unoptimized plan and
+ * hands out a plan via optimize(). The implementation is not part of this header.
+ *
+ * NOTE(review): the template arguments of the aliases and members below (and the parameter list after
+ * "template") were lost in this copy of the patch; confirm them against the repository.
+ */
+class Optimizer
+{
+    // presumably maps an identifier to the predicate(s) attached to it -- key/value types were lost, TODO confirm
+    using PredicateMap = std::map>;
+
+    // map that holds references to a specific type of operator (for convenient iteration and type check)
+    template
+    using OperatorMap = std::map>;
+
+  public:
+    /**
+     * @param database Database the plan belongs to.
+     * @param unoptimized_plan Plan that serves as the basis for the optimization; taken by const reference.
+     */
+    Optimizer(const beedb::Database &database, const logical::Plan &unoptimized_plan);
+
+    /**
+     * @return A plan, returned by value (presumably the optimized recreation of the unoptimized plan).
+     */
+    const logical::Plan optimize();
+
+  private:
+    // NOTE(review): judging by the name, this forces joins to be executed as hash joins -- confirm in the .cpp
+    void optimization_force_hash_join();
+
+    /**
+     * @brief Adds references to the Operator<> maps (members of this class).
+     */
+    void fill_access_maps();
+    /// Members:
+
+    // All three are reference members: the referenced objects have to outlive the Optimizer.
+    const beedb::Database &_database;
+    const beedb::Config &_config;
+    const logical::Plan &_plan;
+
+    /// Access structures:
+
+    const NodeIDType _plan_root;
+    PredicateMap _plan_join_predicate_map;
+    PredicateMap _plan_selection_predicate_map;
+
+    OperatorMap _plan_table_map;
+    OperatorMap _plan_join_map;
+    OperatorMap _plan_selection_map;
+};
+
+} // namespace beedb::plan::logical
\ No newline at end of file
diff --git a/src/include/plan/physical/builder.h b/src/include/plan/physical/builder.h
new file mode 100644
index 0000000..b8c2c99
--- /dev/null
+++ b/src/include/plan/physical/builder.h
@@ -0,0 +1,154 @@
+/*------------------------------------------------------------------------------*
+ * Architecture & Implementation of DBMS *
+ *------------------------------------------------------------------------------*
+ * Copyright 2020 Databases and Information Systems Group TU Dortmund *
+ * Visit us at *
+ * http://dbis.cs.tu-dortmund.de/cms/en/home/ *
+ * *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS *
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
+ * FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR *
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, *
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR *
+ * OTHER DEALINGS IN THE SOFTWARE. *
+ * *
+ * Authors (alphabetical list): *
+ * Maximilian Berens *
+ * Roland Kühn *
+ * Jan Mühlig *
+ *------------------------------------------------------------------------------*
+ */
+
+#pragma once
+#include "plan.h"
+#include "plan/logical/plan.h"
+#include
+#include
+#include
+#include
+#include
+
+namespace beedb::plan::physical
+{
+/**
+ * The Builder builds the physical operator plan based on the logical plan.
+ *
+ * All member functions are static and the class declares no data members, so it is used as a stateless
+ * collection of factory functions.
+ *
+ * NOTE(review): the template arguments of the smart pointers and containers below were lost in this copy of
+ * the patch; confirm the pointee/element types against the repository.
+ */
+class Builder
+{
+  public:
+    /**
+     * Creates a plan containing physical operators
+     * based on the logical plan.
+     *
+     * @param database Database for the execution.
+     * @param logical_plan Logical plan as base for physical operators.
+     * @return Plan with physical operators.
+     */
+    static Plan build(Database &database, const logical::Plan &logical_plan);
+
+    /**
+     * Creates a plan for filling index data structures with data.
+     *
+     * @param database Database for execution.
+     * @param table_name Name of the indexed table.
+     * @param column_name Name of the indexed column.
+     * @param index_name Name of the index.
+     * @return Plan for filling the index.
+     */
+    static Plan build_index_plan(Database &database, const std::string &table_name, const std::string &column_name,
+                                 const std::string &index_name);
+
+  private:
+    /**
+     * Builds a physical execution operator based on a logical node.
+     *
+     * @param database Database for execution.
+     * @param logical_plan Full logical plan.
+     * @param logical_node_name Name of the logical node.
+     * @return Pointer to the built physical operator.
+     */
+    static std::unique_ptr build_operator(Database &database,
+                                          const logical::Plan &logical_plan,
+                                          const std::string &logical_node_name);
+
+    /**
+     * Turns a logical predicate into a physical predicate matcher (single-schema overload).
+     *
+     * @param predicate Logical predicate.
+     * @param schema Schema for the table, the predicate will be evaluated on.
+     * @return Pointer to the predicate matcher.
+     */
+    static std::unique_ptr build_predicate(const expression::Predicate &predicate,
+                                           const table::Schema &schema);
+
+    /**
+     * Turns a logical atom into a physical predicate matcher (single-schema overload).
+     *
+     * @param atom Logical atom.
+     * @param schema Schema for the table, the predicate will be evaluated on.
+     * @return Pointer to the predicate matcher.
+     */
+    static std::unique_ptr build_predicate(
+        const std::unique_ptr &atom, const table::Schema &schema);
+
+    /**
+     * Turns a logical join predicate into a physical predicate matcher (two-schema overload).
+     *
+     * @param predicate Logical predicate.
+     * @param left_schema Schema of the left join table.
+     * @param right_schema Schema of the right join table.
+     * @return Pointer to the predicate matcher.
+     */
+    static std::unique_ptr build_predicate(const expression::Predicate &predicate,
+                                           const table::Schema &left_schema,
+                                           const table::Schema &right_schema);
+
+    /**
+     * Turns a logical join atom into a physical predicate matcher (two-schema overload).
+     *
+     * @param atom Logical atom.
+     * @param left_schema Schema of the left join table.
+     * @param right_schema Schema of the right join table.
+     * @return Pointer to the predicate matcher.
+     */
+    static std::unique_ptr build_predicate(
+        const std::unique_ptr &atom, const table::Schema &left_schema,
+        const table::Schema &right_schema);
+
+    /**
+     * Builds a physical value from a logical operand for the given type.
+     *
+     * @param operand Logical operand.
+     * @param type Type for the new value.
+     * @return Physical value.
+     */
+    static table::Value build_value(const expression::Operand &operand, const table::Type &type);
+
+    /**
+     * Builds a physical tuple based on the required schema and logical operands.
+     *
+     * @param schema Schema of the target table.
+     * @param attributes Logical schema.
+     * @param values Logical operands that represent the values.
+     * @return Physical tuple.
+     */
+    static table::Tuple build_tuple(const table::Schema &schema, const expression::Attributes &attributes,
+                                    const std::vector> &values);
+
+    /**
+     * Extracts key ranges for index scans from a logical predicate.
+     *
+     * @param predicate Logical predicate.
+     * @return Set of key ranges the index scan has to look up.
+     */
+    static std::set extract_key_ranges(const expression::Predicate &predicate);
+
+    /**
+     * Extracts a key from a logical atom.
+     *
+     * @param atom Logical atom.
+     * @return Key from the atom or std::nullopt, when no key was represented by the atom.
+     */
+    static std::optional extract_key(const std::unique_ptr &atom);
+};
+} // namespace beedb::plan::physical
\ No newline at end of file
diff --git a/src/include/plan/physical/plan.h b/src/include/plan/physical/plan.h
new file mode 100644
index 0000000..8061e36
--- /dev/null
+++ b/src/include/plan/physical/plan.h
@@ -0,0 +1,62 @@
+/*------------------------------------------------------------------------------*
+ * Architecture & Implementation of DBMS *
+ *------------------------------------------------------------------------------*
+ * Copyright 2020 Databases and Information Systems Group TU Dortmund *
+ * Visit us at *
+ * http://dbis.cs.tu-dortmund.de/cms/en/home/ *
+ * *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS *
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL * + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * + * OTHER DEALINGS IN THE SOFTWARE. * + * * + * Authors (alphabetical list): * + * Maximilian Berens * + * Roland Kühn * + * Jan Mühlig * + *------------------------------------------------------------------------------* + */ + +#pragma once +#include +#include +#include +#include +#include
+#include
+#include
+
+#include "plan/logical/plan.h"
+
+namespace beedb::plan::physical
+{
+/**
+ * The plan grants access to the physical operator chain.
+ *
+ * It keeps a reference to the database and owns the root of the operator chain through a std::unique_ptr.
+ *
+ * NOTE(review): the pointee type of the unique_ptr and the signatures of the two callbacks were lost in this
+ * copy of the patch; confirm them against the repository.
+ */
+class Plan
+{
+  public:
+    /**
+     * @param database Database the plan is executed on; stored by reference, so it has to outlive the plan.
+     * @param root Root of the physical operator chain; ownership is moved into the plan.
+     */
+    Plan(Database &database, std::unique_ptr root)
+        : _database(database), _root(std::move(root))
+    {
+    }
+
+    // virtual, so that classes deriving from Plan are destroyed correctly through a Plan pointer
+    virtual ~Plan() = default;
+
+    /**
+     * Executes the physical operators.
+     *
+     * @param schema_callback Will be called once, when the output schema is not empty.
+     * @param row_callback Will be called for every output tuple.
+     */
+    void execute(std::function schema_callback,
+                 std::function row_callback);
+
+  protected:
+    Database &_database;
+    std::unique_ptr _root;
+};
+} // namespace beedb::plan::physical
\ No newline at end of file
diff --git a/src/include/statistic/system_statistics.h b/src/include/statistic/system_statistics.h
new file mode 100644
index 0000000..4abdaa5
--- /dev/null
+++ b/src/include/statistic/system_statistics.h
@@ -0,0 +1,98 @@
+/*------------------------------------------------------------------------------*
+ * Architecture & Implementation of DBMS *
+ *------------------------------------------------------------------------------*
+ * Copyright 2020 Databases and Information Systems Group TU Dortmund *
+ * Visit us at *
+ * http://dbis.cs.tu-dortmund.de/cms/en/home/ *
+ * *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS *
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR *
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, *
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR *
+ * OTHER DEALINGS IN THE SOFTWARE.
* + * * + * Authors (alphabetical list): * + * Maximilian Berens * + * Roland Kühn * + * Jan Mühlig * + *------------------------------------------------------------------------------* + */ + +#pragma once +#include +#include
+#include
+
+namespace beedb::statistic
+{
+/**
+ * Container for managing statistics regarding tables.
+ *
+ * Currently the only statistic is the cardinality (number of tuples) per table id. Tables without an entry are
+ * reported with a cardinality of 0.
+ */
+class TableStatistic
+{
+  public:
+    TableStatistic() = default;
+    ~TableStatistic() = default;
+
+    /**
+     * Sets the cardinality of a table, overwriting any previous value.
+     *
+     * @param table Table whose id is used as key.
+     * @param cardinality New cardinality.
+     */
+    void cardinality(table::Table &table, const std::uint64_t cardinality)
+    {
+        this->cardinality(table.id(), cardinality);
+    }
+
+    /**
+     * Sets the cardinality for a table id, overwriting any previous value.
+     *
+     * @param table_id Id of the table.
+     * @param cardinality New cardinality.
+     */
+    void cardinality(const std::int32_t table_id, const std::uint64_t cardinality)
+    {
+        _cardinality[table_id] = cardinality;
+    }
+
+    /**
+     * @param table Table to look up.
+     * @return Stored cardinality of the table or 0, when nothing was recorded for it.
+     */
+    std::uint64_t cardinality(table::Table &table) const
+    {
+        const auto entry = _cardinality.find(table.id());
+        return entry != _cardinality.end() ? entry->second : 0u;
+    }
+
+    /**
+     * Increases the cardinality of a table. A table without an entry starts at 0.
+     *
+     * @param table Table to update.
+     * @param cardinality Number of tuples to add.
+     */
+    void add_cardinality(table::Table &table, const std::uint64_t cardinality = 1u)
+    {
+        // operator[] value-initializes a missing entry to 0, so one lookup covers both cases
+        _cardinality[table.id()] += cardinality;
+    }
+
+    /**
+     * Decreases the cardinality of a table. Tables without an entry are left untouched.
+     * The counter is unsigned, so it is clamped at 0 instead of wrapping around when more tuples are
+     * subtracted than were recorded.
+     *
+     * @param table Table to update.
+     * @param cardinality Number of tuples to subtract.
+     */
+    void sub_cardinality(table::Table &table, const std::uint64_t cardinality = 1u)
+    {
+        const auto entry = _cardinality.find(table.id());
+        if (entry != _cardinality.end())
+        {
+            entry->second = entry->second > cardinality ? entry->second - cardinality : 0u;
+        }
+    }
+
+  private:
+    // table id -> number of tuples
+    std::unordered_map<std::int32_t, std::uint64_t> _cardinality;
+};
+
+/**
+ * Bundles all statistics of the system; at the moment these are only the table statistics.
+ */
+class SystemStatistics
+{
+  public:
+    SystemStatistics() = default;
+    ~SystemStatistics() = default;
+
+    /**
+     * @return Mutable access to the table statistics.
+     */
+    TableStatistic &table_statistics()
+    {
+        return _table_statistics;
+    }
+
+  private:
+    TableStatistic _table_statistics;
+};
+
+} // namespace beedb::statistic
\ No newline at end of file
diff --git a/src/include/table/column.h b/src/include/table/column.h
new file mode 100644
index 0000000..e4285fc
--- /dev/null
+++ b/src/include/table/column.h
@@ -0,0 +1,212 @@
+/*------------------------------------------------------------------------------*
+ * Architecture & Implementation of DBMS *
+ *------------------------------------------------------------------------------*
+ * Copyright 2020 Databases and Information Systems Group TU Dortmund *
+ * Visit us at *
+ * http://dbis.cs.tu-dortmund.de/cms/en/home/ *
+ * *
+ * THE
SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS *
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR *
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, *
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR *
+ * OTHER DEALINGS IN THE SOFTWARE. *
+ * *
+ * Authors (alphabetical list): *
+ * Maximilian Berens *
+ * Roland Kühn *
+ * Jan Mühlig *
+ *------------------------------------------------------------------------------*
+ */
+
+#pragma once
+#include "type.h"
+#include
+#include
+#include
+#include
+#include
+#include
+
+namespace beedb::table
+{
+/**
+ * Represents one column in a table schema.
+ *
+ * A column consists of an id, a type, a nullable flag and the list of indices that exist for it. Id, type and
+ * nullable flag are const members and can not be changed after construction.
+ *
+ * NOTE(review): the template arguments of the index containers/pointers were lost in this copy of the patch
+ * (the name of the index type is not recoverable from here); they are kept as found.
+ */
+class Column
+{
+    friend std::ostream &operator<<(std::ostream &stream, const Column &column);
+
+  public:
+    /**
+     * @param id Id of the column.
+     * @param type Type of the column.
+     * @param is_nullable True, if data can be null for the column.
+     * @param indices Indices of the column; the container is moved into the column.
+     */
+    Column(const std::int32_t id, const Type type, const bool is_nullable = false,
+           std::vector> &&indices = {})
+        : _id(id), _type(type), _is_nullable(is_nullable), _indices(std::move(indices))
+    {
+        // "indices" is a named rvalue reference and therefore an lvalue; without std::move it would be copied
+    }
+
+    /**
+     * Creates a column without an id (the id stays at -1).
+     *
+     * @param type Type of the column.
+     * @param is_nullable True, if data can be null for the column.
+     * @param indices Indices of the column; the container is moved into the column.
+     */
+    Column(const Type type, const bool is_nullable = false,
+           std::vector> &&indices = {})
+        : _type(type), _is_nullable(is_nullable), _indices(std::move(indices))
+    {
+    }
+
+    Column(const Column &other)
+        : _id(other._id), _type(other._type), _is_nullable(other._is_nullable), _indices(other._indices)
+    {
+    }
+
+    ~Column() = default;
+
+    /**
+     * @return Id of the column or -1 if the column was not persisted to the metadata table.
+     */
+    inline std::int32_t id() const
+    {
+        return _id;
+    }
+
+    /**
+     * @return Type of the column.
+     */
+    inline const Type &type() const
+    {
+        return _type;
+    }
+
+    /**
+     * @return True, if data can be null for the column.
+     */
+    inline bool is_nullable() const
+    {
+        return _is_nullable;
+    }
+
+    /**
+     * @return True, when at least one index exists for this column.
+     */
+    inline bool is_indexed() const
+    {
+        return _indices.empty() == false;
+    }
+
+    /**
+     * Allows to ask for an index with specific requirements.
+     *
+     * @param require_range_index Specifies if we need an index supporting range queries.
+     * @return True, when an index with given requirements exists.
+     */
+    inline bool is_indexed(const bool require_range_index) const
+    {
+        if (require_range_index == false)
+        {
+            return is_indexed();
+        }
+
+        // iterate by reference: copying a shared_ptr costs an atomic reference count update
+        for (const auto &index : _indices)
+        {
+            if (index->supports_range())
+            {
+                return true;
+            }
+        }
+
+        return false;
+    }
+
+    /**
+     * Adds an index to the column.
+     *
+     * @param index Index to add.
+     */
+    inline void add_index(std::shared_ptr index)
+    {
+        _indices.push_back(std::move(index));
+    }
+
+    /**
+     * @return All indices for this column.
+     */
+    const std::vector> &indices() const
+    {
+        return _indices;
+    }
+
+    /**
+     * Checks whether an index with a specific name exists.
+     *
+     * @param name Name of the index.
+     * @return True, when the index exists.
+     */
+    bool has_index(const std::string &name) const
+    {
+        return index(name) != nullptr;
+    }
+
+    /**
+     * Lookup for a specific index.
+     *
+     * Note: pass a std::string. A string literal converts to bool more readily than to std::string and would
+     * therefore select the index(bool) overload.
+     *
+     * @param name Name of the index.
+     * @return Pointer to the specific index or an empty pointer, when no index has that name.
+     */
+    std::shared_ptr index(const std::string &name) const
+    {
+        for (const auto &index : _indices)
+        {
+            if (index->name() == name)
+            {
+                return index;
+            }
+        }
+
+        return {};
+    }
+
+    /**
+     * Lookup for a specific index.
+     *
+     * @param need_range Specifies whether the wanted index has to provide range queries.
+     * @return An index that supports the requirements or an empty pointer, when there is none. Without the range
+     * requirement this is the first index of the column.
+     */
+    std::shared_ptr index(const bool need_range) const
+    {
+        if (_indices.empty() == false && need_range == false)
+        {
+            return _indices[0];
+        }
+
+        // reached with need_range == true, or with no indices at all (then the loop does nothing)
+        for (const auto &index : _indices)
+        {
+            if (index->supports_range())
+            {
+                return index;
+            }
+        }
+
+        return {};
+    }
+
+    // Columns compare equal when their ids are equal; type, nullability and indices are not considered.
+    bool operator==(const Column &other) const
+    {
+        return _id == other.id();
+    }
+    bool operator!=(const Column &other) const
+    {
+        return _id != other.id();
+    }
+    // Comparison against a Type checks the column's type only.
+    bool operator==(const Type type) const
+    {
+        return _type == type;
+    }
+    bool operator!=(const Type type) const
+    {
+        return _type != type;
+    }
+
+  private:
+    const std::int32_t _id = -1;
+    const Type _type;
+    const bool _is_nullable;
+    std::vector> _indices;
+};
+} // namespace beedb::table
\ No newline at end of file
diff --git a/src/include/table/date.h b/src/include/table/date.h
new file mode 100644
index 0000000..efed3d9
--- /dev/null
+++ b/src/include/table/date.h
@@ -0,0 +1,113 @@
+/*------------------------------------------------------------------------------*
+ * Architecture & Implementation of DBMS *
+ *------------------------------------------------------------------------------*
+ * Copyright 2020 Databases and Information Systems Group TU Dortmund *
+ * Visit us at *
+ * http://dbis.cs.tu-dortmund.de/cms/en/home/ *
+ * *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS *
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR *
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, *
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR *
+ * OTHER DEALINGS IN THE SOFTWARE.
*
+ * *
+ * Authors (alphabetical list): *
+ * Maximilian Berens *
+ * Roland Kühn *
+ * Jan Mühlig *
+ *------------------------------------------------------------------------------*
+ */
+
+#pragma once
+#include
+
+namespace beedb::table
+{
+/**
+ * Implements date type.
+ *
+ * A date is a plain (year, month, day) triple; no calendar validation is performed. The default-constructed
+ * date 0-0-0 is treated as the "null" date (see operator==(std::nullptr_t)).
+ * Dates are ordered lexicographically by year, then month, then day.
+ */
+class Date
+{
+  public:
+    Date() noexcept = default;
+    Date(const std::uint16_t year, const std::uint8_t month, const std::uint8_t day) noexcept
+        : _year(year), _month(month), _day(day)
+    {
+    }
+
+    ~Date() noexcept = default;
+
+    std::uint16_t year() const noexcept
+    {
+        return _year;
+    }
+    std::uint8_t month() const noexcept
+    {
+        return _month;
+    }
+    std::uint8_t day() const noexcept
+    {
+        return _day;
+    }
+
+    Date &operator=(const Date &other) = default;
+
+    bool operator==(const Date other) const noexcept
+    {
+        return _year == other._year && _month == other._month && _day == other._day;
+    }
+
+    bool operator!=(const Date other) const noexcept
+    {
+        return !(*this == other);
+    }
+
+    // Lexicographic comparison on (year, month, day); the remaining relations are derived from it.
+    bool operator<(const Date other) const noexcept
+    {
+        if (_year != other._year)
+        {
+            return _year < other._year;
+        }
+        if (_month != other._month)
+        {
+            return _month < other._month;
+        }
+        return _day < other._day;
+    }
+
+    bool operator<=(const Date other) const noexcept
+    {
+        return !(other < *this);
+    }
+
+    bool operator>(const Date other) const noexcept
+    {
+        return other < *this;
+    }
+
+    bool operator>=(const Date other) const noexcept
+    {
+        return !(*this < other);
+    }
+
+    /**
+     * @return True, when this is the "null" date, i.e. year, month and day are all 0.
+     */
+    bool operator==(std::nullptr_t) const noexcept
+    {
+        return _year == 0 && _month == 0 && _day == 0;
+    }
+
+    // NOTE(review): the members are public although accessors exist; kept public so that code accessing
+    // them directly keeps compiling.
+  public:
+    std::uint16_t _year = 0;
+    std::uint8_t _month = 0;
+    std::uint8_t _day = 0;
+};
+} // namespace beedb::table
+
+namespace std
+{
+/**
+ * Hash for dates, so that they can be used as keys of unordered containers.
+ *
+ * Year (16 bit), month (8 bit) and day (8 bit) are packed into one 32 bit integer, which is then hashed.
+ * Different dates therefore always yield different packed values.
+ */
+template <> struct hash<beedb::table::Date>
+{
+    // const: unordered containers call the hash through a const object
+    std::size_t operator()(const beedb::table::Date &date) const noexcept
+    {
+        // bitwise OR (not logical ||), and the year is part of the key
+        const std::uint32_t h = (std::uint32_t{date.year()} << 16) | (std::uint32_t{date.month()} << 8) |
+                                std::uint32_t{date.day()};
+        return hash<std::uint32_t>()(h);
+    }
+};
+} // namespace std
\ No newline at end of file
diff --git a/src/include/table/memory_table.h b/src/include/table/memory_table.h
new file mode 100644
index 0000000..625e226
--- /dev/null
+++ b/src/include/table/memory_table.h
@@ -0,0 +1,119 @@
+/*------------------------------------------------------------------------------*
+ * Architecture & Implementation of DBMS *
+ *------------------------------------------------------------------------------*
+ * Copyright 2020 Databases and Information Systems Group TU Dortmund *
+ * Visit us at *
+ * http://dbis.cs.tu-dortmund.de/cms/en/home/ *
+ * *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS *
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR *
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, *
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR *
+ * OTHER DEALINGS IN THE SOFTWARE. *
+ * *
+ * Authors (alphabetical list): *
+ * Maximilian Berens *
+ * Roland Kühn *
+ * Jan Mühlig *
+ *------------------------------------------------------------------------------*
+ */
+
+#pragma once
+#include "schema.h"
+#include "tuple.h"
+#include
+#include
+#include
+
+namespace beedb::table
+{
+/**
+ * Stores data in memory.
+ * Some times, the data can not stay in the buffer from the BufferManager
+ * and has to be copied to "safe" memory that will not be replaced.
+ */
+class MemoryTable
+{
+  public:
+    /**
+     * @param schema Schema of the stored tuples. Only a reference is kept, so the schema has to outlive the
+     * memory table.
+     */
+    MemoryTable(const Schema &schema) : _schema(schema)
+    {
+    }
+
+    ~MemoryTable() = default;
+
+    /**
+     * Copy data from disk (or page) to the memory.
+     *
+     * Exactly schema().row_size() bytes are copied from the given tuple, so the tuple has to provide at least
+     * that many bytes.
+     *
+     * @param tuple Tuple to be copied to memory.
+     * @return Index within the memory table.
+     */
+    std::size_t copy_to_memory(const Tuple &tuple)
+    {
+        // the new tuple is appended, so its index is the size before the insertion
+        const std::size_t position = _tuples.size();
+
+        Tuple copy(_schema, _schema.row_size());
+        std::memcpy(copy.data(), tuple.data(), _schema.row_size());
+        _tuples.push_back(std::move(copy));
+
+        return position;
+    }
+
+    /**
+     * @return Constant set of all tuples stored in the memory table.
+     */
+    const std::vector<Tuple> &tuples() const
+    {
+        return _tuples;
+    }
+
+    /**
+     * @return Set of all tuples stored in the memory table.
+     */
+    std::vector<Tuple> &tuples()
+    {
+        return _tuples;
+    }
+
+    /**
+     * @return Schema of this table.
+     */
+    const Schema &schema() const
+    {
+        return _schema;
+    }
+
+    /**
+     * @return True, when the table has no tuples stored.
+     */
+    bool empty() const
+    {
+        return _tuples.empty();
+    }
+
+    /**
+     * @return Number of stored tuples.
+     */
+    std::size_t size() const
+    {
+        return _tuples.size();
+    }
+
+    /**
+     * Grants access to a specific tuple. The index is not range-checked and has to be smaller than size().
+     *
+     * @param index Index of the tuple.
+     * @return Tuple.
+     */
+    const Tuple &at(const std::size_t index) const
+    {
+        return _tuples[index];
+    }
+
+    /**
+     * Same as at(): unchecked access to the tuple at the given index.
+     */
+    const Tuple &operator[](const std::size_t index) const
+    {
+        return _tuples[index];
+    }
+
+  private:
+    const Schema &_schema;
+    std::vector<Tuple> _tuples;
+};
+} // namespace beedb::table
\ No newline at end of file
diff --git a/src/include/table/schema.h b/src/include/table/schema.h
new file mode 100644
index 0000000..ce35183
--- /dev/null
+++ b/src/include/table/schema.h
@@ -0,0 +1,342 @@
+/*------------------------------------------------------------------------------*
+ * Architecture & Implementation of DBMS *
+ *------------------------------------------------------------------------------*
+ * Copyright 2020 Databases and Information Systems Group TU Dortmund *
+ * Visit us at *
+ * http://dbis.cs.tu-dortmund.de/cms/en/home/ *
+ * *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS *
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR *
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, *
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR *
+ * OTHER DEALINGS IN THE SOFTWARE. *
+ * *
+ * Authors (alphabetical list): *
+ * Maximilian Berens *
+ * Roland Kühn *
+ * Jan Mühlig *
+ *------------------------------------------------------------------------------*
+ */
+
+#pragma once
+
+#include "column.h"
+#include
+
+#include
+
+#include "expression/attribute.h"
+
+namespace beedb::table
+{
+
+class Schema
+{
+  public:
+    using ColumnIndexType = std::size_t;
+
+    Schema() = default;
+    Schema(const Schema &other) = default;
+    Schema(Schema &&other) = default;
+
+    /**
+     * Schema creates an empty schema
+     *
+     * @param table_name Name of the table.
+ */ + Schema(const std::string &table_name) : _table_name(table_name) + { + } + + /** + * Combines to schemas to a new one. + * + * @param first First schema. + * @param second Second schema. + * @param table_name New name for the new schema (will be moved). + */ + Schema(const Schema &first, const Schema &second, const std::string &&table_name) + : _table_name(std::move(table_name)) + { + const auto size = first.size() + second.size(); + _columns.reserve(size); + _attributes.reserve(size); + _column_order.reserve(size); + _offset.reserve(size); + + _offset.insert(_offset.begin(), first._offset.begin(), first._offset.end()); + for (const auto offset : second._offset) + { + _offset.push_back(first._row_size + offset); + } + + for (auto i = 0u; i < first.size(); i++) + { + _columns.push_back(first._columns[i]); + _attributes.push_back(first._attributes[i]); + _column_order.push_back(i); + } + + for (auto i = 0u; i < second.size(); i++) + { + _columns.push_back(second._columns[i]); + _attributes.push_back(second._attributes[i]); + _column_order.push_back(_columns.size() - 1); + } + + _row_size = first._row_size + second._row_size; + } + + /** + * @brief Schema Recreates a schema by replacing the table name with a new one. Also replaces the table-name in all + * attributes. Warning! This is intended to be used on TABLE creation only, since attributes are only checked by + * "name", not combined-name! 
+ * @param other + * @param attributes + * @param new_table_name the new name of the table + */ + Schema(const Schema &other, const std::vector &attributes, const std::string &new_table_name) + : _table_name(new_table_name), _columns(other._columns), _attributes(attributes), _offset(other._offset), + _column_order(other._column_order), _row_size(other._row_size) + { + + assert(other._attributes.size() == attributes.size()); + for (auto i = 0u; i < attributes.size(); i++) + { + assert(attributes[i].name == + other._attributes[i] + .name); // every attribute in "attributes" should also be included in the given schema + } + } + + /** + * Creates a new schema from another schema and new attributes. + * + * Note that this can have a smaller length and different order then other._attributes and this->_columns! + * + * @param other Other schema. + * @param attributes New attributes. + */ + Schema(const Schema &other, const std::vector &attributes) + : _table_name(other._table_name), _columns(other._columns), _attributes(attributes), _offset(other._offset), + _row_size(other._row_size) + { + // build _column_order based on "attributes", since it's order does not necessarily coincide with the physical + // column order + for (const auto &attribute : attributes) + { + + auto old_index = other.column_index(attribute); // the "other" schema has the correct mapping! + assert( + old_index.has_value()); // every attribute in "attributes" should also be included in the given schema + + const auto &old_attribute = other.attribute(old_index.value()); + assert(old_attribute.table == attribute.table); + + _column_order.push_back(old_index.value()); + } + } + + ~Schema() = default; + + /** + * Adds a new column and its attribute to the schema. + * + * @param column Column to be added. + * @param attribute Logical attribute for the column. + * @param visible True, when the column is visible for output. 
+ */ + void add(Column column, expression::Attribute &&attribute) + { + assert(attribute.table.has_value() && attribute.table.value() == _table_name); + _attributes.push_back(std::move(attribute)); + _columns.push_back(column); + _column_order.push_back(_columns.size() - 1); + if (_offset.empty()) + { + _offset.push_back(0u); + } + else + { + const auto last_index = _offset.size() - 1; + _offset.push_back(_offset[last_index] + _columns[last_index].type().size()); + } + _row_size += column.type().size(); + } + + /** + * @return Number of columns in the schema. + */ + inline std::size_t size() const + { + return _columns.size(); + } + + /** + * Calculates the byte-offset for a specific column. + * + * @param column_index Index of the column. + * @return Offset in number of bytes for the raw data access. + */ + inline std::size_t offset(const std::size_t column_index) const + { + return _offset[column_index]; + } + + /** + * @return Number of bytes of the raw data. + */ + inline std::uint32_t row_size() const + { + return _row_size; + } + + /** + * + * Calculates the index of a column in the schema. Based on expression::Attribute equality semantics. + * @param attribute Logical attribute. + * @return Index in the schema. + */ + std::optional column_index(const expression::Attribute &attribute) const + { + for (auto i = 0u; i < _attributes.size(); i++) + { + if (_attributes[i] == attribute) + { // uses combined_name/expression::Attribute equality semantics + return _column_order[i]; + } + } + return std::nullopt; + } + + /** + * Calculates the index of a column in the schema. Search is solely based on the attributes NAME, + * neither on its table name or alias!! + * + * @param attribute_name Name of the column. + * @return Index in the schema. 
+ */ + std::optional column_index(const std::string &attribute_name) const + { + // this version is solely based on the attribute name + for (auto i = 0u; i < _attributes.size(); i++) + { + if (_attributes[i].name == attribute_name) + { + return _column_order[i]; + } + } + return std::nullopt; + } + + /** + * Checks whether the schema holds a specific attribute. + * + * @param attribute Logical attribute. + * @return True, when the schema contains the attribute. + */ + bool contains(const expression::Attribute &attribute) const + { + return column_index(attribute).has_value(); + } + + /** + * Checks whether the schema holds a specific column. + * + * @param column_name Name of the column. + * @return True, when the schema contains a column with the give name. + */ + bool contains(const std::string &column_name) const + { + return column_index(column_name).has_value(); + } + + /** + * @return Name of the table represented by this schema. + */ + inline const std::string &table_name() const + { + return _table_name; + } + + /** + * @return Constant set of all columns. + */ + inline const std::vector &columns() const + { + return _columns; + } + + /** + * Access to a specific column. + * + * @param index Index of the column. + * @return Constant access to the column. + */ + inline const Column &column(const std::size_t index) const + { + return _columns[index]; + } + + /** + * @return Set of all attributes. + */ + inline const std::vector &attributes() const + { + return _attributes; + } + + /** + * Access to a specific attribute. + * + * @param index Index of the attribute. + * @return Constant access to the attribute. + */ + inline const expression::Attribute &attribute(const std::size_t index) const + { + return _attributes[index]; + } + + /** + * @return True, when no column was added to the schema. + */ + inline bool empty() const + { + return _columns.empty(); + } + + /** + * @return Order of the column indices. 
+ */ + const std::vector &column_order() const + { + return _column_order; + } + + // /** + // * Updates the column order. + // * + // * @param column_order Ordered column indices. + // */ + // void column_order(std::vector&& column_order) { _column_order = column_order; } + + Column &operator[](const ColumnIndexType index) + { + return _columns[index]; + } + const Column &operator[](const ColumnIndexType index) const + { + return _columns[index]; + } + + private: + const std::string _table_name; + std::vector _columns; + std::vector _attributes; + std::vector _offset; + std::vector _column_order; + std::uint32_t _row_size = 0u; +}; +} // namespace beedb::table \ No newline at end of file diff --git a/src/include/table/table.h b/src/include/table/table.h new file mode 100644 index 0000000..5ccc6d6 --- /dev/null +++ b/src/include/table/table.h @@ -0,0 +1,149 @@ +/*------------------------------------------------------------------------------* + * Architecture & Implementation of DBMS * + *------------------------------------------------------------------------------* + * Copyright 2020 Databases and Information Systems Group TU Dortmund * + * Visit us at * + * http://dbis.cs.tu-dortmund.de/cms/en/home/ * + * * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * + * OTHER DEALINGS IN THE SOFTWARE. 
* + * * + * Authors (alphabetical list): * + * Maximilian Berens * + * Roland Kühn * + * Jan Mühlig * + *------------------------------------------------------------------------------* + */ + +#pragma once +#include "schema.h" +#include +#include +#include +#include +#include + +#include // used to extract the schema in the logical format + +namespace beedb::table +{ +/** + * Represents a table in the database. + */ +class Table +{ + friend std::ostream &operator<<(std::ostream &stream, const Table &table); + + public: + Table(const std::int32_t id, const disk::Page::page_id page_id, const Schema &schema) + : _id(id), _page_id(page_id), _schema(schema) + { + } + + ~Table() = default; + + /** + * @return Name of the table. + */ + inline const std::string &name() const + { + return _schema.table_name(); + } + + /** + * @return Id of the table. + */ + inline std::int32_t id() const + { + return _id; + } + + /** + * @return Id of the first data page. + */ + inline disk::Page::page_id page_id() const + { + return _page_id; + } + + /** + * @return Id of the last data page (pages are like a linked list). + */ + inline disk::Page::page_id last_page_id() const + { + return _last_page_id; + } + + /** + * Updates the last page of a table. + * + * @param page_id Id of the last page. + */ + inline void last_page_id(const disk::Page::page_id page_id) + { + _last_page_id = page_id; + } + + /** + * @return Schema of the table. 
+ */ + inline const Schema &schema() const + { + return _schema; + } + + Column &operator[](const std::size_t index) + { + return _schema[index]; + } + + Column &operator[](const expression::Attribute &attribute) + { + const auto index = _schema.column_index(attribute); + assert(index.has_value()); + return _schema[index.value()]; + } + + const Column &operator[](const std::size_t index) const + { + return _schema[index]; + } + + const Column &operator[](const expression::Attribute &attribute) const + { + const auto index = _schema.column_index(attribute); + assert(index.has_value()); + return _schema[index.value()]; + } + + /** + * @brief operator beedb::plan::logical::Attributes + * Conversion operator, that extracts the attributes of a table. + * The resulting object is the representation of a "schema" (set) in the + * logical plan. + */ + operator beedb::expression::Attributes() const + { + return _schema.attributes(); + } + + /** + * @return True, if the table is virtual aka not persisted. 
+ */ + inline bool is_virtual() const + { + return _id == -1; + } + + private: + const std::int32_t _id; + const disk::Page::page_id _page_id; + disk::Page::page_id _last_page_id = disk::Page::INVALID_PAGE_ID; // Will not be persisted + Schema _schema; +}; +} // namespace beedb::table \ No newline at end of file diff --git a/src/include/table/table_disk_manager.h b/src/include/table/table_disk_manager.h new file mode 100644 index 0000000..9ecbdb8 --- /dev/null +++ b/src/include/table/table_disk_manager.h @@ -0,0 +1,102 @@ +/*------------------------------------------------------------------------------* + * Architecture & Implementation of DBMS * + *------------------------------------------------------------------------------* + * Copyright 2020 Databases and Information Systems Group TU Dortmund * + * Visit us at * + * http://dbis.cs.tu-dortmund.de/cms/en/home/ * + * * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * + * OTHER DEALINGS IN THE SOFTWARE. * + * * + * Authors (alphabetical list): * + * Maximilian Berens * + * Roland Kühn * + * Jan Mühlig * + *------------------------------------------------------------------------------* + */ + +#pragma once + +#include "schema.h" +#include "table.h" +#include "tuple.h" +#include +#include +#include +#include +#include + +namespace beedb::table +{ +/** + * The TableDiskManager specifies the interface between tables and the disk. 
+ */ +class TableDiskManager +{ + public: + TableDiskManager(disk::BufferManager &buffer_manager); + ~TableDiskManager() = default; + + /** + * Reads the content of a page and interprets it as tuples + * for the given schema. + * + * @param page Page with raw content. + * @param schema Schema for the tuples. + * @return List of tuples stored at the given page. + */ + std::vector read_rows(disk::Page *page, const Schema &schema); + + /** + * Writes the tuple as raw content to a free page associated with the table. + * The written page will be unpinned after write. + * + * @param table Table. + * @param tuple Tuple to be written. + */ + void add_row(Table &table, Tuple &&tuple); + + /** + * Writes the tuple as raw content to a free page associated with the table. + * + * @param table Table. + * @param schema Schema the returned tuple should hold. + * @param tuple Tuple to be written. + * @return The tuple that was written to the page. The page is still pinned! + */ + Tuple add_row(Table &table, const Schema &schema, Tuple &&tuple); + + /** + * Takes a modified tuple and writes it to the disk. + * + * @param tuple The modified tuple that should be written to disk. + */ + void update_row(const Tuple &tuple); + + private: + disk::BufferManager &_buffer_manager; + + /** + * Scans for a page with enough free space for a new tuple. + * + * @param table Target table. + * + * @return Id of the page with free space. + */ + disk::Page::page_id find_page_for_row(Table &table); + + /** + * Adds a tuple to a free page. + * + * @param table Target table. + * @param tuple Tuple to be written. + * @return Page the tuple was written to and offset in bytes. 
+ */ + std::pair add_row(Table &table, Tuple &tuple); +}; +} // namespace beedb::table \ No newline at end of file diff --git a/src/include/table/tuple.h b/src/include/table/tuple.h new file mode 100644 index 0000000..52a8e61 --- /dev/null +++ b/src/include/table/tuple.h @@ -0,0 +1,288 @@ +/*------------------------------------------------------------------------------* + * Architecture & Implementation of DBMS * + *------------------------------------------------------------------------------* + * Copyright 2020 Databases and Information Systems Group TU Dortmund * + * Visit us at * + * http://dbis.cs.tu-dortmund.de/cms/en/home/ * + * * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * + * OTHER DEALINGS IN THE SOFTWARE. * + * * + * Authors (alphabetical list): * + * Maximilian Berens * + * Roland Kühn * + * Jan Mühlig * + *------------------------------------------------------------------------------* + */ + +#pragma once + +#include "column.h" +#include "schema.h" +#include "type.h" +#include "value.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace beedb::table +{ +/** + * Represents a tuple stored on the database. + */ +class Tuple +{ + public: + /** + * Creates a tuple with a given schema, stored on a page on the disk. + * + * @param schema Schema for the tuple. + * @param page_id Id of the page the tuple is stored on. + * @param data Raw content of the tuple. 
+ */ + Tuple(const Schema &schema, const disk::Page::page_id page_id, const disk::Page::page_offset in_page_offset, + std::byte *data) + : _schema(schema), _page_id(page_id), _in_page_offset(in_page_offset), _data(data) + { + } + + /** + * Creates a tuple living in the memory. The tuple is not + * persisted on the disk. + * + * @param schema Schema of the tuple. + * @param row_size Size of the tuple in bytes. + */ + Tuple(const Schema &schema, const std::size_t row_size) + : _schema(schema), _page_id(disk::Page::INVALID_PAGE_ID), _in_page_offset(disk::Page::INVALID_PAGE_OFFSET), + _data(new std::byte[row_size]) + { + std::memset(_data, '\0', row_size); + } + + /** + * Moves the data from the other tuple into this. The + * other tuple will contain no data after moving. + * + * @param schema Schema for the tuple. + * @param move_from Source tuple. + */ + Tuple(const Schema &schema, Tuple &&move_from) + : _schema(schema), _page_id(move_from.page_id()), _in_page_offset(move_from._in_page_offset), + _data(move_from._data) + { + move_from.data(nullptr); + } + + /** + * Moves the data from the other tuple into this. The + * other tuple will contain no data after moving. + * + * @param move_from Source tuple. + */ + Tuple(Tuple &&move_from) noexcept + : _schema(move_from.schema()), _page_id(std::move(move_from._page_id)), + _in_page_offset(std::move(move_from._in_page_offset)), _data(std::move(move_from._data)) + { + move_from._data = nullptr; + } + + /** + * Copies a tuple into memory. + * + * @param copy_from Source tuple. + */ + Tuple(const Tuple ©_from) + : _schema(copy_from.schema()), _page_id(disk::Page::INVALID_PAGE_ID), + _in_page_offset(disk::Page::INVALID_PAGE_OFFSET), _data(new std::byte[copy_from._schema.row_size()]) + { + std::memcpy(_data, copy_from._data, copy_from.schema().row_size()); + } + + /** + * Frees the memory, if the tuple is not persisted on the disk. 
+ */ + ~Tuple() + { + if (_page_id == disk::Page::INVALID_PAGE_ID && _data != nullptr) + { + delete[] _data; + } + } + + /** + * @return Schema of the tuple. + */ + inline const Schema &schema() const + { + return _schema; + } + + /** + * @return Id of the page the tuple is persisted on. + */ + inline disk::Page::page_id page_id() const + { + return _page_id; + } + + /** + * @return Offset in page or -1 if tuple is in memory. + */ + inline disk::Page::page_offset in_page_offset() const + { + return _in_page_offset; + } + + /** + * @return Access to the raw data. + */ + inline std::byte *data() const + { + return _data; + } + + /** + * @return True, when the tuple has data. + */ + inline bool has_data() const + { + return _data != nullptr; + } + + /** + * Updates the raw data. + * + * @param data New raw data. + */ + inline void data(std::byte *data) + { + _data = data; + } + + /** + * Extracts a value from the tuple for a specific column. + * + * @param index Index of the column. + * @return Value of the tuple. + */ + Value get(const std::size_t index) const + { + const auto offset = _schema.offset(index); + const Type &type = _schema[index].type(); + if (type == Type::INT) + { + return {type, *reinterpret_cast(&(_data[offset]))}; + } + else if (type == Type::LONG) + { + return {type, *reinterpret_cast(&(_data[offset]))}; + } + else if (type == Type::DOUBLE) + { + return {type, *reinterpret_cast(&(_data[offset]))}; + } + else if (type == Type::FLOAT) + { + return {type, *reinterpret_cast(&(_data[offset]))}; + } + else if (type == Type::CHAR) + { + return {type, std::string(reinterpret_cast(&_data[offset]))}; + } + + return {type, 0}; + } + + /** + * Updates the data of the tuple for a specific column. + * + * @param index Index of the column. + * @param raw_value New data. 
+ */ + template void set(const std::size_t index, T &raw_value) + { + const auto offset = _schema.offset(index); + if constexpr (std::is_same::value) + { + const auto column_size = _schema[index].type().size(); + const auto length = std::min(std::size_t(column_size), raw_value.length()); + std::memcpy(&(_data[offset]), raw_value.c_str(), length); + if (raw_value.length() < column_size) + { + std::memset(&(_data[offset + raw_value.length()]), '\0', column_size - raw_value.length()); + } + } + else if constexpr (std::is_same::value) + { + std::memset(&(_data[offset]), '\0', 1); + } + else if constexpr (std::is_same, std::int32_t>::value || + std::is_same, std::int64_t>::value || + std::is_same, float>::value || + std::is_same, double>::value) + { + assert(_schema[index].type().size() == sizeof(T) && "Column and value length are different."); + *reinterpret_cast(&(_data[offset])) = raw_value; + } + else + { + assert(false && "Unsupported type"); + } + } + + /** + * Set the data from a value for a specific column. + * + * @param index Index of the column. + * @param value New value. 
+ */ + void set(const std::size_t index, const Value &value) + { + if (value == Type::Id::FLOAT) + { + auto v = value.get(); + set(index, v); + } + else if (value == Type::Id::DOUBLE) + { + auto v = value.get(); + set(index, v); + } + else if (value == Type::Id::LONG) + { + auto v = value.get(); + set(index, v); + } + else if (value == Type::Id::INT) + { + auto v = value.get(); + set(index, v); + } + else if (value == Type::Id::CHAR) + { + auto v = value.get(); + set(index, v); + } + } + + private: + const Schema &_schema; + const disk::Page::page_id _page_id; + const disk::Page::page_offset _in_page_offset; + std::byte *_data; +}; +} // namespace beedb::table \ No newline at end of file diff --git a/src/include/table/type.h b/src/include/table/type.h new file mode 100644 index 0000000..a9db5b0 --- /dev/null +++ b/src/include/table/type.h @@ -0,0 +1,132 @@ +/*------------------------------------------------------------------------------* + * Architecture & Implementation of DBMS * + *------------------------------------------------------------------------------* + * Copyright 2020 Databases and Information Systems Group TU Dortmund * + * Visit us at * + * http://dbis.cs.tu-dortmund.de/cms/en/home/ * + * * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * + * OTHER DEALINGS IN THE SOFTWARE. 
* + * * + * Authors (alphabetical list): * + * Maximilian Berens * + * Roland Kühn * + * Jan Mühlig * + *------------------------------------------------------------------------------* + */ + +#pragma once +#include +#include + +namespace beedb::table +{ +/** + * Represents a database type. + */ +class Type +{ + public: + /** + * Ids for types. + */ + enum Id : std::uint16_t + { + INT, + LONG, + DOUBLE, + FLOAT, + CHAR, + UNKNOWN + }; + + Type(const Id id, const std::uint16_t length = 0u) : _id(id), _length(length) + { + } + + Type(const Type &other) = default; + + ~Type() = default; + + /** + * @return Length of the type if the length is dynamic (like char). + */ + inline std::uint16_t dynamic_length() const + { + return _length; + } + + operator Id() const + { + return _id; + } + bool operator==(Id id) const + { + return _id == id; + } + bool operator!=(Id id) const + { + return _id != id; + } + + void operator=(const Id id) + { + _id = id; + } + + Type &operator=(const Type &other) = default; + + /** + * @return Real size in bytes of the type. + */ + std::uint16_t size() const + { + switch (_id) + { + case INT: + return sizeof(std::int32_t); + case LONG: + return sizeof(std::int64_t); + case CHAR: + return sizeof(std::int8_t) * dynamic_length(); + case FLOAT: + return sizeof(float); + case DOUBLE: + return sizeof(double); + default: + return 0; + } + } + + /** + * @return Name of the type. 
+ */ + std::string name() const + { + switch (_id) + { + case INT: + return "INT"; + case LONG: + return "LONG"; + case DOUBLE: + return "DOUBLE"; + case FLOAT: + return "FLOAT"; + case CHAR: + return "CHAR(" + std::to_string(std::int32_t(dynamic_length())) + ")"; + default: + return "UNKNOWN"; + } + } + + private: + Id _id; + std::uint16_t _length; +} __attribute__((packed)); +} // namespace beedb::table \ No newline at end of file diff --git a/src/include/table/value.h b/src/include/table/value.h new file mode 100644 index 0000000..7895637 --- /dev/null +++ b/src/include/table/value.h @@ -0,0 +1,335 @@ +/*------------------------------------------------------------------------------* + * Architecture & Implementation of DBMS * + *------------------------------------------------------------------------------* + * Copyright 2020 Databases and Information Systems Group TU Dortmund * + * Visit us at * + * http://dbis.cs.tu-dortmund.de/cms/en/home/ * + * * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * + * OTHER DEALINGS IN THE SOFTWARE. * + * * + * Authors (alphabetical list): * + * Maximilian Berens * + * Roland Kühn * + * Jan Mühlig * + *------------------------------------------------------------------------------* + */ + +#pragma once + +#include "type.h" +#include +#include +#include +#include + +namespace beedb::table +{ +/** + * Represents a value within a tuple. + * A value contains the raw value and a type. + * The raw value will be interpreted as the given type. 
+ */ +class Value +{ + friend std::ostream &operator<<(std::ostream &stream, const Value &value); + + public: + using value_type = std::variant; + + Value(const Type type, const value_type value) : _type(type), _value(value) + { + } + + ~Value() = default; + + /** + * @return Raw value. + */ + inline value_type value() const + { + return _value; + } + + /** + * @return Value interpreted as given data type. + */ + template T get() const + { + return std::get(_value); + } + + /** + * Updates the value. + * + * @param value New value. + */ + inline void value(const value_type value) + { + _value = value; + } + + /** + * @return Type of the value. + */ + inline const Type &type() const + { + return _type; + } + + bool operator==(const Type &type) const + { + return _type == type; + } + + bool operator==(const Value &other) const + { + if (_type != other._type) + { + return false; + } + + return _value == other._value; + } + + bool operator!=(const Value &other) const + { + if (_type != other._type) + { + return true; + } + + return _value != other._value; + } + + bool operator<=(const Value &other) const + { + if (_type != other._type) + { + return false; + } + + return _value <= other._value; + } + + bool operator<(const Value &other) const + { + if (_type != other._type) + { + return false; + } + + return _value < other._value; + } + + bool operator>=(const Value &other) const + { + if (_type != other._type) + { + return false; + } + + return _value >= other._value; + } + + bool operator>(const Value &other) const + { + if (_type != other._type) + { + return false; + } + + return _value > other._value; + } + + bool operator==(const std::nullptr_t) const + { + if (_type == Type::INT) + { + return std::get(_value) == std::numeric_limits::min(); + } + else if (_type == Type::LONG) + { + return std::get(_value) == std::numeric_limits::min(); + } + else if (_type == Type::DOUBLE) + { + return std::get(_value) == std::numeric_limits::min(); + } + else if (_type == 
Type::FLOAT) + { + return std::get(_value) == std::numeric_limits::min(); + } + else if (_type == Type::CHAR) + { + return std::get(_value).empty(); + } + else + { + return true; + } + } + + /** + * @return Maximal value for the stored type. + */ + Value max() const + { + if (_type == Type::INT) + { + return {_type, std::numeric_limits::max()}; + } + else if (_type == Type::LONG) + { + return {_type, std::numeric_limits::max()}; + } + else if (_type == Type::DOUBLE) + { + return {_type, std::numeric_limits::max()}; + } + else if (_type == Type::FLOAT) + { + return {_type, std::numeric_limits::max()}; + } + else if (_type == Type::CHAR) + { + return {_type, ""}; + } + else + { + return {_type, 0}; + } + } + + /** + * @return Minimal value for the stored type. + */ + Value min() const + { + if (_type == Type::INT) + { + return {_type, std::numeric_limits::min() + 1}; + } + else if (_type == Type::LONG) + { + return {_type, std::numeric_limits::min() + 1}; + } + else if (_type == Type::DOUBLE) + { + return {_type, std::numeric_limits::min() + 1}; + } + else if (_type == Type::FLOAT) + { + return {_type, std::numeric_limits::min() + 1}; + } + else if (_type == Type::CHAR) + { + return {_type, ""}; + } + else + { + return {_type, 0}; + } + } + + Value &operator+=(const Value &other) + { + if (_type == other._type) + { + if (_type == Type::INT) + { + _value = std::get(_value) + std::get(other._value); + } + else if (_type == Type::LONG) + { + _value = std::get(_value) + std::get(other._value); + } + else if (_type == Type::DOUBLE) + { + _value = std::get(_value) + std::get(other._value); + } + else if (_type == Type::FLOAT) + { + _value = std::get(_value) + std::get(other._value); + } + } + + return *this; + } + + operator std::string() const + { + if (*this == nullptr) + { + return "NULL"; + } + + if (_type == Type::INT) + { + return std::to_string(std::get(_value)); + } + else if (_type == Type::LONG) + { + return std::to_string(std::get(_value)); + } + else if (_type 
== Type::DOUBLE) + { + return std::to_string(std::get(_value)); + } + else if (_type == Type::FLOAT) + { + return std::to_string(std::get(_value)); + } + else if (_type == Type::CHAR) + { + return std::get(_value); + } + + return ""; + } + + private: + const Type _type; + value_type _value; +}; +} // namespace beedb::table + +namespace std +{ +template <> struct hash +{ + public: + std::size_t operator()(const beedb::table::Value &value) const + { + if (value == beedb::table::Type::INT) + { + return std::hash()(value.get()); + } + else if (value == beedb::table::Type::LONG) + { + return std::hash()(value.get()); + } + else if (value == beedb::table::Type::DOUBLE) + { + return std::hash()(value.get()); + } + else if (value == beedb::table::Type::FLOAT) + { + return std::hash()(value.get()); + } + else if (value == beedb::table::Type::CHAR) + { + return std::hash()(value.get()); + } + else + { + return 0; + } + } +}; +} // namespace std \ No newline at end of file diff --git a/src/include/util/clock.h b/src/include/util/clock.h new file mode 100644 index 0000000..3fc2558 --- /dev/null +++ b/src/include/util/clock.h @@ -0,0 +1,56 @@ +/*------------------------------------------------------------------------------* + * Architecture & Implementation of DBMS * + *------------------------------------------------------------------------------* + * Copyright 2020 Databases and Information Systems Group TU Dortmund * + * Visit us at * + * http://dbis.cs.tu-dortmund.de/cms/en/home/ * + * * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL * + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * + * OTHER DEALINGS IN THE SOFTWARE. * + * * + * Authors (alphabetical list): * + * Maximilian Berens * + * Roland Kühn * + * Jan Mühlig * + *------------------------------------------------------------------------------* + */ + +#pragma once +#include +#include + +namespace beedb::util +{ +/** + * Clock for time measurements while query execution. + * The clock is started directly on creation. + */ +class Clock +{ + public: + Clock() : _start(std::chrono::steady_clock::now()) + { + } + + ~Clock() = default; + + /** + * Stops the measurement. + * + * @return Measured time in milliseconds. + */ + std::chrono::milliseconds end() const + { + const auto end = std::chrono::steady_clock::now(); + return std::chrono::duration_cast(end - _start); + } + + private: + std::chrono::time_point _start; +}; +} // namespace beedb::util \ No newline at end of file diff --git a/src/include/util/ini_parser.h b/src/include/util/ini_parser.h new file mode 100644 index 0000000..cf9f404 --- /dev/null +++ b/src/include/util/ini_parser.h @@ -0,0 +1,171 @@ +/*------------------------------------------------------------------------------* + * Architecture & Implementation of DBMS * + *------------------------------------------------------------------------------* + * Copyright 2020 Databases and Information Systems Group TU Dortmund * + * Visit us at * + * http://dbis.cs.tu-dortmund.de/cms/en/home/ * + * * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL * + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * + * OTHER DEALINGS IN THE SOFTWARE. * + * * + * Authors (alphabetical list): * + * Maximilian Berens * + * Roland Kühn * + * Jan Mühlig * + *------------------------------------------------------------------------------* + */ + +#pragma once + +#include +#include +#include +#include + +namespace beedb::util +{ +/** + * Parses an ini file and stores the parsed key,value tuples + * including sections. + */ +class IniParser +{ + public: + using key_t = std::pair; + using value_t = std::string; + + /** + * Initializes the parser and parses the given + * .ini file. + * + * @param file_name + */ + explicit IniParser(const std::string &file_name); + + ~IniParser() = default; + + /** + * @return True, if the parsed config is empty + * (maybe because the file was not found). + */ + bool empty() const + { + return _configurations.empty(); + } + + /** + * Returns the value for a given + * key in a given section or the + * default value of the key does + * not exists. + * + * @param default_value + * @return Value of the key or default value. + */ + template T get(const std::string &, const std::string &, const T &default_value) const + { + return default_value; + } + + private: + struct key_t_hash + { + std::size_t operator()(const key_t &pair) const + { + return std::hash()(pair.first) ^ std::hash()(pair.second); + } + }; + + std::unordered_map _configurations; + + /** + * Parses the ini file with the given name. + * + * @param file_name Name of the ini file. 
+ */ + void parse(const std::string &file_name); +}; +} // namespace beedb::util + +template <> +inline std::string beedb::util::IniParser::get(const std::string §ion, const std::string &key, + const std::string &default_value) const +{ + const auto config_key = key_t{std::make_pair(section, key)}; + if (this->_configurations.find(config_key) == this->_configurations.end()) + { + return default_value; + } + + return this->_configurations.at(config_key); +} + +template <> +inline bool beedb::util::IniParser::get(const std::string §ion, const std::string &key, + const bool &default_value) const +{ + const auto config_key = key_t{std::make_pair(section, key)}; + if (this->_configurations.find(config_key) == _configurations.end()) + { + return default_value; + } + + return bool(std::stoi(this->_configurations.at(config_key))); +} + +template <> +inline std::int32_t beedb::util::IniParser::get(const std::string §ion, const std::string &key, + const std::int32_t &default_value) const +{ + const auto config_key = key_t{std::make_pair(section, key)}; + if (this->_configurations.find(config_key) == this->_configurations.end()) + { + return default_value; + } + + return std::int32_t(std::stol(this->_configurations.at(config_key))); +} + +template <> +inline std::uint32_t beedb::util::IniParser::get(const std::string §ion, const std::string &key, + const std::uint32_t &default_value) const +{ + const auto config_key = key_t{std::make_pair(section, key)}; + if (this->_configurations.find(config_key) == this->_configurations.end()) + { + return default_value; + } + + return std::uint32_t(std::stoul(this->_configurations.at(config_key))); +} + +template <> +inline std::int64_t beedb::util::IniParser::get(const std::string §ion, const std::string &key, + const std::int64_t &default_value) const +{ + const auto config_key = key_t{std::make_pair(section, key)}; + if (this->_configurations.find(config_key) == this->_configurations.end()) + { + return default_value; + } + + return 
std::int64_t(std::stoll(this->_configurations.at(config_key))); +} + +template <> +inline std::uint64_t beedb::util::IniParser::get(const std::string §ion, const std::string &key, + const std::uint64_t &default_value) const +{ + const auto config_key = key_t{std::make_pair(section, key)}; + if (this->_configurations.find(config_key) == this->_configurations.end()) + { + return default_value; + } + + return std::uint64_t(std::stoull(this->_configurations.at(config_key))); +} \ No newline at end of file diff --git a/src/include/util/optional.h b/src/include/util/optional.h new file mode 100644 index 0000000..8f3ec7b --- /dev/null +++ b/src/include/util/optional.h @@ -0,0 +1,231 @@ +/*------------------------------------------------------------------------------* + * Architecture & Implementation of DBMS * + *------------------------------------------------------------------------------* + * Copyright 2020 Databases and Information Systems Group TU Dortmund * + * Visit us at * + * http://dbis.cs.tu-dortmund.de/cms/en/home/ * + * * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * + * OTHER DEALINGS IN THE SOFTWARE. * + * * + * Authors (alphabetical list): * + * Maximilian Berens * + * Roland Kühn * + * Jan Mühlig * + *------------------------------------------------------------------------------* + */ + +#pragma once +#include +#include +#include +#include + +namespace beedb::util +{ +/** + * Implementation of an optional data container. + * Inspired by std::optional (https://en.cppreference.com/w/cpp/utility/optional). 
+ * In contrast to std::optional, the value is moved on assignment. + */ +template class optional +{ + public: + optional() = default; + + ~optional() noexcept + { + if (_is_empty == false) + { + pointer()->~T(); + } + } + + optional(const T &t) : _is_empty(false) + { + new (_storage.data()) T(t); + } + + optional(const T &&) = delete; + + optional(T &t) : _is_empty(false) + { + new (_storage.data()) T(t); + } + + optional(T &&t) : _is_empty(false) + { + new (_storage.data()) T(std::move(t)); + } + + optional(const optional &o) : _is_empty(o._is_empty) + { + if (o._is_empty == false) + { + new (_storage.data()) T(o.value()); + } + } + + optional(const optional &&) = delete; + + optional(optional &o) : _is_empty(o._is_empty) + { + if (o._is_empty == false) + { + new (_storage.data()) T(o.value()); + } + } + + optional(optional &&o) : _is_empty(o._is_empty) + { + if (o._is_empty == false) + { + new (_storage.data()) T(std::move(o.value())); + } + o._is_empty = true; + } + + void operator=(const T &t) + { + if (_is_empty == false) + { + pointer()->~T(); + } + _is_empty = false; + new (_storage.data()) T(t); + } + + void operator=(T &t) + { + if (_is_empty == false) + { + pointer()->~T(); + } + _is_empty = false; + new (_storage.data()) T(t); + } + + void operator=(T &&t) + { + if (_is_empty == false) + { + pointer()->~T(); + } + _is_empty = false; + new (_storage.data()) T(std::move(t)); + } + + void operator=(const optional &o) + { + if (_is_empty == false) + { + pointer()->~T(); + } + _is_empty = o._is_empty; + if (o._is_empty == false) + { + new (_storage.data()) T(o.value()); + } + } + + void operator=(optional &o) + { + if (_is_empty == false) + { + pointer()->~T(); + } + _is_empty = o._is_empty; + if (o._is_empty == false) + { + new (_storage.data()) T(o.value()); + } + } + + void operator=(optional &&o) + { + if (_is_empty == false) + { + pointer()->~T(); + } + _is_empty = o._is_empty; + if (o._is_empty == false) + { + new (_storage.data()) 
T(std::move(o.value())); + o._is_empty = true; + } + } + + bool has_value() const noexcept + { + return _is_empty == false; + } + T &value() noexcept + { + return *pointer(); + } + const T &value() const noexcept + { + return *pointer(); + } + + void clear() noexcept + { + if (_is_empty == false) + { + pointer()->~T(); + } + _is_empty = true; + } + + T *operator->() noexcept + { + return pointer(); + } + operator T *() noexcept + { + return pointer(); + } + operator T &() noexcept + { + return value(); + } + operator const T &() const noexcept + { + return value(); + } + bool operator==(const bool b) const noexcept + { + return has_value() == b; + } + bool operator!=(const bool b) const noexcept + { + return has_value() != b; + } + bool operator==(const std::nullptr_t) const noexcept + { + return has_value() == false; + } + bool operator!=(const std::nullptr_t) const noexcept + { + return has_value() == true; + } + operator bool() const noexcept + { + return has_value(); + } + + private: + std::array _storage; + bool _is_empty = true; + + T *pointer() noexcept + { + return reinterpret_cast(_storage.data()); + } +}; +} // namespace beedb::util \ No newline at end of file diff --git a/src/include/util/quicksort.h b/src/include/util/quicksort.h new file mode 100644 index 0000000..8915be2 --- /dev/null +++ b/src/include/util/quicksort.h @@ -0,0 +1,91 @@ +/*------------------------------------------------------------------------------* + * Architecture & Implementation of DBMS * + *------------------------------------------------------------------------------* + * Copyright 2020 Databases and Information Systems Group TU Dortmund * + * Visit us at * + * http://dbis.cs.tu-dortmund.de/cms/en/home/ * + * * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL * + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * + * OTHER DEALINGS IN THE SOFTWARE. * + * * + * Authors (alphabetical list): * + * Maximilian Berens * + * Roland Kühn * + * Jan Mühlig * + *------------------------------------------------------------------------------* + */ + +#pragma once +#include +#include +#include + +namespace beedb::util +{ +/** + * Sort algorithm based on quicksort. + * Values are moved instead of copied while sorting. + */ +class Quicksort +{ + public: + /** + * Sorts the data within the given container. + * @param data Container with data. + * @param comparator Comparator for sorting. + */ + template static void sort(std::vector &data, const C &comparator) + { + sort(data, 0, data.size() - 1, comparator); + } + + private: + template + static void sort(std::vector &data, const std::int64_t low_index, const std::int64_t high_index, + const C &comparator) + { + if (low_index < high_index) + { + const auto pivot = partition(data, low_index, high_index, comparator); + + sort(data, low_index, pivot - 1, comparator); + sort(data, pivot + 1, high_index, comparator); + } + } + + template + static std::size_t partition(std::vector &data, const std::int64_t low_index, const std::int64_t high_index, + const C &comparator) + { + const auto &pivot_element = data[high_index]; + auto i = low_index; + + for (auto j = low_index; j < high_index; j++) + { + if (comparator(data[j], pivot_element)) + { + swap(data, std::size_t(i), std::size_t(j)); + i++; + } + } + swap(data, std::size_t(i), std::size_t(high_index)); + return std::size_t(i); + } + + template static void swap(std::vector &data, const std::size_t i, const std::size_t j) + { + if (i == j) + { + return; + } + auto first{std::move(data[i])}; + new ((data.data() + i)) T(std::move(data[j])); + new 
((data.data() + j)) T(std::move(first)); + } +}; +} // namespace beedb::util \ No newline at end of file diff --git a/src/include/util/text_table.h b/src/include/util/text_table.h new file mode 100644 index 0000000..64ca881 --- /dev/null +++ b/src/include/util/text_table.h @@ -0,0 +1,114 @@ +/*------------------------------------------------------------------------------* + * Architecture & Implementation of DBMS * + *------------------------------------------------------------------------------* + * Copyright 2020 Databases and Information Systems Group TU Dortmund * + * Visit us at * + * http://dbis.cs.tu-dortmund.de/cms/en/home/ * + * * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * + * OTHER DEALINGS IN THE SOFTWARE. * + * * + * Authors (alphabetical list): * + * Maximilian Berens * + * Roland Kühn * + * Jan Mühlig * + *------------------------------------------------------------------------------* + */ + +#pragma once +#include +#include +#include +#include +#include +#include + +namespace beedb::util +{ +/** + * Formats the given data and prints it as table. + */ +class TextTable +{ + friend std::ostream &operator<<(std::ostream &stream, const TextTable &text_table); + + public: + TextTable() = default; + ~TextTable() = default; + + /** + * Set header for the table. + * + * @param row_values Header values. + */ + void header(const std::vector &&row_values); + + /** + * Adds a row to the table. + * + * @param row_values Row. 
+ */ + inline void push_back(const std::vector &&row_values) + { + _table_rows.push_back(row_values); + } + + /** + * Clears the table. + */ + inline void clear() + { + _table_rows.clear(); + } + + /** + * @return True, when no row or header was added. + */ + inline bool empty() const + { + return _table_rows.empty(); + } + + private: + std::vector> _table_rows; + + /** + * Calculates the printed length of a given string. + * @param input String to calculate length. + * @return Number of printed characters. + */ + std::size_t printed_length(const std::string &input) const; + + /** + * Calculates the maximal length for each column. + * + * @return List of length per column. + */ + std::vector length_per_column() const; + + /** + * Prints a separator line to the given output stream. + * + * @param stream Output stream the line should be printed to. + * @param column_lengths List of maximal length per column. + * @return The given output stream. + */ + std::ostream &print_separator_line(std::ostream &stream, const std::vector &column_lengths) const; + + /** + * Prints a row to the given output stream. + * + * @param stream Output stream the row should be printed to. + * @param column_lengths List of maximal length per column. + * @param row The row that should be printed. + * @return The given output stream. 
+ */ + std::ostream &print_row(std::ostream &stream, const std::vector &column_lengths, + const std::vector &row) const; +}; +} // namespace beedb::util \ No newline at end of file diff --git a/src/io/command/commander.cpp b/src/io/command/commander.cpp new file mode 100644 index 0000000..4a3cf9a --- /dev/null +++ b/src/io/command/commander.cpp @@ -0,0 +1,56 @@ +/*------------------------------------------------------------------------------* + * Architecture & Implementation of DBMS * + *------------------------------------------------------------------------------* + * Copyright 2020 Databases and Information Systems Group TU Dortmund * + * Visit us at * + * http://dbis.cs.tu-dortmund.de/cms/en/home/ * + * * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * + * OTHER DEALINGS IN THE SOFTWARE. 
* + * * + * Authors (alphabetical list): * + * Maximilian Berens * + * Roland Kühn * + * Jan Mühlig * + *------------------------------------------------------------------------------* + */ + +#include +#include +#include + +using namespace beedb::io::command; + +bool Commander::has_command_prefix(const std::string &input) +{ + return input.at(0u) == ':'; +} + +std::optional Commander::create_query(const std::string &input) +{ + std::regex command_regex(":([\\w\\-]+)\\s?(.*)", std::regex::icase); + std::smatch match; + + if (std::regex_match(input, match, command_regex)) + { + const auto name = match[1].str(); + const auto command_input = match[2].str(); + if (this->_registered_commands.find(name) != this->_registered_commands.end()) + { + return this->_registered_commands[name]->execute(command_input); + } + else + { + throw exception::UnknownCommandException(name); + } + } + else + { + throw exception::UnknownCommandException(input.substr(1)); + } +} \ No newline at end of file diff --git a/src/io/command/custom_commands.cpp b/src/io/command/custom_commands.cpp new file mode 100644 index 0000000..f3e1590 --- /dev/null +++ b/src/io/command/custom_commands.cpp @@ -0,0 +1,156 @@ +/*------------------------------------------------------------------------------* + * Architecture & Implementation of DBMS * + *------------------------------------------------------------------------------* + * Copyright 2020 Databases and Information Systems Group TU Dortmund * + * Visit us at * + * http://dbis.cs.tu-dortmund.de/cms/en/home/ * + * * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL * + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * + * OTHER DEALINGS IN THE SOFTWARE. * + * * + * Authors (alphabetical list): * + * Maximilian Berens * + * Roland Kühn * + * Jan Mühlig * + *------------------------------------------------------------------------------* + */ + +#include +#include +#include + +using namespace beedb::io::command; + +std::optional ShowCommand::execute(const std::string ¶meters) +{ + std::regex tables_regex("tables", std::regex::icase); + std::regex columns_regex("columns", std::regex::icase); + std::regex indices_regex("indices", std::regex::icase); + std::smatch match; + + if (std::regex_match(parameters, match, tables_regex)) + { + return Query{"select name as table_name from system_tables order by id asc;"}; + } + else if (std::regex_match(parameters, match, columns_regex)) + { + return Query{"select t.name as table_name, c.name as column_name from system_columns c join system_tables t on " + "c.table_id = t.id order by t.id asc, c.id asc;"}; + } + else if (std::regex_match(parameters, match, indices_regex)) + { + return Query{"select t.name as table_name, c.name as column_name, i.name as index_name, i.is_unique as " + "is_unique, i.type_id as type from system_indices i join system_columns c on i.column_id = c.id " + "join system_tables t on c.table_id = t.id order by t.id asc, c.id asc, i.id asc;"}; + } + + throw exception::CommandSyntaxException("Parameter is not recognized!", help_str()); +} + +std::optional ExplainCommand::execute(const std::string &input) +{ + std::regex explain_regex("([graph|plan]*)\\s(.*)", std::regex::icase); + std::smatch explain_regex_match; + + if (std::regex_match(input, explain_regex_match, explain_regex)) + { + if (explain_regex_match[1].str() == "plan") + { + return Query{explain_regex_match[2], 
Query::ExplainLevel::Plan}; + } + else if (explain_regex_match[1].str() == "graph") + { + return Query{explain_regex_match[2], Query::ExplainLevel::Graph}; + } + else + { + throw exception::CommandSyntaxException("Unknown argument \'" + explain_regex_match[1].str() + "\'", + help_str()); + } + } + else if (input.size() == 0) + { + throw exception::CommandSyntaxException("No query specified!", help_str()); + } + return Query{input, Query::ExplainLevel::Plan}; +} + +std::optional SetCommand::execute(const std::string &input) +{ + std::regex set_regex("(\\w+)\\s(.+)", std::regex::icase); + std::regex value_regex("[0-9]+", std::regex::icase); + std::smatch set_regex_match; + std::smatch value_regex_match; + + if (std::regex_match(input, set_regex_match, set_regex)) + { + const auto attribute_name = set_regex_match[1].str(); + const auto value = set_regex_match[2].str(); + + if (!std::regex_match(value, value_regex_match, value_regex)) + { + throw exception::CommandException("Options can only be set to numerical values!"); + } + + // Setting attribute to value.... + std::optional old_value; + if (_config.contains(attribute_name)) + { + old_value = _config[attribute_name]; + } + + _config.set(attribute_name, std::stoi(value)); + + std::cout << "Setting option \'" << attribute_name << "\' to value " << value; + if (old_value.has_value()) + { + std::cout << " (was " << old_value.value() << ")"; + } + std::cout << std::endl; + } + else if (input.size() > 0) + { + std::regex option_regex("(\\w+)", std::regex::icase); + // TODO: set the default option instead of throwing an error! 
+ throw exception::CommandSyntaxException("No value specified for option \'" + input + "\'", help_str()); + } + else + { + throw exception::CommandSyntaxException("No arguments specified!", help_str()); + } + return std::nullopt; +} + +std::optional GetCommand::execute(const std::string &input) +{ + std::regex get_regex("(\\w+)"); + std::smatch get_regex_match; + + if (std::regex_match(input, get_regex_match, get_regex)) + { + // we try to find the option by name.. + if (_config.contains(input)) + { + std::cout << input << ": " << _config[input] << std::endl; + } + else + { + throw exception::CommandException("Option not found!"); + } + } + else if (input.empty()) + { + // when no argument was specified, we print all options: + std::cout << static_cast(_config) << std::endl; + } + else + { + throw exception::CommandException("\'" + input + "\' is not a valid option name!"); + } + return std::nullopt; +} \ No newline at end of file diff --git a/src/io/executor.cpp b/src/io/executor.cpp new file mode 100644 index 0000000..5cf6d7c --- /dev/null +++ b/src/io/executor.cpp @@ -0,0 +1,148 @@ +/*------------------------------------------------------------------------------* + * Architecture & Implementation of DBMS * + *------------------------------------------------------------------------------* + * Copyright 2020 Databases and Information Systems Group TU Dortmund * + * Visit us at * + * http://dbis.cs.tu-dortmund.de/cms/en/home/ * + * * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * + * OTHER DEALINGS IN THE SOFTWARE. 
* + * * + * Authors (alphabetical list): * + * Maximilian Berens * + * Roland Kühn * + * Jan Mühlig * + *------------------------------------------------------------------------------* + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace beedb::io; + +ExecutionResult Executor::execute(const Query &query, std::function schema_callback, + std::function tuple_callback) +{ + std::chrono::milliseconds planning_time{}, execution_time{}; + try + { + //////////////////////////////////////////////////////////////////////// + /// \brief parser this object offers all components of the posed query, + /// to construct a (logical) plan. + const parser::QueryParser &parser = parser::HSQLParser(query.query_string); + + util::Clock planning_clock{}; // we do not measure parsing time + const auto before_query_evicted_frames = this->_database.buffer_manager().evicted_frames(); + + //////////////////////////////////////////////////////////////////////// + /// \brief logical_plan holds the canonical plan, created from the query. + /// This plan has no optimizations at all. + auto canonical_logical_plan = beedb::plan::logical::Builder::build(this->_database, parser); + + /// In case this is a SELECT query and optimization is enabled... + if (parser.type() == parser::QueryParser::QueryType::SELECT && + !(this->_database.config()[Config::k_OptimizationDisableOptimization])) + { + //////////////////////////////////////////////////////////////////////// + /// \brief optimizer holds a reference to the canonical plan and can create a new, optimized plan + /// \brief optimized_plan might hold a "more efficient" plan, could be empty though + plan::logical::Optimizer optimizer(this->_database, canonical_logical_plan); + auto optimized_plan = optimizer.optimize(); + + /// If there is no new, optimized plan, the unoptimized plan will be executed instead. + const plan::logical::Plan &final_plan = + !optimized_plan.is_empty() ? 
optimized_plan : canonical_logical_plan; + + if (this->_database.config()[Config::k_CheckFinalPlan]) + { + assert(final_plan.is_proper_plan_graph()); // this check is not cheap. + } + //////////////////////////////////////////////////////////////////////// + /// Explain the chosen plan, when requested: + if (query.explain == Query::ExplainLevel::Graph) + { + if (!optimized_plan.is_empty()) + { + canonical_logical_plan.toConsole(); + } + final_plan.toConsole(); + } + else if (query.explain == Query::ExplainLevel::Plan) + { + final_plan.print_table(); + } + + //////////////////////////////////////////////////////////////////////// + /// create the physical plan from the logical one: + auto plan = beedb::plan::physical::Builder::build(this->_database, final_plan); + planning_time = planning_clock.end(); + + util::Clock execution_clock{}; + plan.execute(schema_callback, tuple_callback); + execution_time = execution_clock.end(); + } + else + { // just execute the canonical plan otherwise + //////////////////////////////////////////////////////////////////////// + /// create the physical plan from the logical one: + auto plan = beedb::plan::physical::Builder::build(this->_database, canonical_logical_plan); + planning_time = planning_clock.end(); + + util::Clock execution_clock{}; + plan.execute(schema_callback, tuple_callback); + execution_time = execution_clock.end(); + } + + const auto final_evicted_frames = + this->_database.buffer_manager().evicted_frames() - before_query_evicted_frames; + return {planning_time, execution_time, final_evicted_frames}; + } + catch (std::runtime_error &e) + { + std::cerr << "\033[0;31merror\033[0m> " << e.what() << std::endl; + return {}; + } + catch (beedb::exception::DatabaseException &e) + { + std::cerr << "\033[0;31merror\033[0m> " << e.what() << std::endl; + return {}; + } +} + +ExecutionResult Executor::execute(plan::physical::Plan &plan, + std::function schema_callback, + std::function tuple_callback) +{ + 
std::chrono::milliseconds execution_time{}; + try + { + util::Clock execution_clock{}; + plan.execute(schema_callback, tuple_callback); + execution_time = execution_clock.end(); + + return {std::chrono::milliseconds{0}, execution_time, 0u}; + } + catch (std::runtime_error &e) + { + std::cerr << "\033[0;31merror\033[0m> " << e.what() << std::endl; + return {}; + } + catch (beedb::exception::DatabaseException &e) + { + std::cerr << "\033[0;31merror\033[0m> " << e.what() << std::endl; + return {}; + } +} \ No newline at end of file diff --git a/src/io/file_executor.cpp b/src/io/file_executor.cpp new file mode 100644 index 0000000..2fc735a --- /dev/null +++ b/src/io/file_executor.cpp @@ -0,0 +1,85 @@ +/*------------------------------------------------------------------------------* + * Architecture & Implementation of DBMS * + *------------------------------------------------------------------------------* + * Copyright 2020 Databases and Information Systems Group TU Dortmund * + * Visit us at * + * http://dbis.cs.tu-dortmund.de/cms/en/home/ * + * * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * + * OTHER DEALINGS IN THE SOFTWARE. 
* + * * + * Authors (alphabetical list): * + * Maximilian Berens * + * Roland Kühn * + * Jan Mühlig * + *------------------------------------------------------------------------------* + */ + +#include +#include +#include + +using namespace beedb::io; + +void FileExecutor::import_file(const std::string &file_name) +{ + std::ifstream file(file_name); + std::stringstream statement_stream; + std::string line; + std::vector statements; + + std::regex comment_regex(".*#|.*--.*"); + std::regex long_comment_start_regex(".*\\/\\*"); + std::regex long_comment_end_regex(".*\\*\\/"); + std::regex statement_end_regex(".*;"); + std::smatch match; + bool is_within_comment = false; + + while (std::getline(file, line)) + { + if (std::regex_match(line, match, long_comment_end_regex)) + { + is_within_comment = false; + continue; + } + + if (is_within_comment || std::regex_match(line, match, comment_regex)) + { + continue; + } + + if (std::regex_match(line, match, long_comment_start_regex)) + { + is_within_comment = true; + continue; + } + + statement_stream << " " << line << std::flush; + if (std::regex_match(line, match, statement_end_regex)) + { + statements.push_back(statement_stream.str()); + statement_stream.str(""); + statement_stream.clear(); + } + } + + const auto size = statements.size(); + auto current = std::size_t{0}; + for (const auto &statement : statements) + { + const auto c = ++current; + std::cout << "\rExecuting " << c << " / " << size << " (" << std::uint16_t(100 / float(size) * float(c)) << "%)" + << std::flush; + Executor::execute(Query{statement}); + } + + std::cout << "\r" + << "\033[0;32m" + << "Executed " << size << " statements\t\t" + << " \033[0m" << std::endl; +} \ No newline at end of file diff --git a/src/io/printing_executor.cpp b/src/io/printing_executor.cpp new file mode 100644 index 0000000..32c1b6a --- /dev/null +++ b/src/io/printing_executor.cpp @@ -0,0 +1,64 @@ +/*------------------------------------------------------------------------------* + 
* Architecture & Implementation of DBMS * + *------------------------------------------------------------------------------* + * Copyright 2020 Databases and Information Systems Group TU Dortmund * + * Visit us at * + * http://dbis.cs.tu-dortmund.de/cms/en/home/ * + * * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * + * OTHER DEALINGS IN THE SOFTWARE. * + * * + * Authors (alphabetical list): * + * Maximilian Berens * + * Roland Kühn * + * Jan Mühlig * + *------------------------------------------------------------------------------* + */ + +#include +#include +#include + +using namespace beedb::io; + +void PrintingExecutor::execute(const Query &query) +{ + ResultOutputFormatter result_formatter; + + auto result = Executor::execute( + query, + [&result_formatter](const table::Schema &schema) { + if (schema.empty() == false) + { + result_formatter.header(schema); + } + }, + [&result_formatter](const table::Tuple &tuple) { result_formatter.push_back(tuple); }); + if (result.is_successful() && result_formatter.empty() == false) + { + std::cout << result_formatter; + + if (this->_database.config()[Config::k_PrintExecutionStatistics]) + { + util::TextTable stat_table; + stat_table.header({"Statistic", "Value"}); + stat_table.push_back({"Execution time", std::to_string(result.execution_time().count()) + " ms"}); + stat_table.push_back({"Plan time", std::to_string(result.build_time().count()) + " ms"}); + stat_table.push_back({"Fetched rows", std::to_string(result_formatter.count())}); + stat_table.push_back({"Evicted pages", 
std::to_string(result.evicted_pages())}); + std::cout << stat_table; + } + else + { + std::cout << "\033[0;32m" + << "Fetched " << result_formatter.count() << " row" << (result_formatter.count() != 1u ? "s" : "") + << " in " << (result.build_time().count() + result.execution_time().count()) << "ms" + << " \033[0m" << std::endl; + } + } +} \ No newline at end of file diff --git a/src/io/result_output_formatter.cpp b/src/io/result_output_formatter.cpp new file mode 100644 index 0000000..424a373 --- /dev/null +++ b/src/io/result_output_formatter.cpp @@ -0,0 +1,65 @@ +/*------------------------------------------------------------------------------* + * Architecture & Implementation of DBMS * + *------------------------------------------------------------------------------* + * Copyright 2020 Databases and Information Systems Group TU Dortmund * + * Visit us at * + * http://dbis.cs.tu-dortmund.de/cms/en/home/ * + * * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * + * OTHER DEALINGS IN THE SOFTWARE. 
* + * * + * Authors (alphabetical list): * + * Maximilian Berens * + * Roland Kühn * + * Jan Mühlig * + *------------------------------------------------------------------------------* + */ + +#include +#include +#include + +using namespace beedb::io; + +void ResultOutputFormatter::header(const beedb::table::Schema &schema) +{ + std::vector header; + std::transform(schema.attributes().cbegin(), schema.attributes().cend(), std::back_inserter(header), + [](const auto &attribute) -> std::string { return attribute; }); + this->_table.header(std::move(header)); +} + +void ResultOutputFormatter::push_back(const beedb::table::Tuple &tuple) +{ + if (tuple.has_data()) + { + std::vector row; + std::transform(tuple.schema().column_order().cbegin(), tuple.schema().column_order().cend(), + std::back_inserter(row), [&tuple](const auto index) -> std::string { return tuple.get(index); }); + + this->_table.push_back(std::move(row)); + this->_count_tuples++; + } +} + +void ResultOutputFormatter::push_back(const std::vector &tuples) +{ + for (const auto &row : tuples) + { + this->push_back(row); + } +} + +namespace beedb::io +{ + +std::ostream &operator<<(std::ostream &stream, const ResultOutputFormatter &result_output_formatter) +{ + return stream << result_output_formatter._table; +} +} // namespace beedb::io \ No newline at end of file diff --git a/src/io/user_console.cpp b/src/io/user_console.cpp new file mode 100644 index 0000000..890d8ef --- /dev/null +++ b/src/io/user_console.cpp @@ -0,0 +1,78 @@ +/*------------------------------------------------------------------------------* + * Architecture & Implementation of DBMS * + *------------------------------------------------------------------------------* + * Copyright 2020 Databases and Information Systems Group TU Dortmund * + * Visit us at * + * http://dbis.cs.tu-dortmund.de/cms/en/home/ * + * * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE 
WARRANTIES OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * + * OTHER DEALINGS IN THE SOFTWARE. * + * * + * Authors (alphabetical list): * + * Maximilian Berens * + * Roland Kühn * + * Jan Mühlig * + *------------------------------------------------------------------------------* + */ + +#include +#include +#include +#include +#include +#include +#include + +using namespace beedb::io; + +void UserConsole::wait_for_input() +{ + std::regex quit_regex("q|quit|:q|:quit|e|exit|:e|:exit", std::regex_constants::icase); + std::cout << "Type 'q' or 'quit' to exit." << std::endl; + + command::Commander commander; + commander.register_command("show", std::make_unique()); + commander.register_command("explain", std::make_unique()); + commander.register_command("set", std::make_unique(this->_database.config())); + commander.register_command("get", std::make_unique(this->_database.config())); + + while (true) + { + std::string user_input; + + std::cout << "beedb> " << std::flush; + std::getline(std::cin, user_input); + + std::smatch match; + if (std::regex_match(user_input, match, quit_regex)) + { + std::cout << "Bye." 
<< std::endl; + return; + } + else if (commander.has_command_prefix(user_input)) + { + try + { + std::optional query = commander.create_query(user_input); + if (query.has_value()) + { + // Note: a command does not always require the execution of a query + PrintingExecutor::execute(query.value()); + } + } + catch (exception::DatabaseException &e) + { + std::cerr << "\033[0;31merror\033[0m> " << e.what() << std::endl; + } + } + else + { + PrintingExecutor::execute(Query{user_input}); // false => no explain by default + } + } +} \ No newline at end of file diff --git a/src/parser/hsql_parser.cpp b/src/parser/hsql_parser.cpp new file mode 100644 index 0000000..c14b104 --- /dev/null +++ b/src/parser/hsql_parser.cpp @@ -0,0 +1,545 @@ +/*------------------------------------------------------------------------------* + * Architecture & Implementation of DBMS * + *------------------------------------------------------------------------------* + * Copyright 2020 Databases and Information Systems Group TU Dortmund * + * Visit us at * + * http://dbis.cs.tu-dortmund.de/cms/en/home/ * + * * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * + * OTHER DEALINGS IN THE SOFTWARE. 
* + * * + * Authors (alphabetical list): * + * Maximilian Berens * + * Roland Kühn * + * Jan Mühlig * + *------------------------------------------------------------------------------* + */ + +#include "parser/hsql_parser.h" +#include + +#include + +using namespace beedb::parser; + +HSQLParser::HSQLParser(const std::string &query) : QueryParser(query) +{ + hsql::SQLParser::parse(_query, &_hsql_result); + + if (!(_hsql_result.isValid() && _hsql_result.size() > 0)) + throw exception::SqlException(_hsql_result.errorMsg(), static_cast(_hsql_result.errorLine() + 1), + static_cast(_hsql_result.errorColumn())); + + // determine type of query: + _hsql_statement = _hsql_result.getStatement(0); // When do we need more then the first statement? + switch (_hsql_statement->type()) + { + case hsql::StatementType::kStmtSelect: + _query_type = QueryType::SELECT; + break; + case hsql::StatementType::kStmtCreate: { + auto create_type = reinterpret_cast(_hsql_statement)->type; + if (create_type == hsql::CreateType::kCreateTable) + { + _query_type = QueryType::CREATE_TABLE; + } + else if (create_type == hsql::CreateType::kCreateIndex) + { + _query_type = QueryType::CREATE_INDEX; + } + break; + } + case hsql::StatementType::kStmtInsert: + _query_type = QueryType::INSERT; + break; + case hsql::StatementType::kStmtUpdate: + _query_type = QueryType::UPDATE; + break; + default: + _query_type = QueryType::UNSUPPORTED; + } +} + +/// Definitions of interface functions: + +const SelectExpression beedb::parser::HSQLParser::extractSELECT_impl() const +{ + auto select_statement = static_cast(_hsql_statement); + expression::Attributes projected_columns; + for (const hsql::Expr *column_expression : *(select_statement->selectList)) + { + projected_columns.push_back(build_attribute(column_expression)); + } + + return projected_columns; +} + +const std::optional HSQLParser::extractFROM_impl() const +{ + auto table_reference = static_cast(_hsql_statement)->fromTable; + + if (table_reference != nullptr) 
+ { + auto from = + extractFROM_impl(table_reference); // (potentially recursive) call of helper function to fill the fexp obj. + FromExpression from_expression{std::move(from)}; + return {std::move(from_expression)}; + } + else + { + return std::nullopt; + } +} + +const std::optional HSQLParser::extractWHERE_impl() const +{ + + auto where_clause = static_cast(_hsql_statement)->whereClause; + if (where_clause != nullptr) + { + return extract_predicate(where_clause); + } + else + return std::nullopt; +} + +const std::optional HSQLParser::extractGROUPBY_impl() const +{ + auto groupby_clause = static_cast(_hsql_statement)->groupBy; + + if (groupby_clause != nullptr && groupby_clause->having != nullptr) + { + throw exception::UnsupportedStatementException("HAVING"); + } + + if (groupby_clause != nullptr && groupby_clause->columns) + { + expression::Attributes group_by; + + for (const auto group_by_expression : *(groupby_clause->columns)) + { + group_by.push_back(build_attribute(group_by_expression)); + std::cout << "Group by: " << group_by.back() << std::endl; + } + + return std::optional(std::move(group_by)); // is move appropriate? + } + + else + return std::nullopt; +} + +const std::optional HSQLParser::extractORDERBY_impl() const +{ + auto orderby_clause = static_cast(_hsql_statement)->order; + if (orderby_clause != nullptr) + { + expression::Attributes order_by; + + for (const auto order_by_descr : *(orderby_clause)) + { + assert(order_by_descr && order_by_descr->expr && order_by_descr->expr->name); + auto expr = order_by_descr->expr; + const std::string column_name(expr->name); + + // table reference and alias defintions are optional: + const auto table_name = expr->table == nullptr ? std::nullopt : std::optional(expr->table); + const auto column_alias = expr->alias == nullptr ? 
std::nullopt : std::optional(expr->alias); + order_by.push_back(beedb::expression::Attribute{ + std::move(column_name), std::move(table_name), std::move(column_alias), + expression::AttributeOrigin::PHYSICAL, order_by_descr->type == hsql::OrderType::kOrderAsc}); + // std::cout << "Order by: " << order_by.back() << " ASC? " << + // order_by.back().order.value_or("NO ORDER DEFINED???") << std::endl; + } + + return std::optional(std::move(order_by)); // is move appropriate? + } + + else + return std::nullopt; +} + +const std::optional HSQLParser::extractLIMIT_impl() const +{ + const auto *select_statement = reinterpret_cast(this->_hsql_statement); + if (select_statement->limit == nullptr) + { + return {}; + } + + const auto limit = std::uint64_t(select_statement->limit->limit->ival); + const auto offset = + select_statement->limit->offset == nullptr ? 0u : std::uint64_t(select_statement->limit->offset->ival); + return {{limit, offset}}; +} + +/// Definitions of helper functions: + +beedb::expression::AttributeOrigin HSQLParser::determine_function_type(const std::string &function_name) const +{ + std::regex count("count", std::regex::icase); + std::regex min("min", std::regex::icase); + std::regex max("max", std::regex::icase); + std::regex sum("sum", std::regex::icase); + std::regex avg("avg", std::regex::icase); + std::smatch match; + + if (std::regex_match(function_name, match, count)) + { + return beedb::expression::AttributeOrigin::AGG_COUNT; + } + else if (std::regex_match(function_name, match, min)) + { + return beedb::expression::AttributeOrigin::AGG_MIN; + } + else if (std::regex_match(function_name, match, max)) + { + return beedb::expression::AttributeOrigin::AGG_MAX; + } + else if (std::regex_match(function_name, match, sum)) + { + return beedb::expression::AttributeOrigin::AGG_SUM; + } + else if (std::regex_match(function_name, match, avg)) + { + return beedb::expression::AttributeOrigin::AGG_AVG; + } + throw 
exception::UnsupportedStatementException("Function not supported: \'" + function_name + "\'"); +} + +beedb::expression::Attribute HSQLParser::build_attribute(const hsql::Expr *h_expr) const +{ + + if (h_expr->type == hsql::kExprColumnRef) + { + + if (h_expr->name == nullptr) + { + throw exception::UnsupportedStatementException("Column without name"); + } + const std::string column_name(h_expr->name); + + // table reference and alias defintions are optional: + const auto table_name = h_expr->table == nullptr ? std::nullopt : std::optional(h_expr->table); + const auto column_alias = h_expr->alias == nullptr ? std::nullopt : std::optional(h_expr->alias); + return expression::Attribute{std::move(column_name), std::move(table_name), std::move(column_alias), + expression::AttributeOrigin::PHYSICAL}; + } + + else if (h_expr->type == hsql::kExprStar) + { + const auto table_name = h_expr->table == nullptr ? std::nullopt : std::optional(h_expr->table); + // we resolve the star expression later (when database information is available). + // Otherwise, we have to introduce a 'bool is_star' member to expression::Attribute (or similar). + return expression::Asterisk{std::move(table_name)}; + } + + else if (h_expr->type == hsql::kExprFunctionRef) + { + + const auto fkt = determine_function_type(h_expr->name); + + // functions/aggregations can be renamed: + const auto column_alias = h_expr->alias == nullptr ? 
std::nullopt : std::optional(h_expr->name); + + // we expect the "nested expression" to give us the column name and (potentially) a table name: + const auto inner_attr = build_attribute(h_expr->exprList->at(0)); + return expression::Attribute{std::move(inner_attr.name), std::move(inner_attr.table), std::move(column_alias), + fkt}; + } + + else if (h_expr->type == hsql::kExprLiteralString || h_expr->type == hsql::kExprLiteralInt || + h_expr->type == hsql::kExprLiteralNull || h_expr->type == hsql::kExprLiteralFloat) + { + throw exception::UnsupportedStatementException("Currently no support for literals in SELECT part (TODO)!"); + } + + throw exception::UnsupportedStatementException("Unsupported expression of type " + + std::to_string(std::int32_t(h_expr->type))); +} + +std::unique_ptr HSQLParser::extractFROM_impl(const hsql::TableRef *table_ref) const +{ + + if (table_ref->type == hsql::kTableName) + { + TableDescr td{table_ref->name, + table_ref->alias == nullptr ? std::nullopt : std::optional(table_ref->alias->name)}; + FromDescr from{std::move(td)}; + return std::make_unique(std::move(from)); + } + else if (table_ref->type == hsql::kTableJoin) + { + + if (table_ref->join->type != hsql::JoinType::kJoinInner) + throw exception::UnsupportedStatementException("Only INNER joins are currently supported, sorry!"); + + // recursive call on elements of the join: + auto left_relation = extractFROM_impl(table_ref->join->left); + auto right_relation = extractFROM_impl(table_ref->join->right); + + JoinDescr join_descriptor{std::move(left_relation), std::move(right_relation), + extract_predicate(table_ref->join->condition)}; + FromDescr from{std::move(join_descriptor)}; + return std::make_unique(std::move(from)); + } + else if (table_ref->type == hsql::kTableCrossProduct) + { + const auto tables = table_ref->list; + CrossProductDescr cpd; + + for (const auto table : *tables) + { + cpd.push_back(extractFROM_impl(table)); // recursive call on elements of the cross product + } + + 
FromDescr from{std::move(cpd)}; + return std::make_unique(std::move(from)); + } + + else + { + throw exception::UnsupportedStatementException("No support for nested SELECT's in FROM (yet), sorry!"); + } +} + +beedb::expression::Predicate HSQLParser::extract_predicate(const hsql::Expr *expression) const +{ + + if (expression == nullptr) + { + throw exception::CanNotConvertNullptrException(); + } + + if (expression->isLiteral() && expression->isBoolLiteral) + { // check for simple truth value + return static_cast(expression->ival); + } + else + { + + // depending on the operation, we create the expression differently + + // some operations are not supported: + + switch (expression->opType) + { + // supported operations: + case hsql::OperatorType::kOpOr: + case hsql::OperatorType::kOpAnd: + case hsql::OperatorType::kOpEquals: + case hsql::OperatorType::kOpNotEquals: + case hsql::OperatorType::kOpLess: + case hsql::OperatorType::kOpLessEq: + case hsql::OperatorType::kOpGreater: + case hsql::OperatorType::kOpGreaterEq: + break; + // unsupported operations: + default: + throw exception::UnsupportedOperatorException(std::to_string(std::int32_t(expression->opType))); + } + // operation can be a logical connective...? + switch (expression->opType) + { + case hsql::OperatorType::kOpOr: + return std::make_unique(HSQLParser::extract_predicate(expression->expr), + HSQLParser::extract_predicate(expression->expr2)); + case hsql::OperatorType::kOpAnd: + return std::make_unique(HSQLParser::extract_predicate(expression->expr), + HSQLParser::extract_predicate(expression->expr2)); + default: + break; + } + + // operation is a simple atom. 
We expect simple operands on both sides: + auto left = build_operand(expression->expr); + + auto right = build_operand(expression->expr2); + + // TODO: move swap step into another layer, this is a logical operation and has nothing to do with the HSQL + // Parser + bool should_swap = + operands_should_swap(left, right); // this will swap operands, if lhs is a literal and rhs is an attribute + + switch (expression->opType) + { + // operation is an atom: + case hsql::OperatorType::kOpEquals: + if (should_swap) + return std::make_unique(right, left); + return std::make_unique(left, right); + case hsql::OperatorType::kOpNotEquals: + if (should_swap) + return std::make_unique(right, left); + return std::make_unique(left, right); + case hsql::OperatorType::kOpLess: + if (should_swap) + return std::make_unique(right, left); + return std::make_unique(left, right); + case hsql::OperatorType::kOpLessEq: + if (should_swap) + return std::make_unique(right, left); + return std::make_unique(left, right); + case hsql::OperatorType::kOpGreater: + if (should_swap) + return std::make_unique(right, left); + return std::make_unique(left, right); + case hsql::OperatorType::kOpGreaterEq: + if (should_swap) + return std::make_unique(right, left); + return std::make_unique(left, right); + default: + break; // should never happen, all cases covered? + } + } + throw exception::UnsupportedOperatorException(std::to_string(std::int32_t(expression->opType))); +} + +const beedb::expression::Operand HSQLParser::build_operand(const hsql::Expr *expression) const +{ + switch (expression->type) + { + case hsql::kExprColumnRef: { + auto attribute = expression::Attribute{ + expression->name, expression->table != nullptr ? 
std::optional(expression->table) : std::nullopt}; + return beedb::expression::Operand{attribute}; + } + case hsql::kExprLiteralInt: + return std::int64_t(expression->ival); + case hsql::kExprLiteralFloat: + return float(expression->fval); + case hsql::kExprLiteralString: + return std::string(expression->name); + default: + break; + } + return {}; +} + +const std::optional HSQLParser::extractCREATE_TABLE_impl() const +{ + const auto *create_statement = reinterpret_cast(this->_hsql_statement); + CreateTableExpression expression{}; + expression.table_name = create_statement->tableName; + expression.if_not_exists = create_statement->ifNotExists; + for (auto *column : *(create_statement->columns)) + { + ColumnExpression column_expression{}; + column_expression.column_name = column->name; + column_expression.is_null = column->nullable; + column_expression.length = column->type.length; + + switch (column->type.data_type) + { + case hsql::DataType::INT: + column_expression.type_id = table::Type::INT; + break; + case hsql::DataType::LONG: + column_expression.type_id = table::Type::LONG; + break; + case hsql::DataType::FLOAT: + column_expression.type_id = table::Type::FLOAT; + break; + case hsql::DataType::DOUBLE: + column_expression.type_id = table::Type::DOUBLE; + break; + case hsql::DataType::CHAR: + column_expression.type_id = table::Type::CHAR; + break; + default: + throw exception::UnsupportedColumnType(); + } + expression.column_expressions.push_back(std::move(column_expression)); + } + + return {std::move(expression)}; +} + +const std::optional HSQLParser::extractCREATE_INDEX_impl() const +{ + const auto *create_statement = reinterpret_cast(this->_hsql_statement); + CreateIndexExpression expression{}; + expression.table_name = create_statement->tableName; + expression.column_name = create_statement->viewColumns->at(0); + expression.index_name = create_statement->indexName; + expression.type = index::Type::BTree; + expression.is_unique = create_statement->isUnique; + 
expression.if_not_exists = create_statement->ifNotExists; + + return {std::move(expression)}; +} + +const std::optional HSQLParser::extractINSERT_impl() const +{ + const auto *insert_statement = reinterpret_cast(this->_hsql_statement); + InsertExpression insert_expression{}; + insert_expression.table_name = insert_statement->tableName; + for (auto column_name : *insert_statement->columns) + { + insert_expression.column_names.push_back(std::string(column_name)); + } + + auto value_index = 0u; + while (value_index < insert_statement->values->size()) + { + std::vector> value_list; + for (auto i = 0u; i < insert_expression.column_names.size(); i++) + { + const auto value = insert_statement->values->at(value_index++); + if (value->type == hsql::kExprLiteralFloat) + { + value_list.push_back({{float(value->fval)}}); + } + else if (value->type == hsql::kExprLiteralInt) + { + value_list.push_back({{std::int64_t(value->ival)}}); + } + else if (value->type == hsql::kExprLiteralString) + { + value_list.push_back({{std::string(value->name)}}); + } + else if (value->type == hsql::kExprLiteralNull) + { + value_list.push_back({}); + } + } + insert_expression.values_rows.push_back(std::move(value_list)); + } + + return {std::move(insert_expression)}; +} + +const UpdateExpression HSQLParser::extractUPDATE_impl() const +{ + const auto *update_statement = reinterpret_cast(this->_hsql_statement); + + auto from = this->extractFROM_impl(update_statement->table); + std::optional where = std::nullopt; + if (update_statement->where != nullptr) + { + where = extract_predicate(update_statement->where); + } + + std::vector> updates; + for (auto update_clause : *update_statement->updates) + { + auto value = this->build_operand(update_clause->value); + if (std::holds_alternative(value)) + { + throw exception::LogicalException("Only fix values can used for updates at the moment."); + } + + auto table_name = (update_statement->table != nullptr && update_statement->table->name != nullptr) + ? 
std::make_optional(std::string(update_statement->table->name)) + : std::nullopt; + + updates.push_back(std::make_pair(expression::Attribute{update_clause->column, table_name}, value)); + } + + return UpdateExpression{FromExpression{std::move(from)}, std::move(updates), std::move(where)}; +} \ No newline at end of file diff --git a/src/parser/query_parser.cpp b/src/parser/query_parser.cpp new file mode 100644 index 0000000..1f5edb2 --- /dev/null +++ b/src/parser/query_parser.cpp @@ -0,0 +1,46 @@ +/*------------------------------------------------------------------------------* + * Architecture & Implementation of DBMS * + *------------------------------------------------------------------------------* + * Copyright 2020 Databases and Information Systems Group TU Dortmund * + * Visit us at * + * http://dbis.cs.tu-dortmund.de/cms/en/home/ * + * * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * + * OTHER DEALINGS IN THE SOFTWARE. 
* + * * + * Authors (alphabetical list): * + * Maximilian Berens * + * Roland Kühn * + * Jan Mühlig * + *------------------------------------------------------------------------------* + */ + +#include +#include + +using namespace beedb::parser; + +QueryParser::QueryParser(const std::string &query) : _query(query) +{ +} + +QueryParser::QueryType QueryParser::type() const +{ + if (!_query_type) + { + throw exception::ParserException("Query type required but not set, check sub-class construction."); + } + + return _query_type.value(); +} + +bool QueryParser::operands_should_swap(const beedb::expression::Operand &lhs, const beedb::expression::Operand &rhs) +{ + return !std::holds_alternative(lhs) && + std::holds_alternative(rhs); +} \ No newline at end of file diff --git a/src/plan/logical/builder.cpp b/src/plan/logical/builder.cpp new file mode 100644 index 0000000..7e1195f --- /dev/null +++ b/src/plan/logical/builder.cpp @@ -0,0 +1,725 @@ +/*------------------------------------------------------------------------------* + * Architecture & Implementation of DBMS * + *------------------------------------------------------------------------------* + * Copyright 2020 Databases and Information Systems Group TU Dortmund * + * Visit us at * + * http://dbis.cs.tu-dortmund.de/cms/en/home/ * + * * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * + * OTHER DEALINGS IN THE SOFTWARE. 
* + * * + * Authors (alphabetical list): * + * Maximilian Berens * + * Roland Kühn * + * Jan Mühlig * + *------------------------------------------------------------------------------* + */ + +#include +#include + +using namespace beedb::plan::logical; + +const Plan Builder::buildSelectPlan(Database &database, const parser::QueryParser &parser) +{ + + /// FILL THE PLAN: + /// We build the canonical, 'straight-forward' plan: + /// + /// 0. Projections ("root" of the plan/the last operator) + /// 1. Selections/filters + /// 2. Aggregations + /// 3. Joins + /// 4. Cross-products + /// 5. TABLE nodes + /// + /// We also check semantics, but no optimization is done. + + logical::Plan plan; + + /// First: gather information from the parser: + + // get projection-components from parser object: + const auto select_part = parser.extractSELECT(); + + // get from-component from parser object + const auto from_part = parser.extractFROM(); + + // WHERE component + const auto where_part = parser.extractWHERE(); + + const auto groupby_part = parser.extractGROUPBY(); + + const auto orderby_part = parser.extractORDERBY(); + + // LIMIT component + const auto limit_part = parser.extractLIMIT(); + + AttributeToTableNameMapping attribute_mapping; + TableAliasMapping table_references; // used as a dictionary: name->table_ptr + std::vector source_tables; + + if (from_part.has_value()) + { + /// Second: resolve alias-defintions and perform consistency checks: + // build a mapping of alias and table descriptions. 
check for existing tables + for (parser::TableDescr &source_desc : from_part.value().tables()) + { + + const auto &source = source_desc.first; + const auto &alias = source_desc.second; + + if (!database.table_exists(source)) + { + // EXCEPTION: The user has specified a table in the from part, that is not in the currently loaded + // database + throw exception::TableNotFoundException(source); + } + + // check for proper disambiguation (via alias) in case of self-joins: + const auto &table_name_iter = table_references.find(alias.value_or(source)); + + if (table_name_iter != table_references.end()) + { // if name already exists... + if (!alias) + { + // not specifying an alias is an error, in this case + throw exception::MultipleTableReferences(source); + } + const auto is_alias_not_unique = table_references.find(alias.value()) != table_references.end(); + if (is_alias_not_unique) + { + // alias has to be unique: + throw exception::MultipleTableReferences(source); + } + } + + // a table can be referenced with a user specified alias or its name + table_references[alias.value_or(source)] = {database[source], static_cast(alias)}; + source_tables.push_back(database[source]); // for convenient, ordered table iteration + } + + // check join predicates: + expression::Attributes pred_attrs; + for (const auto &predicate : from_part.value().predicates()) + { + auto attributes = expression::PredicateAnalyzer::attributes(predicate); + std::move(attributes.begin(), attributes.end(), std::back_inserter(pred_attrs)); + } + + if (where_part.has_value()) + { + auto attributes = expression::PredicateAnalyzer::attributes(where_part.value()); + std::move(attributes.begin(), attributes.end(), std::back_inserter(pred_attrs)); + } + + for (const auto &pred_attr : pred_attrs) + { + populateAttributeToTableMappingAndCheck(attribute_mapping, table_references, pred_attr); + } + + // TODO: semantic check for ORDER BY and GROUP BY attributes + + if (groupby_part.has_value() && 
groupby_part->size() > 1) + { + throw exception::MultipleGroupByException(); + } + + if (orderby_part.has_value()) + { + for (const auto &ordered_attribute : orderby_part.value()) + { + populateAttributeToTableMappingAndCheck(attribute_mapping, table_references, ordered_attribute); + } + } + } + + // check projection attribute- and table references. + // Note: final-result schema will be created in the last step. + for (const auto &attr : select_part) + { + populateAttributeToTableMappingAndCheck(attribute_mapping, table_references, attr); + } + + // Create table-, join- and crossproduct nodes: + std::optional predicate = std::nullopt; + if (where_part.has_value()) + { + predicate.emplace(resolveTableReferences(attribute_mapping, where_part.value())); + } + + auto top_node = addFromNodes(database, plan, from_part.value().from_description->descr, attribute_mapping); + + if (where_part.has_value()) + { + top_node = addSelection(plan, predicate.value(), top_node); + } + + if (groupby_part.has_value()) + { + top_node = addGroupBy(attribute_mapping, groupby_part.value(), plan, top_node); + } + + if (orderby_part.has_value()) + { + top_node = addOrderBy(attribute_mapping, orderby_part.value(), plan, top_node); + } + + if (limit_part.has_value()) + { + top_node = addLimit(limit_part.value(), plan, top_node); + } + + addProjection(plan, select_part, attribute_mapping, top_node); + + // TODO: @Max Can this be removed? 
(seems to work without) + // plan[plan.last_added_node()].outgoingNodes(); + + return plan; +} + +void Builder::populateAttributeToTableMappingAndCheck(AttributeToTableNameMapping &attribute_mapping, + const TableAliasMapping &table_references, + const expression::Attribute &attr) +{ + // TODO: implement functionality and remove this check: + if (!(attr.origin == expression::AttributeOrigin::MIXED || attr.origin == expression::AttributeOrigin::PHYSICAL)) + { + throw exception::LogicalException("No support for aggregation yet, sorry!"); + } + + if (attr.table.has_value()) + { // if an attribute is used with a table reference.. + // both table alias and actual table name should resolve: + const auto &table_name_iter = table_references.find(attr.table.value()); + + if (table_name_iter == table_references.end()) + { + // EXCEPTION: the user used a table reference in the SELECT part, that can not be resolved to a table in + // FROM part + throw exception::TableNotFoundException(attr.table.value(), attr.combined_name); + } + + // The table is available, find column name in that table: + // Note: asterisk (i.e. "T.*") needs no check + if (!attr.isAsterisk() && !(table_name_iter->second.first->schema().contains(attr.name))) + { + // i.e. "if attr doesnt exist..." : + throw exception::CanNotResolveColumnException(attr.name, attr.combined_name); + } + } + else if (!attr.isAsterisk()) + { + // no table reference is given, we try to resolve it to tables mentioned in FROM part: + // Asterisk "SELECT * FROM ..." needs no check.. 
+ std::optional last_table_name; + for (auto const &[table_name_expression, table_ref_pair] : table_references) + { + if (table_ref_pair.first->schema().contains(attr.name)) + { + const auto &new_table_name = table_name_expression; + if (last_table_name.has_value() && new_table_name != last_table_name.value()) + { + throw exception::NoUniqueReferenceException(attr.combined_name, last_table_name.value(), + new_table_name); + } + last_table_name = new_table_name; + } + } + + if (!last_table_name.has_value()) + { + throw exception::ColumnNotFoundException(attr.combined_name); + } + + attribute_mapping[attr.combined_name] = last_table_name.value(); + } + else if (table_references.size() == 0) + { + throw exception::CanNotResolveColumnException("*"); + } +} + +beedb::expression::Predicate Builder::resolveTableReferences(const AttributeToTableNameMapping &attribute_mapping, + const expression::Predicate &pred) +{ + if (std::holds_alternative>(pred)) + { + const auto &logical_connective = std::get>(pred); + auto left = resolveTableReferences(attribute_mapping, logical_connective->left); + auto right = resolveTableReferences(attribute_mapping, logical_connective->right); + auto logical_connective_ptr = logical_connective.get(); + if (typeid(*logical_connective_ptr) == typeid(expression::AND)) + { + return std::make_unique(std::move(left), std::move(right)); + } + else + { + return std::make_unique(std::move(left), std::move(right)); + } + } + else if (std::holds_alternative>(pred)) + { + auto &atom = std::get>(pred); + + // Here, we are going to move the atom, if it has an attribute, that holds no table reference. + // The new atom is identical, except for the (new) table reference + using NewAttribute = std::optional; + const NewAttribute new_left = std::holds_alternative(atom->left) + ? (!std::get(atom->left).table.has_value() + ? 
NewAttribute{resolveTableReferences( + attribute_mapping, std::get(atom->left))} + : std::nullopt) + : std::nullopt; + const NewAttribute new_right = + std::holds_alternative(atom->right) + ? (!std::get(atom->right).table.has_value() + ? NewAttribute{resolveTableReferences(attribute_mapping, + std::get(atom->right))} + : std::nullopt) + : std::nullopt; + + if (new_left.has_value() || new_right.has_value()) + { + auto left_operand = new_left.value_or(atom->left); + auto right_operand = new_right.value_or(atom->right); + + return atom->clone(left_operand, right_operand); + } + else + { + return atom->clone(); + } + } + else if (std::holds_alternative(pred)) + { + return std::get(pred); + } + + assert(false && "NOT ALL CASES COVERED in logical/builder.cpp: Builder::resolveTableReferences()"); + return {}; +} + +const beedb::expression::Attribute Builder::resolveTableReferences(const AttributeToTableNameMapping &attribute_mapping, + const beedb::expression::Attribute &attribute) +{ + return expression::Attribute::create(attribute, attribute_mapping.at(attribute.combined_name)); +} + +const NodeIDType Builder::addFromNodes( + const Database &db, logical::Plan &plan, + std::variant &from, + const AttributeToTableNameMapping &attribute_mapping) +{ + logical::NodeIDType node_id = plan.get_new_operator_ID("FROM"); // for now, this works/is compatible + switch (from.index()) + { + case 0ul: { // TableDescr + const auto t_d = std::get(from); + const auto &schema_attr = db[t_d.first].schema().attributes(); + + const auto attributes = expression::recreateAttributes(schema_attr, t_d.second.value_or(t_d.first)); + + plan[node_id] = std::make_unique(attributes, t_d.first); + } + break; + case 1ul: { // JoinDescr + auto &join_description = std::get(from); + const auto &join_predicate = std::get<2>(join_description); + + plan[node_id] = std::make_unique( + resolveTableReferences(attribute_mapping, join_predicate), + expression::JoinOperator::NestedLoopsJoin); // only NLJ's in the 
canonical plan + + // create and connect child nodes: + const auto left_child = addFromNodes(db, plan, std::get<0>(join_description)->descr, attribute_mapping); + + plan.connect(left_child, node_id); + + const auto right_child = addFromNodes(db, plan, std::get<1>(join_description)->descr, attribute_mapping); + plan.connect(right_child, node_id); + } + break; + case 2ul: { // CrossProductDescr + std::vector child_ids; + + for (const auto &el : std::get(from)) + { + child_ids.push_back(addFromNodes(db, plan, el->descr, attribute_mapping)); + } + // we build at least one CP. this will be the final top-node: + plan[node_id] = std::make_unique(); + + plan.connect(child_ids[child_ids.size() - 2], node_id); + plan.connect(child_ids[child_ids.size() - 1], node_id); + + // for each child beyond the second, we have to build another CP: + for (auto i = static_cast(child_ids.size() - 3); i >= 0; i--) + { + // note: i-- will overflow into negatives on last iteration, breaking the loop condition + + const logical::NodeIDType parent_id = plan.get_new_operator_ID("FROM"); + plan[parent_id] = std::make_unique(); + + // connect a child to new CP: + // note: child CP comes second, cross product tree "grows (upwards) to the left" + plan.connect(child_ids[static_cast(i)], parent_id); + plan.connect(node_id, parent_id); // connect new CP to parent CP + node_id = parent_id; // set this, new CP as parent for next CP + } + } + break; + default: + break; + } + + return node_id; +} + +const NodeIDType Builder::addSelection(logical::Plan &plan, const expression::Predicate &predicate, + const logical::NodeIDType &parent_id) +{ + const auto selection_name = plan.get_new_operator_ID("SELECTION"); + plan[selection_name] = std::make_unique(clone_predicate(predicate)); + + plan.connect(parent_id, selection_name); + + return selection_name; +} + +const NodeIDType Builder::addGroupBy(const AttributeToTableNameMapping &attribute_mapping, + const beedb::parser::GroupByExpression &groupbyexpr, Plan 
&plan, + const NodeIDType &parent_id) +{ + // TODO: Add exception + const auto groupby_name = plan.get_new_operator_ID("GROUPBY"); + expression::Attributes group_by_attributes; + for (const auto &attribute : groupbyexpr) + { + if (attribute.table.has_value()) + { + group_by_attributes.push_back(std::move(attribute)); + } + else + { + group_by_attributes.push_back(resolveTableReferences(attribute_mapping, attribute)); + } + } + + plan[groupby_name] = std::make_unique(group_by_attributes); + plan.connect(parent_id, groupby_name); + return groupby_name; +} + +const NodeIDType Builder::addOrderBy(const AttributeToTableNameMapping &attribute_mapping, + const beedb::parser::OrderByExpression &orderbyexpr, Plan &plan, + const NodeIDType &parent_id) +{ + const auto orderby_name = plan.get_new_operator_ID("ORDERBY"); + expression::Attributes order_by_attributes; + for (const auto &attribute : orderbyexpr) + { + if (attribute.table.has_value()) + { + order_by_attributes.push_back(std::move(attribute)); + } + else + { + order_by_attributes.push_back(resolveTableReferences(attribute_mapping, attribute)); + } + } + + plan[orderby_name] = std::make_unique(order_by_attributes); + plan.connect(parent_id, orderby_name); + return orderby_name; +} + +void Builder::addProjection(logical::Plan &plan, const std::vector &attributes, + const AttributeToTableNameMapping &attribute_mapping, const logical::NodeIDType &parent_id) +{ + const auto projection_name = plan.get_new_operator_ID("PROJECTION"); + + expression::Attributes projection; + // create attributes of projection: + for (const auto &attr : attributes) + { + if (attr.isAsterisk()) + { + for (const auto &child_attr : plan.produced_attributes(parent_id)) + { + // either resolve all attributes + // or only retrieve attributes for a specified table: + if ((!attr.table) || child_attr.table == attr.table) + { + projection.push_back(child_attr); + } + } + } + else + { + if (!attr.table.has_value()) + { + // resolve table reference + 
projection.push_back(resolveTableReferences(attribute_mapping, attr)); + } + else + { + projection.push_back(attr); + } + } + } + plan[projection_name] = std::make_unique(projection); + plan.connect(parent_id, projection_name); +} + +const NodeIDType Builder::addLimit(const beedb::parser::LimitExpression &limit_expression, + beedb::plan::logical::Plan &plan, const beedb::plan::logical::NodeIDType &parent_id) +{ + const auto limit_id = plan.get_new_operator_ID("LIMIT"); + plan[limit_id] = std::make_unique(limit_expression.limit, limit_expression.offset); + plan.connect(parent_id, limit_id); + return limit_id; +} + +const Plan Builder::buildCreateTablePlan(Database &database, const parser::QueryParser &parser) +{ + Plan plan; + const auto create_table_name = plan.get_new_operator_ID("CREATE_TABLE"); + const auto create_table_expression = parser.extractCREATE_TABLE(); + if (create_table_expression.has_value() == false) + { + throw exception::CanNotCreateTableException(); + } + + const auto table_exists = database.table_exists(create_table_expression->table_name); + if (table_exists && create_table_expression->if_not_exists == true) + { + return plan; + } + else if (table_exists) + { + throw exception::TableAlreadyExists(create_table_expression->table_name); + } + + std::vector column_names; + std::vector column_types; + std::vector column_nullables; + + for (const auto &column : create_table_expression->column_expressions) + { + column_names.push_back(column.column_name); + column_types.push_back({column.type_id, column.length}); + column_nullables.push_back(column.is_null); + } + + plan[create_table_name] = + std::make_unique(create_table_expression->table_name, std::move(column_names), + std::move(column_types), std::move(column_nullables)); + + return plan; +} + +const Plan Builder::buildCreateIndexPlan(Database &database, const parser::QueryParser &parser) +{ + Plan plan; + + const auto expression = parser.extractCREATE_INDEX(); + if (expression.has_value() == 
false) + { + throw exception::CanNotCreateIndexException(); + } + + if (database.table_exists(expression->table_name) == false) + { + throw exception::TableNotFoundException(expression->table_name); + } + + auto *table = database.table(expression->table_name); + if (table->schema().contains(expression->column_name) == false) + { + throw exception::ColumnNotFoundException(expression->table_name, expression->column_name); + } + + const auto column_index = table->schema().column_index(expression->column_name); + assert(column_index.has_value()); + const auto &column = table->schema()[column_index.value()]; + const auto index_exists = column.has_index(expression->index_name); + if (expression->if_not_exists == false && index_exists) + { + throw exception::IndexAlreadyExistsException(expression->table_name, expression->column_name); + } + else if (index_exists) + { + return plan; + } + + const auto id = plan.get_new_operator_ID("CREATE_INDEX"); + expression::Attribute attribute{std::move(expression->column_name), {std::move(expression->table_name)}}; + plan[id] = std::make_unique( + std::move(attribute), std::move(expression->index_name), expression->is_unique, expression->type); + + return plan; +} + +const Plan Builder::buildInsertPlan(Database &database, const parser::QueryParser &parser) +{ + Plan plan; + + const auto insert_operator_name = plan.get_new_operator_ID("INSERT"); + const auto insert_expression = parser.extractINSERT(); + + if (insert_expression.has_value() == false) + { + throw exception::CanNotInsertException(); + } + + const auto table_exists = database.table_exists(insert_expression->table_name); + if (table_exists == false) + { + throw exception::TableNotFoundException(insert_expression->table_name); + } + + auto table = database.table(insert_expression->table_name); + expression::Attributes attributes; + for (const auto &column_name : insert_expression->column_names) + { + attributes.push_back({column_name, table->name()}); + } + + for (auto i = 
0u; i < table->schema().size(); i++) + { + const auto &column = table->schema().column(i); + if (column.is_nullable() == false) + { + const auto &table_attribute = table->schema().attribute(i); + auto iterator = std::find_if(attributes.begin(), attributes.end(), + [&table_attribute](const expression::Attribute &attribute) { + return attribute.name == table_attribute.name; + }); + if (iterator == attributes.end()) + { + throw exception::ColumnCanNotBeNull(table->name(), table_attribute.name); + } + } + } + + plan[insert_operator_name] = std::make_unique(insert_expression->table_name, attributes, + insert_expression->values_rows); + + return plan; +} + +const Plan Builder::buildUpdatePlan(Database &database, const parser::QueryParser &parser) +{ + Plan plan; + + const auto update_expression = parser.extractUPDATE(); + + AttributeToTableNameMapping attribute_mapping; + TableAliasMapping table_references; + std::vector source_tables; + + if (update_expression.from.has_value()) + { + for (parser::TableDescr &source_desc : update_expression.from.value().tables()) + { + const auto &source = source_desc.first; + const auto &alias = source_desc.second; + + if (!database.table_exists(source)) + { + throw exception::TableNotFoundException(source); + } + + // check for proper disambiguation (via alias) in case of self-joins: + const auto &table_name_iter = table_references.find(alias.value_or(source)); + + if (table_name_iter != table_references.end()) + { // if name already exists... 
+ const auto is_alias_not_unique = table_references.find(alias.value()) != table_references.end(); + if (!alias || is_alias_not_unique) + { + // not specifying an alias is an error, in this case + throw exception::MultipleTableReferences(source); + } + } + + // a table can be referenced with a user specified alias or its name + table_references[alias.value_or(source)] = {database[source], static_cast(alias)}; + source_tables.push_back(database[source]); // for convenient, ordered table iteration + } + + // check join predicates: + expression::Attributes pred_attrs; + for (const auto &predicate : update_expression.from.value().predicates()) + { + auto attributes = expression::PredicateAnalyzer::attributes(predicate); + std::move(attributes.begin(), attributes.end(), std::back_inserter(pred_attrs)); + } + + if (update_expression.where.has_value()) + { + auto attributes = expression::PredicateAnalyzer::attributes(update_expression.where.value()); + std::move(attributes.begin(), attributes.end(), std::back_inserter(pred_attrs)); + } + + for (const auto &pred_attr : pred_attrs) + { + populateAttributeToTableMappingAndCheck(attribute_mapping, table_references, pred_attr); + } + } + + // check projection attribute- and table references. + // Note: final-result schema will be created in the last step. 
+ for (const auto &update : update_expression.updates) + { + populateAttributeToTableMappingAndCheck(attribute_mapping, table_references, update.first); + } + + std::optional predicate = std::nullopt; + if (update_expression.where.has_value()) + { + predicate.emplace(resolveTableReferences(attribute_mapping, update_expression.where.value())); + } + + auto top_node = + addFromNodes(database, plan, update_expression.from.value().from_description->descr, attribute_mapping); + + if (predicate.has_value()) + { + top_node = addSelection(plan, predicate.value(), top_node); + } + + const auto update_operator_name = plan.get_new_operator_ID("UPDATE"); + + plan[update_operator_name] = std::make_unique(update_expression.updates); + plan.connect(top_node, update_operator_name); + + return plan; +} + +const Plan Builder::build(Database &database, const beedb::parser::QueryParser &parser) +{ + switch (parser.type()) + { + case parser::QueryParser::QueryType::SELECT: + return buildSelectPlan(database, parser); + case parser::QueryParser::QueryType::INSERT: + return buildInsertPlan(database, parser); + case parser::QueryParser::QueryType::CREATE_TABLE: + return buildCreateTablePlan(database, parser); + case parser::QueryParser::QueryType::CREATE_INDEX: + return buildCreateIndexPlan(database, parser); + case parser::QueryParser::QueryType::UPDATE: + return buildUpdatePlan(database, parser); + default: + throw exception::LogicalException( + "Plan construction not (yet) implemented for this query type! 
(QueryParser::QueryType::" + + std::to_string(static_cast(parser.type())) + ")"); + } +} \ No newline at end of file diff --git a/src/plan/logical/plan.cpp b/src/plan/logical/plan.cpp new file mode 100644 index 0000000..7b6555f --- /dev/null +++ b/src/plan/logical/plan.cpp @@ -0,0 +1,125 @@ +/*------------------------------------------------------------------------------* + * Architecture & Implementation of DBMS * + *------------------------------------------------------------------------------* + * Copyright 2020 Databases and Information Systems Group TU Dortmund * + * Visit us at * + * http://dbis.cs.tu-dortmund.de/cms/en/home/ * + * * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * + * OTHER DEALINGS IN THE SOFTWARE. 
* + * * + * Authors (alphabetical list): * + * Maximilian Berens * + * Roland Kühn * + * Jan Mühlig * + *------------------------------------------------------------------------------* + */ + +#include "plan/logical/plan.h" +#include +#include +#include + +using namespace beedb::plan::logical; +using namespace beedb::expression; + +OutgoingSchema Plan::produced_attributes(const NodeIDType &nid) const +{ + + // First, add attributes that are added by this operator + Attributes outgoing_schema = _node_data.at(nid)->additional_attributes; + + // multiple operators also forwards all incoming attributes: + if (_node_data.at(nid)->forwards_schema) + { + NodeIDs inc; + try + { + inc = this->_incoming_node_ids.at(nid); + } + catch (const std::out_of_range &) + { + // this is fine, there might be none + } + + // iterate over incoming edges and insert edge data into outgoing_schema: + for (const auto &inc_node_id : inc) + { + const auto &provided_schema = _edge_data.at({inc_node_id, nid}); + for (const auto &attr : provided_schema) + { + outgoing_schema.push_back(attr); + } + } + } + + return outgoing_schema; +} + +bool Plan::requirements_satisfied(const NodeIDType nid) const +{ + // first, collect incoming attributes (stored at incoming edges) + std::set incoming_schema; + NodeIDs inc; + try + { + inc = this->_incoming_node_ids.at(nid); + } + catch (const std::out_of_range &) + { + // this is fine; there might be none + } + + // iterate over incoming edges and collect edge data/incoming schemas: + for (const auto &incoming_node : inc) + { + // note: there might be multiple incoming edges for (multiway) join operators + const auto &provided_schema = _edge_data.at({incoming_node, nid}); + incoming_schema.insert(provided_schema.begin(), + provided_schema.end()); // why not "merge", C++, why.. 
+ } + + // then test if required attributes are an actual subset of incoming attributes + Attributes intersection; + // std::set_intersection(incoming_schema.begin(), // given attributes + // incoming_schema.end(), + // _node_data.at(nid)->requirements.begin(), // desired attributes + // _node_data.at(nid)->requirements.end(), + // std::inserter(intersection, + // intersection.begin())); + + // requirement is satisfied, if intersection is the same as "requirements" + // if requirements are fully satisfied, attributes will be missing in "intersection" + // return intersection == _node_data.at(nid)->requirements; + return intersection.size() == _node_data.at(nid)->requirements.size(); +} + +void Plan::print_table() const +{ + std::function recursive_add; + + recursive_add = [this, &recursive_add](util::TextTable &table, const NodeIDType &node_id, + const std::uint32_t intend) { + std::string name = std::string(intend, ' ') + this->_node_data.at(node_id)->operator std::string(); + table.push_back({{std::move(name)}}); + const auto &children = this->incoming_nodes(node_id); + for (const auto &child : children) + { + recursive_add(table, child, intend + 2); + } + }; + + util::TextTable text_table; + text_table.header({{"Operator"}}); + const auto &root = this->find_root(); + if (root.has_value()) + { + recursive_add(text_table, root.value(), 0u); + } + std::cout << text_table << std::endl; +} \ No newline at end of file diff --git a/src/plan/optimizer/optimizer.cpp b/src/plan/optimizer/optimizer.cpp new file mode 100644 index 0000000..f31666a --- /dev/null +++ b/src/plan/optimizer/optimizer.cpp @@ -0,0 +1,172 @@ +/*------------------------------------------------------------------------------* + * Architecture & Implementation of DBMS * + *------------------------------------------------------------------------------* + * Copyright 2020 Databases and Information Systems Group TU Dortmund * + * Visit us at * + * http://dbis.cs.tu-dortmund.de/cms/en/home/ * + * * + * THE 
SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * + * OTHER DEALINGS IN THE SOFTWARE. * + * * + * Authors (alphabetical list): * + * Maximilian Berens * + * Roland Kühn * + * Jan Mühlig * + *------------------------------------------------------------------------------* + */ + +#include + +using namespace beedb::plan::logical; + +Optimizer::Optimizer(const beedb::Database &database, const beedb::plan::logical::Plan &unoptimized_plan) + : _database(database), _config(database.config()), _plan(unoptimized_plan), + _plan_root(unoptimized_plan.find_root().value_or("ERROR_ROOT_NOT_FOUND")) +{ + assert(_plan_root != "ERROR_ROOT_NOT_FOUND"); + if (!_config[Config::k_OptimizationDisableOptimization]) + // if option is zero, nothing will happen in this class. TODO: We should use optional instead of empty + // classes.. + { + //// Fill helper constructs for convenient access of plan components: + fill_access_maps(); // currently unused + } +} + +const Plan Optimizer::optimize() +{ + if (!_config[Config::k_OptimizationDisableOptimization]) + // if option is zero, nothing will happen in this class. TODO: We should use optional instead of empty + // classes.. 
+ { + logical::Plan optimized_plan; + + /// run optimizations: + + logical::Plan::NodeRecreationFunction generate_node = [&](const NodeIDType &node) { + const expression::Operator &plan_operator = *(_plan[node]); + const auto &operator_type_id = typeid(plan_operator); + + auto new_node = optimized_plan.get_new_operator_ID(node /*append old node id*/); + + if (operator_type_id == typeid(expression::JoinOperator)) + { + if (_config[Config::k_OptimizationEnableHashJoin]) + { + /** + * The conditions for replacing a join with a hash join are as follows: + * - The current join has to be a nested loops join (short: NLJ) + * - The NLJ`s predicate has to be an atom + * - the atom has to be a comparison on equality + */ + + const auto &other_join = reinterpret_cast(plan_operator); + + if (other_join.type == expression::JoinOperator::Type::NestedLoopsJoin && + std::holds_alternative>(other_join.predicate)) + { + // TODO: refactor this...: + const auto &atom = std::get>(other_join.predicate); + auto atom_ptr = atom.get(); + if (typeid(*atom_ptr) == typeid(expression::EQ)) + { + return logical::Plan::NodeMapContent{new_node, + std::make_unique( + expression::clone_predicate(other_join.predicate), + expression::JoinOperator::Type::HashJoin)}; + } + } + } + } + else if (operator_type_id == typeid(expression::TableOperator)) + { + const auto &other_table = reinterpret_cast(plan_operator); + if (_config[Config::k_OptimizationEnableIndexScan] && _plan_selection_predicate_map.size() > 0) + { + for (const auto &[selection_node, predicate] : _plan_selection_predicate_map) + { + + // TODO: refactor this....: + const auto predicate_attributes = expression::PredicateAnalyzer::attributes(predicate); + if (predicate_attributes.size() == 1 && + expression::PredicateAnalyzer::contains_not_equals_predicate(predicate) == false) + { + // First, we test if the predicate touches attributes in other_table + if (std::find(other_table.additional_attributes.begin(), + 
other_table.additional_attributes.end(), + predicate_attributes[0]) != other_table.additional_attributes.end()) + { + // if it does, it has to have an index for the attribute in question + const auto &column_index = + _database[other_table.physical_table_name()].schema().column_index( + predicate_attributes[0].name); + + assert(column_index.has_value()); + + const auto &column = + _database[other_table.physical_table_name()].schema().column(column_index.value()); + if (column.is_indexed( + expression::PredicateAnalyzer::contains_range_predicate(predicate))) + { + return logical::Plan::NodeMapContent{ + new_node, + std::make_unique( + other_table.additional_attributes, other_table.physical_table_name(), + expression::clone_predicate(predicate), predicate_attributes[0])}; + } + } + } + } + } + } + + // default case: no optimization is done for this node, just copy node + return logical::Plan::NodeMapContent{new_node, plan_operator.clone()}; + }; + + auto new_root = optimized_plan.descend_and_recreate(_plan, _plan_root, generate_node); + + return optimized_plan; + } + else + { + return {}; // empty plan objects will not be chosen over the "canonical" plan + } +} + +void Optimizer::optimization_force_hash_join() +{ +} + +void Optimizer::fill_access_maps() +{ + // TODO: this could be done in logical::plan + auto ref_extractor = [&](const NodeIDType &node) { + const expression::Operator &plan_operator = *(_plan[node]); + const auto &operator_type_id = typeid(plan_operator); + + if (operator_type_id == typeid(expression::JoinOperator)) + { + const auto &join = reinterpret_cast(plan_operator); + _plan_join_map.insert({node, join}); + _plan_join_predicate_map.insert({node, join.predicate}); + } + else if (operator_type_id == typeid(expression::TableOperator)) + { + _plan_table_map.insert({node, reinterpret_cast(plan_operator)}); + } + else if (operator_type_id == typeid(expression::SelectionOperator)) + { + const auto &selection = reinterpret_cast(plan_operator); + 
_plan_selection_map.insert({node, selection}); + _plan_selection_predicate_map.insert({node, selection.predicate}); + } + }; + + _plan.descend_and_apply(_plan_root, ref_extractor); +} \ No newline at end of file diff --git a/src/plan/physical/builder.cpp b/src/plan/physical/builder.cpp new file mode 100644 index 0000000..993bab4 --- /dev/null +++ b/src/plan/physical/builder.cpp @@ -0,0 +1,751 @@ +/*------------------------------------------------------------------------------* + * Architecture & Implementation of DBMS * + *------------------------------------------------------------------------------* + * Copyright 2020 Databases and Information Systems Group TU Dortmund * + * Visit us at * + * http://dbis.cs.tu-dortmund.de/cms/en/home/ * + * * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * + * OTHER DEALINGS IN THE SOFTWARE. 
* + * * + * Authors (alphabetical list): * + * Maximilian Berens * + * Roland Kühn * + * Jan Mühlig * + *------------------------------------------------------------------------------* + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace beedb::plan::physical; + +Plan Builder::build(Database &database, const logical::Plan &logical_plan) +{ + auto root = logical_plan.find_root(); + assert(root.has_value()); + auto execution_operator = build_operator(database, logical_plan, root.value()); + assert(execution_operator && "Could not create physical operators."); + return Plan(database, std::move(execution_operator)); +} + +std::unique_ptr Builder::build_operator(Database &database, + const logical::Plan &logical_plan, + const std::string &logical_node_name) +{ + if (logical_plan.is_empty()) + { + return nullptr; + } + + const auto *logical_operator = logical_plan[logical_node_name].get(); + if (typeid(*logical_operator) == typeid(expression::TableOperator)) + { + const auto table_operator = reinterpret_cast(logical_operator); + auto table = database.table(table_operator->physical_table_name()); + + auto schema = table_operator->table_alias().has_value() + ? 
table::Schema{table->schema(), table_operator->additional_attributes, + table_operator->table_alias().value()} + : table::Schema{table->schema()}; + + return std::make_unique(database.config()[Config::k_ScanPageLimit], schema, + database.buffer_manager(), + database.table_disk_manager(), *table); + } + else if (typeid(*logical_operator) == typeid(expression::IndexScanOperator)) + { + const auto index_scan_operator = reinterpret_cast(logical_operator); + auto table = database.table(index_scan_operator->table_name); + auto schema = table::Schema{table->schema()}; + + auto key_ranges = extract_key_ranges(index_scan_operator->predicate); + bool needs_range = false; + std::for_each(key_ranges.cbegin(), key_ranges.cend(), + [&needs_range](const execution::KeyRange &k) { needs_range |= !k.single_key(); }); + + const auto indexed_column_name = index_scan_operator->indexed_attribute.name; + const auto indexed_column_index = schema.column_index(indexed_column_name); + assert(indexed_column_index.has_value()); + auto index = schema.column(indexed_column_index.value()).index(needs_range); + + return std::make_unique(database.config()[Config::k_ScanPageLimit], schema, + database.buffer_manager(), database.table_disk_manager(), + key_ranges, index); + } + else if (typeid(*logical_operator) == typeid(expression::ProjectionOperator)) + { + const auto projection_operator = reinterpret_cast(logical_operator); + auto in_nodes = logical_plan.incoming_nodes(logical_node_name); + auto child = build_operator(database, logical_plan, in_nodes[0]); + auto schema = table::Schema{child->schema(), projection_operator->requirements}; + auto physical_projection_operator = std::make_unique(std::move(schema)); + physical_projection_operator->child(std::move(child)); + return physical_projection_operator; + } + else if (typeid(*logical_operator) == typeid(expression::SelectionOperator)) + { + const auto selection_operator = reinterpret_cast(logical_operator); + auto in_nodes = 
logical_plan.incoming_nodes(logical_node_name); + auto child = build_operator(database, logical_plan, in_nodes[0]); + auto predicate_matcher = build_predicate(selection_operator->predicate, child->schema()); + auto physical_selection_operator = + std::make_unique(child->schema(), std::move(predicate_matcher)); + physical_selection_operator->child(std::move(child)); + return physical_selection_operator; + } + else if (typeid(*logical_operator) == typeid(expression::JoinOperator)) + { + const auto join_operator = reinterpret_cast(logical_operator); + auto in_nodes = logical_plan.incoming_nodes(logical_node_name); + auto left_child = build_operator(database, logical_plan, in_nodes[0]); + auto right_child = build_operator(database, logical_plan, in_nodes[1]); + auto join_schema = + table::Schema(left_child->schema(), right_child->schema(), + left_child->schema().table_name() + "_JOIN_" + right_child->schema().table_name()); + + std::unique_ptr physical_join_operator{}; + if (join_operator->type == expression::JoinOperator::HashJoin) + { + auto predicate_matcher = + build_predicate(join_operator->predicate, left_child->schema(), right_child->schema()); + auto attribute_matcher = + reinterpret_cast *>( + predicate_matcher.get()); + physical_join_operator = std::make_unique( + std::move(join_schema), attribute_matcher->left_index(), attribute_matcher->right_index()); + } + else + { + auto predicate_matcher = + build_predicate(join_operator->predicate, left_child->schema(), right_child->schema()); + physical_join_operator = std::make_unique(std::move(join_schema), + std::move(predicate_matcher)); + } + + physical_join_operator->left_child(std::move(left_child)); + physical_join_operator->right_child(std::move(right_child)); + return physical_join_operator; + } + else if (typeid(*logical_operator) == typeid(expression::CrossProductOperator)) + { + auto in_nodes = logical_plan.incoming_nodes(logical_node_name); + auto left_child = build_operator(database, logical_plan, 
in_nodes[0]); + auto right_child = build_operator(database, logical_plan, in_nodes[1]); + auto join_schema = + table::Schema(left_child->schema(), right_child->schema(), + left_child->schema().table_name() + "_CROSS_" + right_child->schema().table_name()); + auto physical_crossproduct_operator = std::make_unique(std::move(join_schema)); + physical_crossproduct_operator->left_child(std::move(left_child)); + physical_crossproduct_operator->right_child(std::move(right_child)); + return physical_crossproduct_operator; + } + else if (typeid(*logical_operator) == typeid(expression::GroupByOperator)) + { + + // TODO: implement this (by uncommenting, when operator is implemented) + // const auto groupby_operator = reinterpret_cast(logical_operator); + // auto in_nodes = logical_plan.incoming_nodes(logical_node_name); + // auto child = build_operator(database,logical_plan, in_nodes[0]); + /// WARNING, WHEN IMPLEMENTING: child->schema is not the proper schema, since this operator changes it/has + /// different attributes + // auto physical_groupby_operator = std::make_unique(child->schema(), + // groupby_operator->attributes); physical_groupby_operator->child(std::move(child)); + //// return physical_groupby_operator; + throw exception::NoPhysicalOperatorForNode("GROUP BY"); + } + else if (typeid(*logical_operator) == typeid(expression::AggregationOperator)) + { + + // TODO: implement this (by uncommenting, when operator is implemented) + // const auto aggregation_operator = reinterpret_cast(logical_operator); + // auto in_nodes = logical_plan.incoming_nodes(logical_node_name); + // auto child = build_operator(database,logical_plan, in_nodes[0]); + // auto physical_aggregation_operator = std::make_unique(child->schema(), + // expression::Attributes{}, aggregation_operator->attributes); + // physical_aggregation_operator->child(std::move(child)); + // return physical_aggregation_operator; + throw exception::NoPhysicalOperatorForNode("AGGREGATE"); + } + else if 
(typeid(*logical_operator) == typeid(expression::OrderByOperator)) + { + const auto orderby_operator = reinterpret_cast(logical_operator); + auto in_nodes = logical_plan.incoming_nodes(logical_node_name); + auto child = build_operator(database, logical_plan, in_nodes[0]); + std::vector> sort_indices; + for (const auto &attribute : orderby_operator->requirements) + { + assert(attribute.order.has_value()); // we expect to always have an order defined here + auto attribute_index = child->schema().column_index(attribute); + assert(attribute_index.has_value()); + sort_indices.push_back({attribute_index.value(), attribute.order.value()}); + } + auto physical_orderby_operator = + std::make_unique(child->schema(), std::move(sort_indices)); + physical_orderby_operator->child(std::move(child)); + return physical_orderby_operator; + } + else if (typeid(*logical_operator) == typeid(expression::LimitOperator)) + { + const auto limit_operator = reinterpret_cast(logical_operator); + auto in_nodes = logical_plan.incoming_nodes(logical_node_name); + auto child = build_operator(database, logical_plan, in_nodes[0]); + auto physical_limit_operator = + std::make_unique(child->schema(), limit_operator->limit, limit_operator->offset); + physical_limit_operator->child(std::move(child)); + return physical_limit_operator; + } + else if (typeid(*logical_operator) == typeid(expression::CreateTableOperator)) + { + const auto create_table_operator = reinterpret_cast(logical_operator); + auto schema = table::Schema{create_table_operator->table_name}; + auto count_columns = create_table_operator->column_names.size(); + for (auto i = 0u; i < count_columns; i++) + { + schema.add({create_table_operator->column_types[i], create_table_operator->column_is_nullables[i]}, + {create_table_operator->column_names[i], create_table_operator->table_name}); + } + + return std::make_unique(database, std::move(schema)); + } + else if (typeid(*logical_operator) == typeid(expression::CreateIndexOperator)) + { + 
const auto create_index_operator = reinterpret_cast(logical_operator); + auto physical_create_index_operator = std::make_unique( + database, create_index_operator->column.table.value(), create_index_operator->column, + create_index_operator->index_name, create_index_operator->is_unique, create_index_operator->type); + auto table = database.table(create_index_operator->column.table.value()); + auto physical_scan_operator = std::make_unique( + database.config()[Config::k_ScanPageLimit], table->schema(), database.buffer_manager(), + database.table_disk_manager(), *table); + auto column_index = physical_scan_operator->schema().column_index(create_index_operator->column.name); + assert(column_index.has_value()); + + auto physical_build_index_operator = + std::make_unique(database, create_index_operator->column.table.value(), + column_index.value(), create_index_operator->index_name); + physical_build_index_operator->create_index_operator(std::move(physical_create_index_operator)); + physical_build_index_operator->data_operator(std::move(physical_scan_operator)); + return physical_build_index_operator; + } + else if (typeid(*logical_operator) == typeid(expression::InsertOperator)) + { + const auto insert_operator = reinterpret_cast(logical_operator); + auto table = database.table(insert_operator->table_name); + auto tuple_buffer_operator = std::make_unique(table->schema()); + + for (const auto &values : insert_operator->values_lists) + { + auto tuple = build_tuple(tuple_buffer_operator->schema(), insert_operator->additional_attributes, values); + tuple_buffer_operator->add(tuple); + } + + auto root_operator = std::unique_ptr(new execution::InsertOperator( + database.buffer_manager(), database.table_disk_manager(), database.system_statistics(), *table)); + root_operator->child(std::move(tuple_buffer_operator)); + + for (auto i = 0u; i < table->schema().size(); i++) + { + const auto &column = table->schema().column(i); + for (auto index : column.indices()) + { + auto 
add_to_index_operator =
+                    std::unique_ptr(new execution::AddToIndexOperator(i, index));
+                add_to_index_operator->child(std::move(root_operator));
+                root_operator = std::move(add_to_index_operator);
+            }
+        }
+
+        return root_operator;
+    }
+    else if (typeid(*logical_operator) == typeid(expression::UpdateOperator))
+    {
+        const auto update_operator = reinterpret_cast(logical_operator);
+        auto in_nodes = logical_plan.incoming_nodes(logical_node_name);
+        auto child = build_operator(database, logical_plan, in_nodes[0]);
+
+        std::vector> new_column_values;
+        for (const auto &attribute_value_pair : update_operator->updates)
+        {
+            const auto index = child->schema().column_index(attribute_value_pair.first);
+            if (index.has_value()) // attributes that are not found in the child schema are skipped
+            {
+                new_column_values.push_back({index.value(), build_value(attribute_value_pair.second,
+                                                                        child->schema().column(index.value()).type())});
+            }
+        }
+
+        auto physical_update_operator =
+            std::make_unique(database.table_disk_manager(), new_column_values);
+        physical_update_operator->child(std::move(child));
+        return physical_update_operator;
+    }
+
+    return {};
+}
+/*
+ * Builds a stand-alone plan that fills an index: a scan over `table_name` is created and
+ * handed to an index-building operator as its data operator.
+ *
+ * `column_name` must exist in the scanned schema; this is only checked by an assert.
+ * The scan page limit is read from the database configuration (Config::k_ScanPageLimit).
+ * NOTE(review): the concrete operator classes are template arguments that are not visible
+ * in this extract.
+ */
+Plan Builder::build_index_plan(Database &database, const std::string &table_name, const std::string &column_name,
+                               const std::string &index_name)
+{
+    auto table = database.table(table_name);
+    auto physical_scan_operator = std::make_unique(
+        database.config()[Config::k_ScanPageLimit], table->schema(), database.buffer_manager(),
+        database.table_disk_manager(), *table);
+    const auto column_index = physical_scan_operator->schema().column_index(column_name);
+    assert(column_index.has_value());
+
+    auto physical_build_index_operator =
+        std::make_unique(database, table_name, column_index.value(), index_name);
+
+    physical_build_index_operator->data_operator(std::move(physical_scan_operator));
+
+    return Plan(database, std::move(physical_build_index_operator));
+}
+/*
+ * Helper that inherits from all given callables and pulls in each operator(), so a set of
+ * lambdas can be passed to std::visit as a single visitor (used by the functions below).
+ */
+template struct overloaded : Ts...
+{
+    using Ts::operator()...;
+};
+template overloaded(Ts...)->overloaded; /*
+ * Translates a logical predicate into a matcher for tuples of a single schema.
+ *
+ * The predicate is visited:
+ *  - a connective whose dynamic type is expression::AND or expression::OR yields a matcher
+ *    built from the recursively translated left and right sides,
+ *  - an atom is delegated to the atom overload of build_predicate,
+ *  - the bool alternative builds nothing.
+ *
+ * Returns the matcher; the pointer stays empty for the bool alternative and for a
+ * connective that is neither AND nor OR.
+ * NOTE(review): the matcher class names (template arguments) are not visible in this extract.
+ */
+std::unique_ptr Builder::build_predicate(
+    const beedb::expression::Predicate &predicate, const table::Schema &schema)
+{
+    std::unique_ptr matcher{};
+    std::visit(
+
+        overloaded{[&schema, &matcher](const std::unique_ptr &connective) {
+                       auto connective_ptr = connective.get();
+                       if (typeid(*connective_ptr) == typeid(expression::AND))
+                       {
+                           matcher = std::make_unique(
+                               build_predicate(connective->left, schema), build_predicate(connective->right, schema));
+                       }
+                       if (typeid(*connective_ptr) == typeid(expression::OR))
+                       {
+                           matcher = std::make_unique(build_predicate(connective->left, schema),
+                                                      build_predicate(connective->right, schema));
+                       }
+                   },
+                   [&schema, &matcher](const std::unique_ptr &atom) {
+                       matcher = build_predicate(atom, schema);
+                   },
+                   [](bool) {
+                       /* Nothing to deal with */
+                   }},
+
+        predicate);
+
+    return matcher;
+}
+/*
+ * Two-schema variant of the function above (the operands are resolved against a left and a
+ * right schema). The visiting logic is the same: AND/OR connectives recurse into both
+ * sides, atoms go to the two-schema atom overload, the bool alternative builds nothing.
+ *
+ * Returns the matcher, or an empty pointer if nothing was built.
+ */
+std::unique_ptr Builder::build_predicate(
+    const beedb::expression::Predicate &predicate, const beedb::table::Schema &left_schema,
+    const beedb::table::Schema &right_schema)
+{
+    std::unique_ptr matcher{};
+
+    std::visit(overloaded{[&left_schema, &right_schema,
+                           &matcher](const std::unique_ptr &connective) {
+                              auto connective_ptr = connective.get();
+                              if (typeid(*connective_ptr) == typeid(expression::AND))
+                              {
+                                  matcher = std::make_unique(
+                                      build_predicate(connective->left, left_schema, right_schema),
+                                      build_predicate(connective->right, left_schema, right_schema));
+                              }
+                              if (typeid(*connective_ptr) == typeid(expression::OR))
+                              {
+                                  matcher = std::make_unique(
+                                      build_predicate(connective->left, left_schema, right_schema),
+                                      build_predicate(connective->right, left_schema, right_schema));
+                              }
+                          },
+                          [&left_schema, &right_schema, &matcher](const std::unique_ptr &atom) {
+                              matcher = build_predicate(atom, left_schema, right_schema);
+                          },
+                          [](bool) {
+                              /* Nothing to deal with */
+                          }},
+               predicate);
+
+    return matcher;
+}
+/*
+ * Translates one comparison atom into a matcher for tuples of `schema`.
+ *
+ * Operands that are attributes are resolved to column indices of `schema`; an attribute
+ * that cannot be resolved trips an assert. For every comparison type (EQ, LE, LT, GE, GT
+ * and a final `else` for all remaining atom types) one of two matchers is created:
+ *  - both operands are attributes: a matcher over the two column indices,
+ *  - only the left operand is an attribute: a matcher over the column index and the right
+ *    operand converted by build_value to the type of the left column.
+ * Any other combination falls through to the fallback matcher at the end.
+ * NOTE(review): the matcher classes and comparison tags are template arguments that are
+ * not visible in this extract, so the branches look identical here.
+ */
+std::unique_ptr Builder::build_predicate(
+    const std::unique_ptr &atom, const table::Schema &schema)
+{
+    const auto &left = atom->left;
+    const auto &right = atom->right;
+
+    const auto left_is_attribute = std::holds_alternative(left);
+    const auto right_is_attribute = std::holds_alternative(right);
+
+    assert(
+        (left_is_attribute && !right_is_attribute) // non-attributes are always rhs
+        ||
+        (!left_is_attribute &&
+         !right_is_attribute) // actually, this is constant evaluation... TODO: remove, when optimized in logical plan
+        || (left_is_attribute && right_is_attribute));
+
+    // type of left_index is std::optional
+    std::optional left_index = std::nullopt;
+    if (left_is_attribute)
+    {
+        left_index = schema.column_index(std::get(left));
+    }
+    assert(left_is_attribute == left_index.has_value()); // if it is an attribute, we expect to find it
+
+    std::optional right_index = std::nullopt;
+    if (right_is_attribute)
+    {
+        right_index = schema.column_index(std::get(right));
+    }
+    assert(right_is_attribute == right_index.has_value()); // if it is an attribute, we expect to find it
+
+    auto atom_ptr = atom.get();
+    if (typeid(*atom_ptr) == typeid(expression::EQ))
+    {
+        if (left_index.has_value() && right_index.has_value())
+        {
+            return std::make_unique>(
+                left_index.value(), right_index.value());
+        }
+        else if (left_index.has_value() && right_index.has_value() == false)
+        {
+            return std::make_unique>(
+                left_index.value(), build_value(right, schema[left_index.value()].type()));
+        }
+    }
+    else if (typeid(*atom_ptr) == typeid(expression::LE))
+    {
+        if (left_index.has_value() && right_index.has_value())
+        {
+            return std::make_unique>(
+                left_index.value(), right_index.value());
+        }
+        else if (left_index.has_value() && right_index.has_value() == false)
+        {
+            return std::make_unique>(
+                left_index.value(), build_value(right, schema[left_index.value()].type()));
+        }
+    }
+    else if (typeid(*atom_ptr) == typeid(expression::LT))
+    {
+        if (left_index.has_value() && right_index.has_value())
+        {
+            return std::make_unique>(
+                left_index.value(), right_index.value());
+        }
+        else if (left_index.has_value() && right_index.has_value() == false)
+        {
+            return std::make_unique>(
+                left_index.value(), build_value(right, schema[left_index.value()].type()));
+        }
+    }
+    else if (typeid(*atom_ptr) == typeid(expression::GE))
+    {
+        if (left_index.has_value() && right_index.has_value())
+        {
+            return std::make_unique>(
+                left_index.value(), right_index.value());
+        }
+        else if (left_index.has_value() && right_index.has_value() == false)
+        {
+            return std::make_unique>(
+                left_index.value(), build_value(right, schema[left_index.value()].type()));
+        }
+    }
+    else if (typeid(*atom_ptr) == typeid(expression::GT))
+    {
+        if (left_index.has_value() && right_index.has_value())
+        {
+            return std::make_unique>(
+                left_index.value(), right_index.value());
+        }
+        else if (left_index.has_value() && right_index.has_value() == false)
+        {
+            return std::make_unique>(
+                left_index.value(), build_value(right, schema[left_index.value()].type()));
+        }
+    }
+    else
+    {
+        if (left_index.has_value() && right_index.has_value())
+        {
+            return std::make_unique>(
+                left_index.value(), right_index.value());
+        }
+        else if (left_index.has_value() && right_index.has_value() == false)
+        {
+            return std::make_unique>(
+                left_index.value(), build_value(right, schema[left_index.value()].type()));
+        }
+    }
+
+    // Fallback (should never happen)
+    return std::make_unique();
+}
+/*
+ * Translates one comparison atom into a matcher over two schemas.
+ *
+ * Both operands must be attributes (asserted). The left operand is looked up in
+ * `left_schema` and the right operand in `right_schema`; only if BOTH lookups fail, the
+ * operands are looked up the other way round. Both indices must exist afterwards
+ * (asserted). The matcher is then chosen by the dynamic type of the atom (EQ, LE, LT, GE,
+ * GT, and `else` for everything remaining).
+ * Note that `left` and `right` are copies of the atom's operands (plain `auto`).
+ * NOTE(review): when the operands are swapped the comparison type is not mirrored —
+ * confirm this is intended for the non-symmetric comparisons.
+ */
+std::unique_ptr Builder::build_predicate(
+    const std::unique_ptr &atom, const beedb::table::Schema &left_schema,
+    const beedb::table::Schema &right_schema)
+{
+    auto left = atom->left;
+    auto right = atom->right;
+
+    const auto is_left_attribute = std::holds_alternative(left);
+    const auto is_right_attribute = std::holds_alternative(right);
+    assert(is_left_attribute && is_right_attribute);
+
+    auto left_index = left_schema.column_index(std::get(left));
+    auto right_index = right_schema.column_index(std::get(right));
+
+    if (left_index.has_value() == false && right_index.has_value() == false)
+    {
+        left_index = left_schema.column_index(std::get(right));
+        right_index = right_schema.column_index(std::get(left));
+    }
+
+    assert(left_index.has_value());
+    assert(right_index.has_value());
+
+    auto atom_ptr = atom.get();
+    if (typeid(*atom_ptr) == typeid(expression::EQ))
+    {
+        return std::make_unique>(
+            left_index.value(), right_index.value());
+    }
+    else if (typeid(*atom_ptr) == typeid(expression::LE))
+    {
+        return std::make_unique>(
+            left_index.value(), right_index.value());
+    }
+    else if (typeid(*atom_ptr) == typeid(expression::LT))
+    {
+        return std::make_unique>(
+            left_index.value(), right_index.value());
+    }
+    else if (typeid(*atom_ptr) == typeid(expression::GE))
+    {
+        return std::make_unique>(
+            left_index.value(), right_index.value());
+    }
+    else if (typeid(*atom_ptr) == typeid(expression::GT))
+    {
+        return std::make_unique>(
+            left_index.value(), right_index.value());
+    }
+    else
+    {
+        return std::make_unique>(
+            left_index.value(), right_index.value());
+    }
+}
+/*
+ * Converts a literal operand into a table::Value of the column type `type`.
+ *
+ * The operand is visited; the branch is selected at compile time from the alternative the
+ * operand holds. In the two numeric branches the operand is cast according to `type`
+ * (INT or LONG, FLOAT or DOUBLE). If the held alternative and `type` match no branch,
+ * `value` is left as declared and wrapped as is.
+ * NOTE(review): the tested alternatives and the cast targets are template arguments that
+ * are not visible in this extract.
+ */
+beedb::table::Value Builder::build_value(const beedb::expression::Operand &operand, const table::Type &type)
+{
+    table::Value::value_type value;
+    std::visit(
+        [&value, &type](auto &&op) {
+            using T = std::decay_t;
+            if constexpr (std::is_same::value)
+            {
+                value = op;
+            }
+            else if constexpr (std::is_same::value)
+            {
+                if (type == table::Type::INT)
+                {
+                    value = static_cast(op);
+                }
+                else if (type == table::Type::LONG)
+                {
+                    value = static_cast(op);
+                }
+            }
+            else if constexpr (std::is_same::value)
+            {
+                if (type == table::Type::FLOAT)
+                {
+                    value = static_cast(op);
+                }
+                else if (type == table::Type::DOUBLE)
+                {
+                    value = static_cast(op);
+                }
+            }
+        },
+        operand);
+
+    return table::Value{type, value};
+}
+/*
+ * Builds one tuple of `schema` from the values of an INSERT statement.
+ *
+ * attributes[i] names the column that receives values[i]; every attribute must exist in
+ * the schema (asserted). Entries of `values` without a value are skipped. Every column
+ * that received no value is afterwards filled with a placeholder: numeric_limits::min()
+ * for INT, LONG, DOUBLE and FLOAT columns, nullptr for CHAR columns.
+ * NOTE(review): values[i] is read for every index of `attributes`, so `values` is assumed
+ * to be at least as long as `attributes` — TODO confirm the caller guarantees this.
+ */
+beedb::table::Tuple Builder::build_tuple(const beedb::table::Schema &schema,
+                                         const beedb::expression::Attributes &attributes,
+                                         const std::vector> &values)
+{
+    table::Tuple tuple(schema, schema.row_size());
+
+    std::set set_index; // indices of the columns that got an explicit value
+    for (auto i = 0u; i < attributes.size(); i++)
+    {
+        const auto &attribute = attributes[i];
+        const auto index = schema.column_index(attribute);
+        assert(index.has_value());
+        if (values[i].has_value() == false)
+        {
+            continue;
+        }
+        set_index.insert(index.value());
+        const auto &column = schema.column(index.value());
+        if (column == table::Type::INT)
+        {
+            auto value = std::int32_t(std::get(values[i].value()));
+            tuple.set(index.value(), value);
+        }
+        else if (column == table::Type::LONG)
+        {
+            auto value = std::get(values[i].value());
+            tuple.set(index.value(), value);
+        }
+        else if (column == table::Type::FLOAT)
+        {
+            auto value = std::get(values[i].value());
+            tuple.set(index.value(), value);
+        }
+        else if (column == table::Type::DOUBLE)
+        {
+            auto value = double(std::get(values[i].value()));
+            tuple.set(index.value(), value);
+        }
+        else if (column == table::Type::CHAR)
+        {
+            auto value = std::get(values[i].value());
+            tuple.set(index.value(), value);
+        }
+    }
+
+    // Set values to NULL not given by the INSERT statement
+    for (auto i = 0u; i < schema.size(); i++)
+    {
+        if (set_index.find(i) == set_index.end())
+        {
+            const auto &column = schema.column(i);
+            if (column == table::Type::INT)
+            {
+                auto value = std::numeric_limits::min();
+                tuple.set(i, value);
+            }
+            else if (column == table::Type::LONG)
+            {
+                auto value = std::numeric_limits::min();
+                tuple.set(i, value);
+            }
+            else if (column == table::Type::DOUBLE)
+            {
+                auto value = std::numeric_limits::min();
+                tuple.set(i, value);
+            }
+            else if (column == table::Type::FLOAT)
+            {
+                auto value = std::numeric_limits::min();
+                tuple.set(i, value);
+            }
+            else if (column == table::Type::CHAR)
+            {
+                auto value = nullptr;
+                tuple.set(i, value);
+            }
+        }
+    }
+
+    return tuple;
+}
+/*
+ * Collects the key ranges described by a predicate.
+ *
+ * Connectives (AND and OR alike) contribute the ranges of both of their sides; all ranges
+ * end up in one set. An atom contributes a range only if extract_key finds a key in it:
+ *   EQ -> the single key, LT -> [min, key - 1], LE -> [min, key],
+ *   GT -> [key + 1, max], GE -> [key, max];
+ * atoms of any other type contribute nothing. The bool alternative contributes nothing.
+ * NOTE(review): `key - 1` / `key + 1` are not guarded against a key equal to the numeric
+ * limit, and AND is not intersected — confirm both are acceptable for the callers.
+ */
+std::set Builder::extract_key_ranges(const beedb::expression::Predicate &predicate)
+{
+    std::set key_ranges;
+
+    std::visit(overloaded{[&key_ranges](const std::unique_ptr &connective) {
+                              auto connective_ptr = connective.get();
+                              if (typeid(*connective_ptr) == typeid(expression::AND) ||
+                                  typeid(*connective_ptr) == typeid(expression::OR))
+                              {
+                                  const auto left = Builder::extract_key_ranges(connective->left);
+                                  const auto right = Builder::extract_key_ranges(connective->right);
+                                  key_ranges.insert(left.begin(), left.end());
+                                  key_ranges.insert(right.begin(), right.end());
+                              }
+                          },
+                          [&key_ranges](const std::unique_ptr &atom) {
+                              auto key = extract_key(atom);
+                              if (key.has_value())
+                              {
+                                  auto atom_ptr = atom.get();
+                                  if (typeid(*atom_ptr) == typeid(expression::EQ))
+                                  {
+                                      key_ranges.insert({key.value()});
+                                  }
+                                  else if (typeid(*atom_ptr) == typeid(expression::LT))
+                                  {
+                                      key_ranges.insert({std::numeric_limits::min(), key.value() - 1});
+                                  }
+                                  else if (typeid(*atom_ptr) == typeid(expression::LE))
+                                  {
+                                      key_ranges.insert({std::numeric_limits::min(), key.value()});
+                                  }
+                                  else if (typeid(*atom_ptr) == typeid(expression::GT))
+                                  {
+                                      key_ranges.insert({key.value() + 1, std::numeric_limits::max()});
+                                  }
+                                  else if (typeid(*atom_ptr) == typeid(expression::GE))
+                                  {
+                                      key_ranges.insert({key.value(), std::numeric_limits::max()});
+                                  }
+                              }
+                          },
+                          [](bool) {}},
+               predicate);
+
+    return key_ranges;
+}
+/*
+ * Returns the key operand of an atom: the left operand if it holds the tested alternative,
+ * otherwise the right operand if that one does, otherwise an empty optional.
+ * NOTE(review): the tested alternative (template argument) is not visible in this extract.
+ */
+std::optional Builder::extract_key(const std::unique_ptr &atom)
+{
+    const auto &left = atom->left;
+    const auto &right = atom->right;
+
+    if (std::holds_alternative(left))
+    {
+        return {std::get(left)};
+    }
+    else if (std::holds_alternative(right))
+    {
+        return {std::get(right)};
+    }
+
+    return {};
+}
\ No newline at end of file
diff --git a/src/plan/physical/plan.cpp b/src/plan/physical/plan.cpp
new file mode 100644
index 0000000..d1ab042
--- /dev/null
+++ b/src/plan/physical/plan.cpp
@@ -0,0 +1,46 @@
+/*------------------------------------------------------------------------------*
+ * Architecture & Implementation of DBMS *
+ *------------------------------------------------------------------------------*
+ * Copyright 2020 Databases and Information Systems Group TU Dortmund *
+ * Visit us
at * + * http://dbis.cs.tu-dortmund.de/cms/en/home/ * + * * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * + * OTHER DEALINGS IN THE SOFTWARE. * + * * + * Authors (alphabetical list): * + * Maximilian Berens * + * Roland Kühn * + * Jan Mühlig * + *------------------------------------------------------------------------------* + */ + +#include + +using namespace beedb::plan::physical; + +void Plan::execute(std::function schema_callback, + std::function row_callback) +{ + if (this->_root == nullptr) + { + return; + } + + this->_root->open(); + + schema_callback(this->_root->schema()); + auto tuple = this->_root->next(); + while (tuple == true) + { + row_callback(tuple); + tuple = this->_root->next(); + } + + this->_root->close(); +} \ No newline at end of file diff --git a/src/table/column.cpp b/src/table/column.cpp new file mode 100644 index 0000000..694b0bf --- /dev/null +++ b/src/table/column.cpp @@ -0,0 +1,31 @@ +/*------------------------------------------------------------------------------* + * Architecture & Implementation of DBMS * + *------------------------------------------------------------------------------* + * Copyright 2020 Databases and Information Systems Group TU Dortmund * + * Visit us at * + * http://dbis.cs.tu-dortmund.de/cms/en/home/ * + * * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL * + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * + * OTHER DEALINGS IN THE SOFTWARE. * + * * + * Authors (alphabetical list): * + * Maximilian Berens * + * Roland Kühn * + * Jan Mühlig * + *------------------------------------------------------------------------------* + */ + +#include
+namespace beedb::table +{ + +std::ostream &operator<<(std::ostream &stream, const Column &column) +{ + return stream << column.type().name() << " " << (column.is_indexed() ? "INDEXED" : "NOT INDEXED"); +} +} // namespace beedb::table \ No newline at end of file diff --git a/src/table/table.cpp b/src/table/table.cpp new file mode 100644 index 0000000..f3d54ac --- /dev/null +++ b/src/table/table.cpp @@ -0,0 +1,40 @@ +/*------------------------------------------------------------------------------* + * Architecture & Implementation of DBMS * + *------------------------------------------------------------------------------* + * Copyright 2020 Databases and Information Systems Group TU Dortmund * + * Visit us at * + * http://dbis.cs.tu-dortmund.de/cms/en/home/ * + * * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * + * OTHER DEALINGS IN THE SOFTWARE. * + * * + * Authors (alphabetical list): * + * Maximilian Berens * + * Roland Kühn * + * Jan Mühlig * + *------------------------------------------------------------------------------* + */ + +#include
+namespace beedb::table +{ +std::ostream &operator<<(std::ostream &stream, const Table &table) +{ + stream << table.name() << " ("; + for (auto const &column : table.schema().columns()) + { + stream << column; + if (column != table.schema().columns().back()) + { + stream << ", "; + } + } + + return stream << ")"; +} +} // namespace beedb::table \ No newline at end of file diff --git a/src/table/table_disk_manager.cpp b/src/table/table_disk_manager.cpp new file mode 100644 index 0000000..754373e --- /dev/null +++ b/src/table/table_disk_manager.cpp @@ -0,0 +1,115 @@ +/*------------------------------------------------------------------------------* + * Architecture & Implementation of DBMS * + *------------------------------------------------------------------------------* + * Copyright 2020 Databases and Information Systems Group TU Dortmund * + * Visit us at * + * http://dbis.cs.tu-dortmund.de/cms/en/home/ * + * * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * + * OTHER DEALINGS IN THE SOFTWARE. * + * * + * Authors (alphabetical list): * + * Maximilian Berens * + * Roland Kühn * + * Jan Mühlig * + *------------------------------------------------------------------------------* + */ + +#include
+
+using namespace beedb::table;
+/*
+ * Keeps a reference to the buffer manager; every page access below (pin, unpin, allocate)
+ * goes through it.
+ */
+TableDiskManager::TableDiskManager(disk::BufferManager &buffer_manager) : _buffer_manager(buffer_manager)
+{
+}
+/*
+ * Appends `tuple` to `table` and releases the page it was written to. The page is unpinned
+ * with `true` as second argument (presumably the "dirty" flag — confirm against
+ * BufferManager::unpin).
+ */
+void TableDiskManager::add_row(Table &table, beedb::table::Tuple &&tuple)
+{
+    auto [page, _] = this->add_row(table, tuple);
+    this->_buffer_manager.unpin(page, true);
+}
+/*
+ * Appends `tuple` to `table` and returns a new Tuple built from `schema`, the id of the
+ * page the row was written to, the offset inside that page and `(*page)[offset]`.
+ * NOTE(review): unlike the overload above, the page is not unpinned here — presumably
+ * because the returned tuple refers to the page's memory; confirm who unpins it.
+ */
+Tuple TableDiskManager::add_row(Table &table, const Schema &schema, Tuple &&tuple)
+{
+    auto [page, offset] = this->add_row(table, tuple);
+    return Tuple(schema, page->id(), offset, (*page)[offset]);
+}
+/*
+ * Shared implementation of the two overloads above: picks a page via find_page_for_row,
+ * pins it and appends row_size() bytes of the tuple's data.
+ * Returns the page (still pinned, the caller is responsible for it) and the value
+ * returned by Page::append, which the callers use as the offset of the row.
+ */
+std::pair TableDiskManager::add_row(beedb::table::Table &table,
+                                    beedb::table::Tuple &tuple)
+{
+    const auto page_id = this->find_page_for_row(table);
+    auto page = this->_buffer_manager.pin(page_id);
+
+    const auto offset = page->append(tuple.data(), table.schema().row_size());
+    return {page, offset};
+}
+/*
+ * Creates one Tuple for every row stored on `page`. Rows are taken at offsets 0,
+ * row_size(), 2 * row_size(), ... for as long as the offset is below page->size().
+ * The page is neither pinned nor unpinned here.
+ */
+std::vector TableDiskManager::read_rows(disk::Page *page, const Schema &schema)
+{
+    std::vector rows;
+    auto offset = 0u;
+    while (offset < page->size())
+    {
+        table::Tuple row(schema, page->id(), offset, (*page)[offset]);
+        rows.push_back(std::move(row));
+        offset += schema.row_size();
+    }
+
+    return rows;
+}
+/*
+ * Writes the data of `tuple` back to the place it was read from: the tuple's page is
+ * pinned, row_size() bytes at the tuple's in-page offset are overwritten, and the page is
+ * unpinned again with `true` as second argument.
+ * The tuple must carry a valid page id (asserted).
+ */
+void TableDiskManager::update_row(const Tuple &tuple)
+{
+    assert(tuple.page_id() != disk::Page::INVALID_PAGE_ID);
+    assert(tuple.in_page_offset() >= 0);
+
+    auto page = this->_buffer_manager.pin(tuple.page_id());
+    page->overwrite(tuple.in_page_offset(), tuple.data(), tuple.schema().row_size());
+
+    this->_buffer_manager.unpin(tuple.page_id(), true);
+}
+/*
+ * Returns the id of the page the next row of `table` should be appended to.
+ *
+ * The search starts at the table's last page if one is recorded, otherwise at the table's
+ * first page, and follows the next-page links until a page has enough free space. If the
+ * end of the chain is reached, a new page is allocated, linked behind the current page and
+ * recorded as the table's last page.
+ * The chosen page is unpinned before its id is returned, so callers have to pin it again.
+ */
+beedb::disk::Page::page_id TableDiskManager::find_page_for_row(Table &table)
+{
+    auto starting_page_id = table.page_id();
+    if (table.last_page_id() != disk::Page::INVALID_PAGE_ID)
+    {
+        starting_page_id = table.last_page_id();
+    }
+
+    auto page = this->_buffer_manager.pin(starting_page_id);
+
+    const auto needed = table.schema().row_size();
+    while (page)
+    {
+        if (page->free_space() > needed) // strictly greater: a page with exactly `needed` bytes left is skipped
+        {
+            break;
+        }
+        if (page->has_next_page())
+        {
+            const auto next_page_id = page->next_page_id();
+            this->_buffer_manager.unpin(page, false);
+            page = this->_buffer_manager.pin(next_page_id);
+        }
+        else
+        {
+            auto new_page = this->_buffer_manager.allocate();
+            page->next_page_id(new_page->id());
+            this->_buffer_manager.unpin(page, true); // `true`: the next-page link of this page was changed
+            table.last_page_id(new_page->id());
+            page = new_page;
+            break;
+        }
+    }
+
+    const auto page_id = page->id();
+    this->_buffer_manager.unpin(page, false);
+    return page_id;
+}
\ No newline at end of file
diff --git a/src/table/value.cpp b/src/table/value.cpp
new file mode 100644
index 0000000..4054a74
--- /dev/null
+++ b/src/table/value.cpp
@@ -0,0 +1,31 @@
+/*------------------------------------------------------------------------------*
+ * Architecture & Implementation of DBMS *
+ *------------------------------------------------------------------------------*
+ * Copyright 2020 Databases and Information Systems Group TU Dortmund *
+ * Visit us at *
+ * http://dbis.cs.tu-dortmund.de/cms/en/home/ *
+ * *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS *
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR *
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, *
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR *
+ * OTHER DEALINGS IN THE SOFTWARE. *
+ * *
+ * Authors (alphabetical list): *
+ * Maximilian Berens *
+ * Roland Kühn *
+ * Jan Mühlig *
+ *------------------------------------------------------------------------------*
+ */
+
+#include
+namespace beedb::table +{ + +std::ostream &operator<<(std::ostream &stream, const Value &value) +{ + return stream << value.operator std::string(); +} +} // namespace beedb::table \ No newline at end of file diff --git a/src/util/ini_parser.cpp b/src/util/ini_parser.cpp new file mode 100644 index 0000000..6d39073 --- /dev/null +++ b/src/util/ini_parser.cpp @@ -0,0 +1,68 @@ +/*------------------------------------------------------------------------------* + * Architecture & Implementation of DBMS * + *------------------------------------------------------------------------------* + * Copyright 2020 Databases and Information Systems Group TU Dortmund * + * Visit us at * + * http://dbis.cs.tu-dortmund.de/cms/en/home/ * + * * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * + * OTHER DEALINGS IN THE SOFTWARE. 
* + * * + * Authors (alphabetical list): * + * Maximilian Berens * + * Roland Kühn * + * Jan Mühlig * + *------------------------------------------------------------------------------* + */ + +#include +#include +#include + +using namespace beedb::util; + +IniParser::IniParser(const std::string &file_name) +{ + this->parse(file_name); +} + +void IniParser::parse(const std::string &file_name) +{ + std::regex section_regex("^\\[([\\w\\s\\-\\.]+)\\]\\s*$"); + std::regex item_regex("^([\\w\\s\\-\\_\\.]+)\\s*=\\s*([\\w\\-\\_\\.]+)\\s*\\;?.*$"); + std::smatch match; + + std::string line; + std::ifstream file_stream(file_name); + std::string current_section = ""; + + while (std::getline(file_stream, line)) + { + if (line.empty() == false) + { + if (std::regex_match(line, match, section_regex)) + { + current_section = match[1].str(); + } + else if (std::regex_match(line, match, item_regex)) + { + auto key = match[1].str(); + const auto value = match[2].str(); + + // Remove leading and trailing spaces from key + key.erase(key.begin(), + std::find_if(key.begin(), key.end(), std::bind1st(std::not_equal_to(), ' '))); + key.erase(std::find_if(key.rbegin(), key.rend(), std::bind1st(std::not_equal_to(), ' ')).base(), + key.end()); + + const auto config_key = key_t{std::make_pair(current_section, std::move(key))}; + this->_configurations[config_key] = std::move(value); + } + } + } +} \ No newline at end of file diff --git a/src/util/text_table.cpp b/src/util/text_table.cpp new file mode 100644 index 0000000..0e2d175 --- /dev/null +++ b/src/util/text_table.cpp @@ -0,0 +1,115 @@ +/*------------------------------------------------------------------------------* + * Architecture & Implementation of DBMS * + *------------------------------------------------------------------------------* + * Copyright 2020 Databases and Information Systems Group TU Dortmund * + * Visit us at * + * http://dbis.cs.tu-dortmund.de/cms/en/home/ * + * * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY 
OF ANY KIND, EXPRESS * + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * + * OTHER DEALINGS IN THE SOFTWARE. * + * * + * Authors (alphabetical list): * + * Maximilian Berens * + * Roland Kühn * + * Jan Mühlig * + *------------------------------------------------------------------------------* + */ + +#include +#include + +using namespace beedb::util; + +void TextTable::header(const std::vector &&row_values) +{ + if (this->_table_rows.empty()) + { + this->_table_rows.push_back(row_values); + } + else + { + this->_table_rows.insert(this->_table_rows.begin(), row_values); + } +} + +std::vector TextTable::length_per_column() const +{ + if (_table_rows.empty()) + { + return {}; + } + + std::vector column_lengths; + column_lengths.resize(this->_table_rows[0].size()); + for (const auto &row : this->_table_rows) + { + for (auto i = 0u; i < row.size(); i++) + { + column_lengths[i] = std::max(column_lengths[i], printed_length(row[i])); + } + } + + return column_lengths; +} + +std::ostream &TextTable::print_separator_line(std::ostream &stream, + const std::vector &column_lengths) const +{ + for (const auto length : column_lengths) + { + stream << "+" << std::string(length + 2 /* add "+" at start and end to length */, '-'); + } + + return stream << "+\n"; +} + +std::ostream &TextTable::print_row(std::ostream &stream, const std::vector &column_lengths, + const std::vector &row) const +{ + for (auto i = 0u; i < row.size(); i++) + { + const auto &cell = row[i]; + const auto spaces = column_lengths[i] - printed_length(cell); + stream << "| " << cell << std::string(spaces, ' ') << " "; + } + + return stream << "|\n"; +} + +std::size_t 
TextTable::printed_length(const std::string &input) const +{ + const auto print_size = + std::count_if(input.begin(), input.end(), [](std::uint8_t c) { return std::isprint(c) || std::iswprint(c); }); + + return input.size() - ((input.size() - print_size) / 2); +} + +namespace beedb::util +{ +std::ostream &operator<<(std::ostream &stream, const TextTable &text_table) +{ + if (text_table._table_rows.empty()) + { + return stream; + } + + const auto length_per_column = text_table.length_per_column(); + text_table.print_separator_line(stream, length_per_column); + text_table.print_row(stream, length_per_column, text_table._table_rows[0]); + text_table.print_separator_line(stream, length_per_column); + + for (auto i = 1u; i < text_table._table_rows.size(); i++) + { + text_table.print_row(stream, length_per_column, text_table._table_rows[i]); + } + + text_table.print_separator_line(stream, length_per_column); + + return stream << std::flush; +} +} // namespace beedb::util \ No newline at end of file