340 lines
11 KiB
C++
340 lines
11 KiB
C++
/*------------------------------------------------------------------------------*
|
|
* Architecture & Implementation of DBMS *
|
|
*------------------------------------------------------------------------------*
|
|
* Copyright 2022 Databases and Information Systems Group TU Dortmund *
|
|
* Visit us at *
|
|
* http://dbis.cs.tu-dortmund.de/cms/en/home/ *
|
|
* *
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS *
|
|
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
|
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR *
|
|
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, *
|
|
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR *
|
|
* OTHER DEALINGS IN THE SOFTWARE. *
|
|
* *
|
|
* Authors: *
|
|
* Maximilian Berens <maximilian.berens@tu-dortmund.de> *
|
|
* Roland Kühn <roland.kuehn@cs.tu-dortmund.de> *
|
|
* Jan Mühlig <jan.muehlig@tu-dortmund.de> *
|
|
*------------------------------------------------------------------------------*
|
|
*/
|
|
|
|
#pragma once
|
|
|
|
#include "column.h"
|
|
#include <cassert>
|
|
#include <expression/operation.h>
|
|
#include <expression/term.h>
|
|
#include <vector>
|
|
|
|
namespace beedb::table
|
|
{
|
|
|
|
class Schema
|
|
{
|
|
public:
|
|
using ColumnIndexType = std::size_t;
|
|
|
|
Schema() = default;
|
|
Schema(const Schema &other) = default;
|
|
Schema(Schema &&other) = default;
|
|
|
|
/**
|
|
* Schema creates an empty schema
|
|
*
|
|
* @param table_name Name of the table.
|
|
*/
|
|
explicit Schema(const std::string &table_name) : _table_name(table_name)
|
|
{
|
|
}
|
|
|
|
/**
|
|
* Schema creates an empty schema
|
|
*
|
|
* @param table_name Name of the table.
|
|
*/
|
|
explicit Schema(std::string &&table_name) : _table_name(std::move(table_name))
|
|
{
|
|
}
|
|
|
|
Schema(std::string &&table_name, Schema &&other) noexcept
|
|
: _table_name(std::move(table_name)), _columns(std::move(other._columns)), _terms(std::move(other._terms)),
|
|
_offset(std::move(other._offset)), _column_order(std::move(other._column_order)), _row_size(other._row_size)
|
|
{
|
|
}
|
|
|
|
/**
|
|
* Combines to schemas to a new one.
|
|
*
|
|
* @param first First schema.
|
|
* @param second Second schema.
|
|
* @param table_name New name for the new schema (will be moved).
|
|
*/
|
|
Schema(const Schema &first, const Schema &second, std::string &&table_name) : _table_name(std::move(table_name))
|
|
{
|
|
const auto size = first.size() + second.size();
|
|
_columns.reserve(size);
|
|
_terms.reserve(size);
|
|
_column_order.reserve(size);
|
|
_offset.reserve(size);
|
|
|
|
_offset.insert(_offset.begin(), first._offset.begin(), first._offset.end());
|
|
for (const auto offset : second._offset)
|
|
{
|
|
_offset.push_back(first._row_size + offset);
|
|
}
|
|
|
|
for (auto i = 0u; i < first.size(); ++i)
|
|
{
|
|
_columns.push_back(first._columns[i]);
|
|
_terms.push_back(first._terms[i]);
|
|
_column_order.push_back(i);
|
|
}
|
|
|
|
for (auto i = 0u; i < second.size(); ++i)
|
|
{
|
|
_columns.push_back(second._columns[i]);
|
|
_terms.push_back(second._terms[i]);
|
|
_column_order.push_back(_columns.size() - 1);
|
|
}
|
|
|
|
_row_size = first._row_size + second._row_size;
|
|
}
|
|
|
|
Schema(const Schema &other, const std::vector<expression::Term> &terms, const std::string &new_table_name)
|
|
: _table_name(new_table_name), _columns(other._columns), _terms(terms), _offset(other._offset),
|
|
_column_order(other._column_order), _row_size(other._row_size)
|
|
{
|
|
}
|
|
|
|
Schema(const Schema &other, const std::vector<expression::Term> &terms)
|
|
: _table_name(other._table_name), _columns(other._columns), _terms(terms), _offset(other._offset),
|
|
_row_size(other._row_size)
|
|
{
|
|
// build _column_order based on "attributes", since it's order does not necessarily coincide with the physical
|
|
// column order
|
|
for (const auto &term : _terms)
|
|
{
|
|
|
|
// TODO: At this point, "term" may have the right column name but another table name (since other
|
|
// comes from the physical table and "term" from the logical layer (i.e. "movie.id" vs "m.id").
|
|
if (term.is_attribute())
|
|
{
|
|
const auto old_index = other.column_index(
|
|
term.get<expression::Attribute>().column_name()); // the "other" schema has the correct mapping!
|
|
if (old_index.has_value())
|
|
{
|
|
_column_order.push_back(old_index.value());
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Creates a schema, used for deserialization.
|
|
*
|
|
* @param columns Columns of the schema.
|
|
* @param attributes Visible attributes.
|
|
* @param offsets Offsets for columns.
|
|
* @param column_orders Orders of attributes.
|
|
* @param row_size Size of the row in bytes.
|
|
*/
|
|
Schema(std::vector<Column> &&columns, std::vector<expression::Term> &&terms, std::vector<std::uint16_t> &&offsets,
|
|
std::vector<std::uint16_t> &&column_orders, const std::uint16_t row_size)
|
|
: _table_name(""), _columns(std::move(columns)), _terms(std::move(terms)), _offset(std::move(offsets)),
|
|
_column_order(std::move(column_orders)), _row_size(row_size)
|
|
{
|
|
}
|
|
|
|
~Schema() = default;
|
|
|
|
/**
|
|
* Adds a new column and its attribute to the schema.
|
|
*
|
|
* @param column Column to be added.
|
|
* @param attribute Logical attribute for the column.
|
|
* @param visible True, when the column is visible for output.
|
|
*/
|
|
void add(Column &&column, expression::Term &&term)
|
|
{
|
|
_terms.emplace_back(std::move(term));
|
|
_columns.push_back(column);
|
|
_column_order.push_back(_columns.size() - 1);
|
|
if (_offset.empty())
|
|
{
|
|
_offset.push_back(0u);
|
|
}
|
|
else
|
|
{
|
|
const auto last_index = _offset.size() - 1;
|
|
_offset.push_back(_offset[last_index] + _columns[last_index].type().size());
|
|
}
|
|
_row_size += column.type().size();
|
|
}
|
|
|
|
Schema &operator=(const Schema &) = default;
|
|
Schema &operator=(Schema &&) = default;
|
|
|
|
/**
|
|
* @return Number of columns in the schema.
|
|
*/
|
|
[[nodiscard]] std::size_t size() const
|
|
{
|
|
return _columns.size();
|
|
}
|
|
|
|
/**
|
|
* Calculates the byte-offset for a specific column.
|
|
*
|
|
* @param column_index Index of the column.
|
|
* @return Offset in number of bytes for the raw data access.
|
|
*/
|
|
[[nodiscard]] std::uint16_t offset(const std::size_t column_index) const
|
|
{
|
|
return _offset[column_index];
|
|
}
|
|
|
|
/**
|
|
* @return Number of bytes of the raw data.
|
|
*/
|
|
[[nodiscard]] std::uint16_t row_size() const
|
|
{
|
|
return _row_size;
|
|
}
|
|
|
|
[[nodiscard]] std::optional<ColumnIndexType> column_index(const expression::Term &term) const
|
|
{
|
|
for (auto i = 0u; i < _terms.size(); i++)
|
|
{
|
|
if (_terms[i] == term)
|
|
{ // uses combined_name/expression::Attribute equality semantics
|
|
return _column_order[i];
|
|
}
|
|
}
|
|
|
|
return std::nullopt;
|
|
}
|
|
|
|
/**
|
|
* Calculates the index of a column in the schema. Search is solely based on the attributes NAME,
|
|
* neither on its table name or alias!!
|
|
*
|
|
* @param attribute_name Name of the column.
|
|
* @return Index in the schema.
|
|
*/
|
|
[[nodiscard]] std::optional<ColumnIndexType> column_index(const std::string &attribute_name) const
|
|
{
|
|
// this version is solely based on the attribute name
|
|
for (auto i = 0u; i < _terms.size(); i++)
|
|
{
|
|
const auto &term = _terms[i];
|
|
if (term.alias().has_value() && term.alias() == attribute_name)
|
|
{
|
|
return _column_order[i];
|
|
}
|
|
else if (term.is_attribute())
|
|
{
|
|
if (term.get<expression::Attribute>().column_name() == attribute_name)
|
|
{
|
|
return _column_order[i];
|
|
}
|
|
}
|
|
}
|
|
return std::nullopt;
|
|
}
|
|
|
|
/**
|
|
* Checks whether the schema holds a specific attribute.
|
|
*
|
|
* @param attribute Logical attribute.
|
|
* @return True, when the schema contains the attribute.
|
|
*/
|
|
[[nodiscard]] bool contains(const expression::Term &term) const
|
|
{
|
|
return column_index(term).has_value();
|
|
}
|
|
|
|
/**
|
|
* Checks whether the schema holds a specific column.
|
|
*
|
|
* @param column_name Name of the column.
|
|
* @return True, when the schema contains a column with the give name.
|
|
*/
|
|
[[nodiscard]] bool contains(const std::string &column_name) const
|
|
{
|
|
return column_index(column_name).has_value();
|
|
}
|
|
|
|
/**
|
|
* @return Name of the table represented by this schema.
|
|
*/
|
|
[[nodiscard]] const std::string &table_name() const
|
|
{
|
|
return _table_name;
|
|
}
|
|
|
|
/**
|
|
* @return Constant set of all columns.
|
|
*/
|
|
[[nodiscard]] const std::vector<Column> &columns() const
|
|
{
|
|
return _columns;
|
|
}
|
|
|
|
/**
|
|
* Access to a specific column.
|
|
*
|
|
* @param index Index of the column.
|
|
* @return Constant access to the column.
|
|
*/
|
|
[[nodiscard]] const Column &column(const std::size_t index) const
|
|
{
|
|
return _columns[index];
|
|
}
|
|
|
|
[[nodiscard]] Column &column(const std::size_t index)
|
|
{
|
|
return _columns[index];
|
|
}
|
|
|
|
[[nodiscard]] const std::vector<expression::Term> &terms() const
|
|
{
|
|
return _terms;
|
|
}
|
|
|
|
/**
|
|
* @return True, when no column was added to the schema.
|
|
*/
|
|
[[nodiscard]] bool empty() const
|
|
{
|
|
return _columns.empty();
|
|
}
|
|
|
|
/**
|
|
* @return Order of the column indices.
|
|
*/
|
|
[[nodiscard]] const std::vector<std::uint16_t> &column_order() const
|
|
{
|
|
return _column_order;
|
|
}
|
|
|
|
Column &operator[](const ColumnIndexType index)
|
|
{
|
|
return _columns[index];
|
|
}
|
|
const Column &operator[](const ColumnIndexType index) const
|
|
{
|
|
return _columns[index];
|
|
}
|
|
|
|
private:
|
|
std::string _table_name;
|
|
std::vector<Column> _columns;
|
|
std::vector<expression::Term> _terms;
|
|
std::vector<std::uint16_t> _offset;
|
|
std::vector<std::uint16_t> _column_order;
|
|
std::uint16_t _row_size = 0u;
|
|
};
|
|
} // namespace beedb::table
|