8000 Propagate inclusion dependencies in the LQP by dey4ss · Pull Request #2701 · hyrise/hyrise · GitHub
[go: up one dir, main page]
More Web Proxy on the site http://driver.im/
Skip to content

Propagate inclusion dependencies in the LQP #2701

8000
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 19 commits into
base: master
Choose a base branch
from
8 changes: 7 additions & 1 deletion src/benchmark/join_order_benchmark.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,13 @@ void add_key_constraints(std::unordered_map<std::string, BenchmarkTableInfo>& ta

// aka_title - 1 PK, 1 FK.
primary_key_constraint(aka_title_table, {"id"});
foreign_key_constraint(aka_title_table, {"movie_id"}, title_table, {"id"});
// The following foreign key is specified in the paper, but it is violated by 93 tuples. You can check that with the
// following query:
// SELECT COUNT(*)
// FROM aka_title LEFT OUTER JOIN title ON aka_title.movie_id = title.id
// WHERE aka_title.movie_id IS NOT NULL AND title.id IS NULL;
//
// foreign_key_constraint(aka_title_table, {"movie_id"}, title_table, {"id"});

// cast_info - 1 PK, 4 FKs.
primary_key_constraint(cast_info_table, {"id"});
Expand Down
2 changes: 2 additions & 0 deletions src/lib/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,8 @@ set(
logical_query_plan/create_view_node.hpp
logical_query_plan/data_dependencies/functional_dependency.cpp
logical_query_plan/data_dependencies/functional_dependency.hpp
logical_query_plan/data_dependencies/inclusion_dependency.cpp
logical_query_plan/data_dependencies/inclusion_dependency.hpp
logical_query_plan/data_dependencies/order_dependency.cpp
logical_query_plan/data_dependencies/order_dependency.hpp
logical_query_plan/data_dependencies/unique_column_combination.cpp
Expand Down
116 changes: 109 additions & 7 deletions src/lib/logical_query_plan/abstract_lqp_node.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,17 @@

#include "expression/abstract_expression.hpp"
#include "expression/expression_utils.hpp"
#include "expression/lqp_column_expression.hpp"
#include "expression/lqp_subquery_expression.hpp"
#include "hyrise.hpp"
#include "logical_query_plan/data_dependencies/functional_dependency.hpp"
#include "logical_query_plan/data_dependencies/inclusion_dependency.hpp"
#include "logical_query_plan/data_dependencies/order_dependency.hpp"
#include "logical_query_plan/data_dependencies/unique_column_combination.hpp"
#include "logical_query_plan/stored_table_node.hpp"
#include "lqp_utils.hpp"
#include "predicate_node.hpp"
#include "storage/storage_manager.hpp"
#include "types.hpp"
#include "update_node.hpp"
#include "utils/assert.hpp"
Expand Down Expand Up @@ -276,7 +281,7 @@ ColumnID AbstractLQPNode::get_column_id(const AbstractExpression& expression) co
}

bool AbstractLQPNode::has_output_expressions(const ExpressionUnorderedSet& expressions) const {
const auto& output_expressions = this->output_expressions();
const auto output_expressions = this->output_expressions();
return contains_all_expressions(expressions, output_expressions);
}

Expand All @@ -292,7 +297,7 @@ bool AbstractLQPNode::has_matching_ucc(const ExpressionUnorderedSet& expressions
DebugAssert(has_output_expressions(expressions),
"The given expressions are not a subset of the LQP's output expressions.");

const auto& unique_column_combinations = this->unique_column_combinations();
const auto unique_column_combinations = this->unique_column_combinations();
if (unique_column_combinations.empty()) {
return false;
}
Expand All @@ -310,7 +315,7 @@ bool AbstractLQPNode::has_matching_od(
DebugAssert(has_output_expressions({ordered_expressions.cbegin(), ordered_expressions.cend()}),
"The given ordered expressions are not a subset of the LQP's output expressions.");

const auto& order_dependencies = this->order_dependencies();
const auto order_dependencies = this->order_dependencies();
if (order_dependencies.empty()) {
return false;
}
Expand Down Expand Up @@ -342,6 +347,87 @@ bool AbstractLQPNode::has_matching_od(
return false;
}

bool AbstractLQPNode::has_matching_ind(const std::vector<std::shared_ptr<AbstractExpression>>& foreign_key_expressions,
const std::vector<std::shared_ptr<AbstractExpression>>& key_expressions) const {
const auto ind_size = key_expressions.size();
Assert(ind_size > 0, "Invalid input. Set of expressions should not be empty.");
DebugAssert(ind_size == foreign_key_expressions.size(), "Invlid IND requested.");
DebugAssert(has_output_expressions({key_expressions.cbegin(), key_expressions.cend()}),
"The given expressions are not a subset of the LQP's output expressions.");

// Gather valid INDs for current node.
const auto inclusion_dependencies = this->inclusion_dependencies();
if (inclusion_dependencies.empty()) {
return false;
}

// Translate required `foreign_key_expressions` to ColumnIDs of the actual stored table.
auto required_table = std::shared_ptr<Table>{};
auto required_column_id_for_expression = std::vector<std::pair<ColumnID, std::shared_ptr<AbstractExpression>>>{};
required_column_id_for_expression.reserve(ind_size);

for (auto expression_idx = ColumnID{0}; expression_idx < ind_size; ++expression_idx) {
const auto& expression = foreign_key_expressions[expression_idx];
DebugAssert(expression->type == ExpressionType::LQPColumn, "Expected column expression.");
const auto& lqp_column_expression = static_cast<const LQPColumnExpression&>(*expression);
const auto original_node = lqp_column_expression.original_node.lock();
Assert(original_node, "Could not resolve original node. LQP is invalid.");
if (original_node->type != LQPNodeType::StoredTable) {
return false;
}

const auto& stored_table_node = static_cast<const StoredTableNode&>(*original_node);
const auto table = Hyrise::get().storage_manager.get_table(stored_table_node.table_name);
if (!required_table) {
required_table = table;
} else {
// Columns must come from a single table.
Assert(required_table == table, "Invalid IND requested.");
}
required_column_id_for_expression.emplace_back(lqp_column_expression.original_column_id,
key_expressions[expression_idx]);
}
// INDs can be equivalent, e.g., [A.a, A.b] in [B.x, B.y] is the same as [A.b, A.a] in [B.y, B.x]. Thus, we sort
// persisted ForeignKeyConstraints by the foreign key's ColumnIDs. We do the same here to compare with the valid INDs
// of the current node.
std::ranges::sort(required_column_id_for_expression, [](const auto& lhs, const auto& rhs) {
return lhs.first < rhs.first;
});

// Look for an inclusion dependency that is based on the given expressions.
for (const auto& ind : inclusion_dependencies) {
if (required_table != ind.included_table) {
continue;
}
// IND references the same included table. Now, check that it also has the required columns in the correct order.
auto required_columns_it = required_column_id_for_expression.cbegin();
const auto found_ind_size = ind.expressions.size();
const auto required_columns_end = required_column_id_for_expression.cend();
for (auto expression_idx = ColumnID{0};
expression_idx < found_ind_size && required_columns_it != required_columns_end; ++expression_idx) {
// Did not reach current required ColumnID yet.
if (required_columns_it->first > ind.included_column_ids[expression_idx]) {
continue;
}

// IND does not match if the current ColumnID is missing there or it references the wrong column.
if (required_columns_it->first < ind.included_column_ids[expression_idx] ||
*required_columns_it->second != *ind.expressions[expression_idx]) {
break;
}

// Current required ColumnID is present and the referenced columns match. Continue with the next one.
++required_columns_it;
}
// IND matches if all ColumnID-Expression-pairs match.
if (required_columns_it == required_columns_end) {
return true;
}
}

return false;
}

FunctionalDependencies AbstractLQPNode::functional_dependencies() const {
// (1) Gather non-trivial FDs and perform sanity checks.
const auto& non_trivial_fds = non_trivial_functional_dependencies();
Expand Down Expand Up @@ -477,12 +563,13 @@ void AbstractLQPNode::_add_output_pointer(const std::shared_ptr<AbstractLQPNode>

UniqueColumnCombinations AbstractLQPNode::_forward_left_unique_column_combinations() const {
Assert(left_input(), "Cannot forward unique column combinations without an input node.");
const auto& input_unique_column_combinations = left_input()->unique_column_combinations();
const auto input_unique_column_combinations = left_input()->unique_column_combinations();

if constexpr (HYRISE_DEBUG) {
// Check whether output expressions are missing.
const auto output_expressions = this->output_expressions();
for (const auto& ucc : input_unique_column_combinations) {
Assert(has_output_expressions(ucc.expressions),
Assert(contains_all_expressions(ucc.expressions, output_expressions),
"Forwarding of UCC is illegal because node misses output expressions.");
}
}
Expand All @@ -491,11 +578,11 @@ UniqueColumnCombinations AbstractLQPNode::_forward_left_unique_column_combinatio

OrderDependencies AbstractLQPNode::_forward_left_order_dependencies() const {
Assert(left_input(), "Cannot forward order dependencies without an input node.");
const auto& input_order_dependencies = left_input()->order_dependencies();
const auto input_order_dependencies = left_input()->order_dependencies();

if constexpr (HYRISE_DEBUG) {
// Check whether output expressions are missing.
const auto& output_expressions = this->output_expressions();
const auto output_expressions = this->output_expressions();
for (const auto& od : input_order_dependencies) {
Assert(contains_all_expressions(od.ordering_expressions, output_expressions) &&
contains_all_expressions(od.ordered_expressions, output_expressions),
Expand All @@ -505,6 +592,21 @@ OrderDependencies AbstractLQPNode::_forward_left_order_dependencies() const {
return input_order_dependencies;
}

InclusionDependencies AbstractLQPNode::_forward_left_inclusion_dependencies() const {
Assert(left_input(), "Cannot forward inclusion dependencies without an input node.");
const auto input_inclusion_dependencies = left_input()->inclusion_dependencies();

if constexpr (HYRISE_DEBUG) {
// Check whether output expressions are missing.
const auto output_expressions = this->output_expressions();
for (const auto& ind : input_inclusion_dependencies) {
Assert(contains_all_expressions(ind.expressions, output_expressions),
"Forwarding of IND is illegal because node misses output expressions.");
}
}
return input_inclusion_dependencies;
}

AbstractExpression::DescriptionMode AbstractLQPNode::_expression_description_mode(const DescriptionMode mode) {
switch (mode) {
case DescriptionMode::Short:
Expand Down
20 changes: 13 additions & 7 deletions src/lib/logical_query_plan/abstract_lqp_node.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

#include "enable_make_for_lqp_node.hpp"
#include "logical_query_plan/data_dependencies/functional_dependency.hpp"
#include "logical_query_plan/data_dependencies/inclusion_dependency.hpp"
#include "logical_query_plan/data_dependencies/order_dependency.hpp"
#include "logical_query_plan/data_dependencies/unique_column_combination.hpp"

Expand Down Expand Up @@ -232,6 +233,15 @@ class AbstractLQPNode : public std::enable_shared_from_this<AbstractLQPNode> {
bool has_matching_od(const std::vector<std::shared_ptr<AbstractExpression>>& ordering_expressions,
const std::vector<std::shared_ptr<AbstractExpression>>& ordered_expressions) const;

virtual InclusionDependencies inclusion_dependencies() const = 0;

/**
* @return True if there is an inclusion dependency (IND) matching the given list of output expressions and the given
* list of expressions to be included from another input.
*/
bool has_matching_ind(const std::vector<std::shared_ptr<AbstractExpression>>& foreign_key_expressions,
const std::vector<std::shared_ptr<AbstractExpression>>& key_expressions) const;

/**
* Perform a deep equality check
*/
Expand Down Expand Up @@ -273,16 +283,12 @@ class AbstractLQPNode : public std::enable_shared_from_this<AbstractLQPNode> {
virtual bool _on_shallow_equals(const AbstractLQPNode& rhs, const LQPNodeMapping& node_mapping) const = 0;

/**
* This is a helper method for node types that do not have an effect on the UCCs from input nodes.
* @return All unique column combinations from the left input node.
* These are helper methods for node types that do not have an effect on the input nodes' data dependencies.
* @return All data dependencies of a specific type from the left input node.
*/
UniqueColumnCombinations _forward_left_unique_column_combinations() const;

/**
* This is a helper method for node types that do not have an effect on the ODs from input nodes.
* @return All order dependencies from the left input node.
*/
OrderDependencies _forward_left_order_dependencies() const;
InclusionDependencies _forward_left_inclusion_dependencies() const;

/*
* Converts an AbstractLQPNode::DescriptionMode to an AbstractExpression::DescriptionMode
Expand Down
10 changes: 7 additions & 3 deletions src/lib/logical_query_plan/abstract_non_query_node.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

#include "expression/abstract_expression.hpp"
#include "logical_query_plan/data_dependencies/functional_dependency.hpp"
#include "logical_query_plan/data_dependencies/inclusion_dependency.hpp"
#include "logical_query_plan/data_dependencies/order_dependency.hpp"
#include "logical_query_plan/data_dependencies/unique_column_combination.hpp"
#include "types.hpp"
Expand All @@ -24,14 +25,17 @@ OrderDependencies AbstractNonQueryNode::order_dependencies() const {
Fail("Node does not support order depedencies.");
}

InclusionDependencies AbstractNonQueryNode::inclusion_dependencies() const {
Fail("Node does not support inclusion depedencies.");
}

FunctionalDependencies AbstractNonQueryNode::non_trivial_functional_dependencies() const {
Fail("Node does not support functional dependencies.");
}

bool AbstractNonQueryNode::is_column_nullable(const ColumnID /*column_id*/) const {
// The majority of non-query nodes output no column (CreateTable, DropTable, ...). Non-query nodes that return
// columns (ShowColumns, ...) need to override this function.
Fail("Node does not return any column");
// Our non-query nodes output no column (CreateTable, DropTable, ...).
Fail("Node does not return any column.");
}

} // namespace hyrise
15 changes: 9 additions & 6 deletions src/lib/logical_query_plan/abstract_non_query_node.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,21 +10,24 @@ namespace hyrise {
/**
* Base class for LQP nodes that do not query data (e.g, DML and DDL nodes) and therefore do not output columns.
*
* Helper class that provides a output_expressions() override and contains an empty dummy expression vector
* Helper class that provides overrides for output expressions, data dependencies, and other information we use during
* query optimization.
*/
class AbstractNonQueryNode : public AbstractLQPNode {
public:
using AbstractLQPNode::AbstractLQPNode;

std::vector<std::shared_ptr<AbstractExpression>> output_expressions() const override;
std::vector<std::shared_ptr<AbstractExpression>> output_expressions() const final;

UniqueColumnCombinations unique_column_combinations() const override;
UniqueColumnCombinations unique_column_combinations() const final;

OrderDependencies order_dependencies() const override;
OrderDependencies order_dependencies() const final;

FunctionalDependencies non_trivial_functional_dependencies() const override;
InclusionDependencies inclusion_dependencies() const final;

bool is_column_nullable(const ColumnID /*column_id*/) const override;
FunctionalDependencies non_trivial_functional_dependencies() const final;

bool is_column_nullable(const ColumnID /*column_id*/) const final;
};

} // namespace hyrise
18 changes: 18 additions & 0 deletions src/lib/logical_query_plan/aggregate_node.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#include "expression/window_function_expression.hpp"
#include "logical_query_plan/abstract_lqp_node.hpp"
#include "logical_query_plan/data_dependencies/functional_dependency.hpp"
#include "logical_query_plan/data_dependencies/inclusion_dependency.hpp"
#include "logical_query_plan/data_dependencies/order_dependency.hpp"
#include "logical_query_plan/data_dependencies/unique_column_combination.hpp"
#include "lqp_utils.hpp"
Expand Down Expand Up @@ -189,6 +190,23 @@ OrderDependencies AggregateNode::order_dependencies() const {
return order_dependencies;
}

InclusionDependencies AggregateNode::inclusion_dependencies() const {
auto inclusion_dependencies = InclusionDependencies{};

// Similarly to UCCs and ODs, forward INDs if all expressions are part of the GROUP-BY expressions. ANY() aggregates
// are already translated back to regular LQPColumnExpressions by `output_expressions`.
const auto input_inclusion_dependencies = left_input()->inclusion_dependencies();
const auto output_expressions = this->output_expressions();

for (const auto& input_inclusion_dependency : input_inclusion_dependencies) {
if (contains_all_expressions(input_inclusion_dependency.expressions, output_expressions)) {
inclusion_dependencies.emplace(input_inclusion_dependency);
}
}

return inclusion_dependencies;
}

FunctionalDependencies AggregateNode::non_trivial_functional_dependencies() const {
auto non_trivial_fds = left_input()->non_trivial_functional_dependencies();

Expand Down
2 changes: 2 additions & 0 deletions src/lib/logical_query_plan/aggregate_node.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ class AggregateNode : public EnableMakeForLQPNode<AggregateNode>, public Abstrac

OrderDependencies order_dependencies() const override;

InclusionDependencies inclusion_dependencies() const override;

// Returns non-trivial FDs from the left input node that remain valid.
FunctionalDependencies non_trivial_functional_dependencies() const override;

Expand Down
5 changes: 5 additions & 0 deletions src/lib/logical_query_plan/alias_node.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#include "expression/abstract_expression.hpp"
#include "expression/expression_utils.hpp"
#include "logical_query_plan/abstract_lqp_node.hpp"
#include "logical_query_plan/data_dependencies/inclusion_dependency.hpp"
#include "logical_query_plan/data_dependencies/order_dependency.hpp"
#include "logical_query_plan/data_dependencies/unique_column_combination.hpp"
#include "types.hpp"
Expand Down Expand Up @@ -54,6 +55,10 @@ OrderDependencies AliasNode::order_dependencies() const {
return _forward_left_order_dependencies();
}

InclusionDependencies AliasNode::inclusion_dependencies() const {
return _forward_left_inclusion_dependencies();
}

size_t AliasNode::_on_shallow_hash() const {
auto hash = size_t{0};
for (const auto& alias : aliases) {
Expand Down
2 changes: 2 additions & 0 deletions src/lib/logical_query_plan/alias_node.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ class AliasNode : public EnableMakeForLQPNode<AliasNode>, public AbstractLQPNode

OrderDependencies order_dependencies() const override;

InclusionDependencies inclusion_dependencies() const override;

const std::vector<std::string> aliases;

protected:
Expand Down
Loading
0