Parse and represent window functions in the LQP by dey4ss · Pull Request #2574 · hyrise/hyrise · GitHub
[go: up one dir, main page]
More Web Proxy on the site http://driver.im/
Skip to content

Parse and represent window functions in the LQP #2574

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 32 commits into master from dey4ss/prepare_dyod23_window
Jul 26, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
e88d774
set parser to dev
dey4ss May 31, 2023
6c15845
WIP
dey4ss May 31, 2023
3a8891f
add WindowNode
dey4ss Jun 1, 2023
8d6c8fd
permit window functions only in select list
dey4ss Jun 1, 2023
438196f
merge
dey4ss Jun 1, 2023
29d2b8d
verify that WindowNode is only followed by allowed nodes
dey4ss Jun 1, 2023
c8885a0
parse window description
dey4ss Jun 2, 2023
0a52a34
doc
dey4ss Jun 3, 2023
fda892e
rename AggregateExpression
dey4ss Jun 5, 2023
a14272c
[WIP] translate window expressions to window nodes
dey4ss Jun 5, 2023
d1ea782
fix
dey4ss Jun 5, 2023
4b7588a
ensure ordered results
dey4ss Jun 6, 2023
ee01276
enable projections by any means
dey4ss Jun 6, 2023
6c72aa2
merge
dey4ss Jun 6, 2023
f83e18d
parse everything correctly
dey4ss Jun 6, 2023
98fbbfd
rename AggregateTraits, add translation tests
dey4ss Jun 6, 2023
f9691e4
more tests
dey4ss Jun 6, 2023
4850992
doc
dey4ss Jun 6, 2023
fc52c7a
more tests
dey4ss Jun 6, 2023
6b30f21
gnarf
dey4ss Jun 8, 2023
faf3dd0
-.-
dey4ss Jun 8, 2023
84ecf81
some feedback
dey4ss Jun 30, 2023
c7b95ef
merge
dey4ss Jun 30, 2023
2d7e373
some review
dey4ss Jul 10, 2023
676e483
Merge branch 'master' into dey4ss/prepare_dyod23_window
dey4ss Jul 10, 2023
aaed8fe
one more test case
dey4ss Jul 10, 2023
aa47ea2
more feedback
dey4ss Jul 14, 2023
3cec4b1
run SSB and JCC-H test in own stage
dey4ss Jul 17, 2023
49667c1
jcc-h is already executed
dey4ss Jul 17, 2023
8fe21cd
increase timeout
dey4ss Jul 24, 2023
548779c
port output expression fix from martin/perf/concurrent_queue
dey4ss Jul 25, 2023
e71f0af
Merge branch 'dey4ss/separate_ssb_tests' into dey4ss/prepare_dyod23_w…
dey4ss Jul 25, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 11 additions & 6 deletions Jenkinsfile
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,6 @@ try {
sh "./scripts/test/hyriseConsole_test.py clang-release"
sh "./scripts/test/hyriseServer_test.py clang-release"
sh "./scripts/test/hyriseBenchmarkJoinOrder_test.py clang-release"
sh "./scripts/test/hyriseBenchmarkStarSchema_test.py clang-release"
sh "./scripts/test/hyriseBenchmarkFileBased_test.py clang-release"
sh "cd clang-release && ../scripts/test/hyriseBenchmarkTPCC_test.py ." // Own folder to isolate binary export tests
sh "cd clang-release && ../scripts/test/hyriseBenchmarkTPCH_test.py ." // Own folder to isolate visualization
Expand All @@ -172,17 +171,15 @@ try {
sh "./scripts/test/hyriseConsole_test.py clang-debug"
sh "./scripts/test/hyriseServer_test.py clang-debug"
sh "./scripts/test/hyriseBenchmarkJoinOrder_test.py clang-debug"
sh "./scripts/test/hyriseBenchmarkStarSchema_test.py clang-debug"
sh "./scripts/test/hyriseBenchmarkFileBased_test.py clang-debug"
sh "cd clang-debug && ../scripts/test/hyriseBenchmarkTPCH_test.py ." // Own folder to isolate visualization
sh "cd clang-debug && ../scripts/test/hyriseBenchmarkJCCH_test.py ." // Own folder to isolate visualization
sh "cd clang-debug && ../scripts/test/hyriseBenchmarkJCCH_test.py ." // Own folder to isolate cached data
sh "./scripts/test/hyriseConsole_test.py gcc-debug"
sh "./scripts/test/hyriseServer_test.py gcc-debug"
sh "./scripts/test/hyriseBenchmarkJoinOrder_test.py gcc-debug"
sh "./scripts/test/hyriseBenchmarkStarSchema_test.py gcc-debug"
sh "./scripts/test/hyriseBenchmarkFileBased_test.py gcc-debug"
sh "cd gcc-debug && ../scripts/test/hyriseBenchmarkTPCH_test.py ." // Own folder to isolate visualization
sh "cd gcc-debug && ../scripts/test/hyriseBenchmarkJCCH_test.py ." // Own folder to isolate visualization
sh "cd gcc-debug && ../scripts/test/hyriseBenchmarkJCCH_test.py ." // Own folder to isolate cached data

} else {
Utils.markStageSkippedForConditional("debugSystemTests")
Expand Down Expand Up @@ -245,7 +242,6 @@ try {
sh "./scripts/test/hyriseConsole_test.py gcc-release"
sh "./scripts/test/hyriseServer_test.py gcc-release"
sh "./scripts/test/hyriseBenchmarkJoinOrder_test.py gcc-release"
sh "./scripts/test/hyriseBenchmarkStarSchema_test.py gcc-release"
sh "./scripts/test/hyriseBenchmarkFileBased_test.py gcc-release"
sh "cd gcc-release && ../scripts/test/hyriseBenchmarkTPCC_test.py ." // Own folder to isolate binary export tests
sh "cd gcc-release && ../scripts/test/hyriseBenchmarkTPCH_test.py ." // Own folder to isolate visualization
Expand Down Expand Up @@ -301,6 +297,15 @@ try {
}
}

// We run this test in an own stage since we encountered issues with multiple concurrent calls to the external DB generator.
stage("clangDebugSSBTest") {
if (env.BRANCH_NAME == 'master' || full_ci) {
sh "./scripts/test/hyriseBenchmarkStarSchema_test.py clang-debug"
} else {
Utils.markStageSkippedForConditional("clangDebugSSBTest")
}
}

parallel memcheckReleaseTest: {
stage("memcheckReleaseTest") {
// Runs after the other sanitizers as it depends on gcc-release to be built. With #2402, valgrind now
Expand Down
5 changes: 4 additions & 1 deletion scripts/test/hyriseBenchmarkJCCH_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
def main():
build_dir = initialize()

# Run JCC-H and validate its output using pexpect and check if all queries were successfully verified with sqlite.
# Run JCC-H, validate its output using pexpect, and check if all queries were successfully verified with sqlite.
arguments = {}
arguments["--scale"] = ".01"
arguments["--chunk_size"] = "10000"
Expand All @@ -33,6 +33,9 @@ def main():
benchmark.expect_exact("Max runs per item is 100")
benchmark.expect_exact("Max duration per item is 10 seconds")
benchmark.expect_exact("Warmup duration per item is 10 seconds")
benchmark.expect_exact(
"- Automatically verifying results with SQLite. This will make the performance numbers invalid."
)
benchmark.expect_exact("Benchmarking Queries: [ 2, 4, 6 ]")
benchmark.expect_exact("JCC-H scale factor is 0.01")
benchmark.expect_exact("Using prepared statements: no")
Expand Down
2 changes: 1 addition & 1 deletion scripts/test/hyriseBenchmarkStarSchema_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
def main():
build_dir = initialize()

# RunSSB and validate its output using pexpect and check if all queries were successfully verified with sqlite.
# Run SSB, validate its output using pexpect, and check if all queries were successfully verified with sqlite.
arguments = {}
arguments["--queries"] = "'1.1,1.2,2.2,3.3'"
arguments["--scale"] = "0.01"
Expand Down
8 changes: 4 additions & 4 deletions scripts/test/hyriseConsole_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,11 +129,11 @@ def main():
console.sendline("select * from meta_tables")
console.expect("0 rows total")

# Test SSB generation.
console.sendline("generate_ssb 0.01")
console.expect("Generating tables done")
# Test TPC-DS generation.
console.sendline("generate_tpcds 1")
console.expect("Generating tables done", timeout=600)
console.sendline("select * from meta_tables")
console.expect("5 rows total")
console.expect("24 rows total")

# Test meta table modification.
console.sendline("insert into meta_settings values ('foo', 'bar', 'baz')")
Expand Down
28 changes: 14 additions & 14 deletions src/benchmark/operators/aggregate_benchmark.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@ using namespace expression_functional; // NOLINT(build/namespaces)
BENCHMARK_F(MicroBenchmarkBasicFixture, BM_AggregateHash)(benchmark::State& state) {
_clear_cache();

auto aggregates = std::vector<std::shared_ptr<AggregateExpression>>{
std::static_pointer_cast<AggregateExpression>(min_(pqp_column_(ColumnID{1}, DataType::Int, false, "b")))};
auto aggregates = std::vector<std::shared_ptr<WindowFunctionExpression>>{
std::static_pointer_cast<WindowFunctionExpression>(min_(pqp_column_(ColumnID{1}, DataType::Int, false, "b")))};

std::vector<ColumnID> groupby = {ColumnID{0} /* "a" */};

Expand All @@ -33,8 +33,8 @@ BENCHMARK_F(MicroBenchmarkBasicFixture, BM_AggregateHash)(benchmark::State& stat
BENCHMARK_F(MicroBenchmarkBasicFixture, BM_AggregateSortNotSortedNoGroupBy)(benchmark::State& state) {
_clear_cache();

auto aggregates = std::vector<std::shared_ptr<AggregateExpression>>{
std::static_pointer_cast<AggregateExpression>(min_(pqp_column_(ColumnID{1}, DataType::Int, false, "b")))};
auto aggregates = std::vector<std::shared_ptr<WindowFunctionExpression>>{
std::static_pointer_cast<WindowFunctionExpression>(min_(pqp_column_(ColumnID{1}, DataType::Int, false, "b")))};

std::vector<ColumnID> groupby = {};

Expand All @@ -49,8 +49,8 @@ BENCHMARK_F(MicroBenchmarkBasicFixture, BM_AggregateSortNotSortedNoGroupBy)(benc
BENCHMARK_F(MicroBenchmarkBasicFixture, BM_AggregateSortSortedNoGroupBy)(benchmark::State& state) {
_clear_cache();

auto aggregates = std::vector<std::shared_ptr<AggregateExpression>>{
std::static_pointer_cast<AggregateExpression>(min_(pqp_column_(ColumnID{1}, DataType::Int, false, "b")))};
auto aggregates = std::vector<std::shared_ptr<WindowFunctionExpression>>{
std::static_pointer_cast<WindowFunctionExpression>(min_(pqp_column_(ColumnID{1}, DataType::Int, false, "b")))};

std::vector<ColumnID> groupby = {};

Expand All @@ -73,8 +73,8 @@ BENCHMARK_F(MicroBenchmarkBasicFixture, BM_AggregateSortSortedNoGroupBy)(benchma
BENCHMARK_F(MicroBenchmarkBasicFixture, BM_AggregateSortNotSortedOneGroupBy)(benchmark::State& state) {
_clear_cache();

auto aggregates = std::vector<std::shared_ptr<AggregateExpression>>{
std::static_pointer_cast<AggregateExpression>(min_(pqp_column_(ColumnID{1}, DataType::Int, false, "b")))};
auto aggregates = std::vector<std::shared_ptr<WindowFunctionExpression>>{
std::static_pointer_cast<WindowFunctionExpression>(min_(pqp_column_(ColumnID{1}, DataType::Int, false, "b")))};

std::vector<ColumnID> groupby = {ColumnID{1}};

Expand All @@ -89,8 +89,8 @@ BENCHMARK_F(MicroBenchmarkBasicFixture, BM_AggregateSortNotSortedOneGroupBy)(ben
BENCHMARK_F(MicroBenchmarkBasicFixture, BM_AggregateSortSortedOneGroupBy)(benchmark::State& state) {
_clear_cache();

auto aggregates = std::vector<std::shared_ptr<AggregateExpression>>{
std::static_pointer_cast<AggregateExpression>(min_(pqp_column_(ColumnID{1}, DataType::Int, false, "b")))};
auto aggregates = std::vector<std::shared_ptr<WindowFunctionExpression>>{
std::static_pointer_cast<WindowFunctionExpression>(min_(pqp_column_(ColumnID{1}, DataType::Int, false, "b")))};

std::vector<ColumnID> groupby = {ColumnID{1}};

Expand All @@ -113,8 +113,8 @@ BENCHMARK_F(MicroBenchmarkBasicFixture, BM_AggregateSortSortedOneGroupBy)(benchm
BENCHMARK_F(MicroBenchmarkBasicFixture, BM_AggregateSortNotSortedMultipleGroupBy)(benchmark::State& state) {
_clear_cache();

auto aggregates = std::vector<std::shared_ptr<AggregateExpression>>{
std::static_pointer_cast<AggregateExpression>(min_(pqp_column_(ColumnID{1}, DataType::Int, false, "b")))};
auto aggregates = std::vector<std::shared_ptr<WindowFunctionExpression>>{
std::static_pointer_cast<WindowFunctionExpression>(min_(pqp_column_(ColumnID{1}, DataType::Int, false, "b")))};

std::vector<ColumnID> groupby = {ColumnID{0}, ColumnID{1}};

Expand All @@ -129,8 +129,8 @@ BENCHMARK_F(MicroBenchmarkBasicFixture, BM_AggregateSortNotSortedMultipleGroupBy
BENCHMARK_F(MicroBenchmarkBasicFixture, BM_AggregateSortSortedMultipleGroupBy)(benchmark::State& state) {
_clear_cache();

auto aggregates = std::vector<std::shared_ptr<AggregateExpression>>{
std::static_pointer_cast<AggregateExpression>(min_(pqp_column_(ColumnID{1}, DataType::Int, false, "b")))};
auto aggregates = std::vector<std::shared_ptr<WindowFunctionExpression>>{
std::static_pointer_cast<WindowFunctionExpression>(min_(pqp_column_(ColumnID{1}, DataType::Int, false, "b")))};

std::vector<ColumnID> groupby = {ColumnID{0}, ColumnID{1}};

Expand Down
4 changes: 2 additions & 2 deletions src/benchmark/operators/join_aggregate_benchmark.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -158,8 +158,8 @@ void BM_Join_Aggregate(benchmark::State& state) {
auto operator_join_predicate =
OperatorJoinPredicate(std::make_pair(ColumnID{0}, ColumnID{0}), PredicateCondition::Equals);

auto aggregates = std::vector<std::shared_ptr<AggregateExpression>>{
std::static_pointer_cast<AggregateExpression>(avg_(pqp_column_(ColumnID{0}, DataType::Int, false, "b")))};
auto aggregates = std::vector<std::shared_ptr<WindowFunctionExpression>>{
std::static_pointer_cast<WindowFunctionExpression>(avg_(pqp_column_(ColumnID{0}, DataType::Int, false, "b")))};

std::vector<ColumnID> groupby = {ColumnID{0}, ColumnID{2}};

Expand Down
4 changes: 2 additions & 2 deletions src/benchmark/tpch_data_micro_benchmark.cpp
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
#include "micro_benchmark_basic_fixture.hpp"

#include "benchmark_config.hpp"
#include "expression/aggregate_expression.hpp"
#include "expression/expression_functional.hpp"
#include "expression/window_function_expression.hpp"
#include "hyrise.hpp"
#include "logical_query_plan/join_node.hpp"
#include "logical_query_plan/lqp_translator.hpp"
Expand Down Expand Up @@ -218,7 +218,7 @@ BENCHMARK_F(TPCHDataMicroBenchmarkFixture, BM_ScanAggregate)(benchmark::State& s
const auto mocked_table_scan_output = sorted_lineitem->get_output();
const ColumnID group_by_column = l_orderkey_id;
const std::vector<ColumnID> group_by = {l_orderkey_id};
const auto aggregate_expressions = std::vector<std::shared_ptr<AggregateExpression>>{
const auto aggregate_expressions = std::vector<std::shared_ptr<WindowFunctionExpression>>{
count_(pqp_column_(group_by_column, mocked_table_scan_output->column_data_type(group_by_column),
mocked_table_scan_output->column_is_nullable(group_by_column),
mocked_table_scan_output->column_name(group_by_column)))};
Expand Down
83 changes: 44 additions & 39 deletions src/bin/console/console.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -767,64 +767,69 @@ int Console::_visualize(const std::string& input) {
}

if (no_execute && !sql.empty() && _sql_pipeline->requires_execution()) {
out("Error: We do not support the visualization of multiple dependant statements in 'noexec' mode.\n");
out("Error: We do not support the visualization of multiple dependent statements in 'noexec' mode.\n");
return ReturnCode::Error;
}

const auto img_filename = plan_type_str + ".png";

switch (plan_type) {
case PlanType::LQP:
case PlanType::UnoptLQP: {
auto lqp_roots = std::vector<std::shared_ptr<AbstractLQPNode>>{};
try {
switch (plan_type) {
case PlanType::LQP:
case PlanType::UnoptLQP: {
auto lqp_roots = std::vector<std::shared_ptr<AbstractLQPNode>>{};

const auto& lqps = (plan_type == PlanType::LQP) ? _sql_pipeline->get_optimized_logical_plans()
: _sql_pipeline->get_unoptimized_logical_plans();
const auto& lqps = (plan_type == PlanType::LQP) ? _sql_pipeline->get_optimized_logical_plans()
: _sql_pipeline->get_unoptimized_logical_plans();

lqp_roots.reserve(lqps.size());
lqp_roots.reserve(lqps.size());

for (const auto& lqp : lqps) {
lqp_roots.emplace_back(lqp);
}
for (const auto& lqp : lqps) {
lqp_roots.emplace_back(lqp);
}

auto visualizer = LQPVisualizer{};
visualizer.visualize(lqp_roots, img_filename);
} break;
auto visualizer = LQPVisualizer{};
visualizer.visualize(lqp_roots, img_filename);
} break;

case PlanType::PQP: {
if (!no_execute) {
_sql_pipeline->get_result_table();
case PlanType::PQP: {
if (!no_execute) {
_sql_pipeline->get_result_table();

// Store the transaction context as potentially modified by the pipeline. It might be a new context if a
// transaction was started or nullptr if we are in auto-commit mode or the last transaction was finished.
_explicitly_created_transaction_context = _sql_pipeline->transaction_context();
}
// Store the transaction context as potentially modified by the pipeline. It might be a new context if a
// transaction was started or nullptr if we are in auto-commit mode or the last transaction was finished.
_explicitly_created_transaction_context = _sql_pipeline->transaction_context();
}

auto visualizer = PQPVisualizer{};
visualizer.visualize(_sql_pipeline->get_physical_plans(), img_filename);
} break;
auto visualizer = PQPVisualizer{};
visualizer.visualize(_sql_pipeline->get_physical_plans(), img_filename);
} break;

case PlanType::Joins: {
out("NOTE: Join graphs will show only Cross and Inner joins, not Semi, Left, Right, Full outer, "
"AntiNullAsTrue and AntiNullAsFalse joins.\n");
case PlanType::Joins: {
out("NOTE: Join graphs will show only Cross and Inner joins, not Semi, Left, Right, Full outer, "
"AntiNullAsTrue and AntiNullAsFalse joins.\n");

auto join_graphs = std::vector<JoinGraph>{};
auto join_graphs = std::vector<JoinGraph>{};

const auto& lqps = _sql_pipeline->get_optimized_logical_plans();
for (const auto& lqp : lqps) {
const auto sub_lqps = lqp_find_subplan_roots(lqp);
const auto& lqps = _sql_pipeline->get_optimized_logical_plans();
for (const auto& lqp : lqps) {
const auto sub_lqps = lqp_find_subplan_roots(lqp);

for (const auto& sub_lqp : sub_lqps) {
const auto sub_lqp_join_graphs = JoinGraph::build_all_in_lqp(sub_lqp);
for (const auto& sub_lqp_join_graph : sub_lqp_join_graphs) {
join_graphs.emplace_back(sub_lqp_join_graph);
for (const auto& sub_lqp : sub_lqps) {
const auto sub_lqp_join_graphs = JoinGraph::build_all_in_lqp(sub_lqp);
for (const auto& sub_lqp_join_graph : sub_lqp_join_graphs) {
join_graphs.emplace_back(sub_lqp_join_graph);
}
}
}
}

auto visualizer = JoinGraphVisualizer{};
visualizer.visualize(join_graphs, img_filename);
} break;
auto visualizer = JoinGraphVisualizer{};
visualizer.visualize(join_graphs, img_filename);
} break;
}
} catch (const InvalidInputException& exception) {
out(std::string(exception.what()) + '\n');
return false;
}

// NOLINTBEGIN(concurrency-mt-unsafe) - system() is not thread-safe, but it's not used concurrently here.
Expand Down
10 changes: 7 additions & 3 deletions src/lib/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,6 @@ set(
expression/abstract_expression.hpp
expression/abstract_predicate_expression.cpp
expression/abstract_predicate_expression.hpp
expression/aggregate_expression.cpp
expression/aggregate_expression.hpp
expression/arithmetic_expression.cpp
expression/arithmetic_expression.hpp
expression/between_expression.cpp
Expand Down Expand Up @@ -81,6 +79,10 @@ set(
expression/unary_minus_expression.hpp
expression/value_expression.cpp
expression/value_expression.hpp
expression/window_expression.cpp
expression/window_expression.hpp
expression/window_function_expression.cpp
expression/window_function_expression.hpp
hyrise.cpp
hyrise.hpp
import_export/binary/binary_parser.cpp
Expand Down Expand Up @@ -164,6 +166,8 @@ set(
logical_query_plan/update_node.hpp
logical_query_plan/validate_node.cpp
logical_query_plan/validate_node.hpp
logical_query_plan/window_node.cpp
logical_query_plan/window_node.hpp
lossless_cast.cpp
lossless_cast.hpp
lossy_cast.hpp
Expand All @@ -180,7 +184,7 @@ set(
operators/abstract_read_only_operator.hpp
operators/abstract_read_write_operator.cpp
operators/abstract_read_write_operator.hpp
operators/aggregate/aggregate_traits.hpp
operators/aggregate/window_function_traits.hpp
operators/aggregate_hash.cpp
operators/aggregate_hash.hpp
operators/aggregate_sort.cpp
Expand Down
4 changes: 2 additions & 2 deletions src/lib/expression/abstract_expression.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,10 @@
#include "expression_utils.hpp"
#include "utils/assert.hpp"

using namespace std::string_literals; // NOLINT

namespace hyrise {

using namespace std::string_literals; // NOLINT(build/namespaces)

AbstractExpression::AbstractExpression(const ExpressionType init_type,
const std::vector<std::shared_ptr<AbstractExpression>>& init_arguments)
: type(init_type), arguments(init_arguments) {}
Expand Down
5 changes: 3 additions & 2 deletions src/lib/expression/abstract_expression.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ class AbstractLQPNode;
class AbstractOperator;

enum class ExpressionType {
Aggregate,
Arithmetic,
Cast,
Case,
Expand All @@ -33,7 +32,9 @@ enum class ExpressionType {
PQPSubquery,
LQPSubquery,
UnaryMinus,
Value
Value,
Window,
WindowFunction
};

/**
Expand Down
Loading