diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake index b7cd31f3d740c..ef4026379518d 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -666,6 +666,13 @@ else() endif() endif() +# Remove these two lines once https://github.com/substrait-io/substrait/pull/342 merges +set(ENV{ARROW_SUBSTRAIT_URL} + "https://github.com/substrait-io/substrait/archive/e59008b6b202f8af06c2266991161b1e45cb056a.tar.gz" +) +set(ARROW_SUBSTRAIT_BUILD_SHA256_CHECKSUM + "f64629cb377fcc62c9d3e8fe69fa6a4cf326f34d756e03db84843c5cce8d04cd") + if(DEFINED ENV{ARROW_SUBSTRAIT_URL}) set(SUBSTRAIT_SOURCE_URL "$ENV{ARROW_SUBSTRAIT_URL}") else() diff --git a/cpp/src/arrow/engine/substrait/expression_internal.cc b/cpp/src/arrow/engine/substrait/expression_internal.cc index 7495d1a34e1fa..343d8cd9ee647 100644 --- a/cpp/src/arrow/engine/substrait/expression_internal.cc +++ b/cpp/src/arrow/engine/substrait/expression_internal.cc @@ -52,8 +52,8 @@ Id NormalizeFunctionName(Id id) { } // namespace -Status DecodeArg(const substrait::FunctionArgument& arg, uint32_t idx, - SubstraitCall* call, const ExtensionSet& ext_set, +Status DecodeArg(const substrait::FunctionArgument& arg, int idx, SubstraitCall* call, + const ExtensionSet& ext_set, const ConversionOptions& conversion_options) { if (arg.has_enum_()) { const substrait::FunctionArgument::Enum& enum_val = arg.enum_(); @@ -61,9 +61,6 @@ Status DecodeArg(const substrait::FunctionArgument& arg, uint32_t idx, case substrait::FunctionArgument::Enum::EnumKindCase::kSpecified: call->SetEnumArg(idx, enum_val.specified()); break; - case substrait::FunctionArgument::Enum::EnumKindCase::kUnspecified: - call->SetEnumArg(idx, std::nullopt); - break; default: return Status::Invalid("Unrecognized enum kind case: ", enum_val.enum_kind_case()); @@ -80,6 +77,19 @@ Status DecodeArg(const substrait::FunctionArgument& arg, uint32_t idx, return Status::OK(); } +Status DecodeOption(const substrait::FunctionOption& opt, SubstraitCall* call) { + std::vector prefs; + if (opt.preference_size() == 0) { + return Status::Invalid("Invalid Substrait plan. The option ", opt.name(), + " is specified but does not list any choices"); + } + for (const auto& preference : opt.preference()) { + prefs.push_back(preference); + } + call->SetOption(opt.name(), prefs); + return Status::OK(); +} + Result DecodeScalarFunction( Id id, const substrait::Expression::ScalarFunction& scalar_fn, const ExtensionSet& ext_set, const ConversionOptions& conversion_options) { @@ -87,8 +97,11 @@ Result DecodeScalarFunction( FromProto(scalar_fn.output_type(), ext_set, conversion_options)); SubstraitCall call(id, output_type_and_nullable.first, output_type_and_nullable.second); for (int i = 0; i < scalar_fn.arguments_size(); i++) { - ARROW_RETURN_NOT_OK(DecodeArg(scalar_fn.arguments(i), static_cast(i), &call, - ext_set, conversion_options)); + ARROW_RETURN_NOT_OK( + DecodeArg(scalar_fn.arguments(i), i, &call, ext_set, conversion_options)); + } + for (const auto& opt : scalar_fn.options()) { + ARROW_RETURN_NOT_OK(DecodeOption(opt, &call)); } return std::move(call); } @@ -926,16 +939,12 @@ Result> EncodeSubstraitCa ToProto(*call.output_type(), call.output_nullable(), ext_set, conversion_options)); scalar_fn->set_allocated_output_type(output_type.release()); - for (uint32_t i = 0; i < call.size(); i++) { + for (int i = 0; i < call.size(); i++) { substrait::FunctionArgument* arg = scalar_fn->add_arguments(); if (call.HasEnumArg(i)) { auto enum_val = std::make_unique(); - ARROW_ASSIGN_OR_RAISE(std::optional enum_arg, call.GetEnumArg(i)); - if (enum_arg) { - enum_val->set_specified(std::string(*enum_arg)); - } else { - enum_val->set_allocated_unspecified(new google::protobuf::Empty()); - } + ARROW_ASSIGN_OR_RAISE(std::string_view enum_arg, call.GetEnumArg(i)); + enum_val->set_specified(std::string(enum_arg)); arg->set_allocated_enum_(enum_val.release()); } else if (call.HasValueArg(i)) { ARROW_ASSIGN_OR_RAISE(compute::Expression value_arg, call.GetValueArg(i)); diff --git a/cpp/src/arrow/engine/substrait/extension_set.cc b/cpp/src/arrow/engine/substrait/extension_set.cc index 63ba598c823eb..21d70e3333004 100644 --- a/cpp/src/arrow/engine/substrait/extension_set.cc +++ b/cpp/src/arrow/engine/substrait/extension_set.cc @@ -25,6 +25,7 @@ #include "arrow/engine/substrait/expression_internal.h" #include "arrow/util/hash_util.h" #include "arrow/util/hashing.h" +#include "arrow/util/string.h" namespace arrow { namespace engine { @@ -121,7 +122,7 @@ class IdStorageImpl : public IdStorage { std::unique_ptr IdStorage::Make() { return std::make_unique(); } -Result> SubstraitCall::GetEnumArg(uint32_t index) const { +Result SubstraitCall::GetEnumArg(int index) const { if (index >= size_) { return Status::Invalid("Expected Substrait call to have an enum argument at index ", index, " but it did not have enough arguments"); @@ -134,16 +135,16 @@ Result> SubstraitCall::GetEnumArg(uint32_t index return enum_arg_it->second; } -bool SubstraitCall::HasEnumArg(uint32_t index) const { +bool SubstraitCall::HasEnumArg(int index) const { return enum_args_.find(index) != enum_args_.end(); } -void SubstraitCall::SetEnumArg(uint32_t index, std::optional enum_arg) { +void SubstraitCall::SetEnumArg(int index, std::string enum_arg) { size_ = std::max(size_, index + 1); enum_args_[index] = std::move(enum_arg); } -Result SubstraitCall::GetValueArg(uint32_t index) const { +Result SubstraitCall::GetValueArg(int index) const { if (index >= size_) { return Status::Invalid("Expected Substrait call to have a value argument at index ", index, " but it did not have enough arguments"); @@ -156,15 +157,32 @@ Result SubstraitCall::GetValueArg(uint32_t index) const { return value_arg_it->second; } -bool SubstraitCall::HasValueArg(uint32_t index) const { +bool SubstraitCall::HasValueArg(int index) const { return value_args_.find(index) != value_args_.end(); } -void SubstraitCall::SetValueArg(uint32_t index, compute::Expression value_arg) { +void SubstraitCall::SetValueArg(int index, compute::Expression value_arg) { size_ = std::max(size_, index + 1); value_args_[index] = std::move(value_arg); } +std::optional const*> SubstraitCall::GetOption( + std::string_view option_name) const { + auto opt = options_.find(std::string(option_name)); + if (opt == options_.end()) { + return std::nullopt; + } + return &opt->second; +} + +void SubstraitCall::SetOption(std::string_view option_name, + const std::vector& option_preferences) { + auto& prefs = options_[std::string(option_name)]; + for (std::string_view pref : option_preferences) { + prefs.emplace_back(pref); + } +} + // A builder used when creating a Substrait plan from an Arrow execution plan. In // that situation we do not have a set of anchor values already defined so we keep // a map of what Ids we have seen. @@ -645,50 +663,91 @@ struct ExtensionIdRegistryImpl : ExtensionIdRegistry { }; template -using EnumParser = std::function(std::optional)>; - -template -EnumParser GetEnumParser(const std::vector& options) { - std::unordered_map parse_map; - for (std::size_t i = 0; i < options.size(); i++) { - parse_map[options[i]] = static_cast(i + 1); +class EnumParser { + public: + explicit EnumParser(const std::vector& options) { + for (std::size_t i = 0; i < options.size(); i++) { + parse_map_[options[i]] = static_cast(i + 1); + reverse_map_[static_cast(i + 1)] = options[i]; + } } - return [parse_map](std::optional enum_val) -> Result { - if (!enum_val) { - // Assumes 0 is always kUnspecified in Enum - return static_cast(0); + + Result Parse(std::string_view enum_val) const { + auto it = parse_map_.find(std::string(enum_val)); + if (it == parse_map_.end()) { + return Status::NotImplemented("The value ", enum_val, + " is not an expected enum value"); } - auto maybe_parsed = parse_map.find(std::string(*enum_val)); - if (maybe_parsed == parse_map.end()) { - return Status::Invalid("The value ", *enum_val, " is not an expected enum value"); + return it->second; + } + + std::string ImplementedOptionsAsString( + const std::vector& implemented_opts) const { + std::vector opt_strs; + for (const Enum& implemented_opt : implemented_opts) { + auto it = reverse_map_.find(implemented_opt); + if (it == reverse_map_.end()) { + opt_strs.emplace_back("Unknown"); + } else { + opt_strs.emplace_back(it->second); + } } - return maybe_parsed->second; - }; -} + return arrow::internal::JoinStrings(opt_strs, ", "); + } + + private: + std::unordered_map parse_map_; + std::unordered_map reverse_map_; +}; enum class TemporalComponent { kUnspecified = 0, kYear, kMonth, kDay, kSecond }; static std::vector kTemporalComponentOptions = {"YEAR", "MONTH", "DAY", "SECOND"}; -static EnumParser kTemporalComponentParser = - GetEnumParser(kTemporalComponentOptions); +static EnumParser kTemporalComponentParser(kTemporalComponentOptions); enum class OverflowBehavior { kUnspecified = 0, kSilent, kSaturate, kError }; static std::vector kOverflowOptions = {"SILENT", "SATURATE", "ERROR"}; -static EnumParser kOverflowParser = - GetEnumParser(kOverflowOptions); +static EnumParser kOverflowParser(kOverflowOptions); template -Result ParseEnumArg(const SubstraitCall& call, uint32_t arg_index, +Result ParseOptionOrElse(const SubstraitCall& call, std::string_view option_name, + const EnumParser& parser, + const std::vector& implemented_options, + Enum fallback) { + std::optional const*> enum_arg = call.GetOption(option_name); + if (!enum_arg.has_value()) { + return fallback; + } + std::vector const* prefs = *enum_arg; + for (const std::string& pref : *prefs) { + ARROW_ASSIGN_OR_RAISE(Enum parsed, parser.Parse(pref)); + for (Enum implemented_opt : implemented_options) { + if (implemented_opt == parsed) { + return parsed; + } + } + } + + // Prepare error message + return Status::NotImplemented( + "During a call to a function with id ", call.id().uri, "#", call.id().name, + " the plan requested the option ", option_name, " to be one of [", + arrow::internal::JoinStrings(*prefs, ", "), + "] but the only supported options are [", + parser.ImplementedOptionsAsString(implemented_options), "]"); +} + +template +Result ParseEnumArg(const SubstraitCall& call, int arg_index, const EnumParser& parser) { - ARROW_ASSIGN_OR_RAISE(std::optional enum_arg, - call.GetEnumArg(arg_index)); - return parser(enum_arg); + ARROW_ASSIGN_OR_RAISE(std::string_view enum_val, call.GetEnumArg(arg_index)); + return parser.Parse(enum_val); } Result> GetValueArgs(const SubstraitCall& call, int start_index) { std::vector expressions; - for (uint32_t index = start_index; index < call.size(); index++) { + for (int index = start_index; index < call.size(); index++) { ARROW_ASSIGN_OR_RAISE(compute::Expression arg, call.GetValueArg(index)); expressions.push_back(arg); } @@ -698,13 +757,13 @@ Result> GetValueArgs(const SubstraitCall& call, ExtensionIdRegistry::SubstraitCallToArrow DecodeOptionlessOverflowableArithmetic( const std::string& function_name) { return [function_name](const SubstraitCall& call) -> Result { - ARROW_ASSIGN_OR_RAISE(OverflowBehavior overflow_behavior, - ParseEnumArg(call, 0, kOverflowParser)); + ARROW_ASSIGN_OR_RAISE( + OverflowBehavior overflow_behavior, + ParseOptionOrElse(call, "overflow", kOverflowParser, + {OverflowBehavior::kSilent, OverflowBehavior::kError}, + OverflowBehavior::kSilent)); ARROW_ASSIGN_OR_RAISE(std::vector value_args, - GetValueArgs(call, 1)); - if (overflow_behavior == OverflowBehavior::kUnspecified) { - overflow_behavior = OverflowBehavior::kSilent; - } + GetValueArgs(call, 0)); if (overflow_behavior == OverflowBehavior::kSilent) { return arrow::compute::call(function_name, std::move(value_args)); } else if (overflow_behavior == OverflowBehavior::kError) { @@ -727,12 +786,12 @@ ExtensionIdRegistry::ArrowToSubstraitCall EncodeOptionlessOverflowableArithmetic SubstraitCall substrait_call(substrait_fn_id, call.type.GetSharedPtr(), /*nullable=*/true); if (kChecked) { - substrait_call.SetEnumArg(0, "ERROR"); + substrait_call.SetOption("overflow", {"ERROR"}); } else { - substrait_call.SetEnumArg(0, "SILENT"); + substrait_call.SetOption("overflow", {"SILENT"}); } for (std::size_t i = 0; i < call.arguments.size(); i++) { - substrait_call.SetValueArg(static_cast(i + 1), call.arguments[i]); + substrait_call.SetValueArg(static_cast(i), call.arguments[i]); } return std::move(substrait_call); }; @@ -746,14 +805,14 @@ ExtensionIdRegistry::ArrowToSubstraitCall EncodeOptionlessComparison(Id substrai SubstraitCall substrait_call(substrait_fn_id, call.type.GetSharedPtr(), /*nullable=*/true); for (std::size_t i = 0; i < call.arguments.size(); i++) { - substrait_call.SetValueArg(static_cast(i), call.arguments[i]); + substrait_call.SetValueArg(static_cast(i), call.arguments[i]); } return std::move(substrait_call); }; } ExtensionIdRegistry::SubstraitCallToArrow DecodeOptionlessBasicMapping( - const std::string& function_name, uint32_t max_args) { + const std::string& function_name, int max_args) { return [function_name, max_args](const SubstraitCall& call) -> Result { if (call.size() > max_args) { diff --git a/cpp/src/arrow/engine/substrait/extension_set.h b/cpp/src/arrow/engine/substrait/extension_set.h index e2e09d0d92b34..9c8a7013e596d 100644 --- a/cpp/src/arrow/engine/substrait/extension_set.h +++ b/cpp/src/arrow/engine/substrait/extension_set.h @@ -119,13 +119,17 @@ class SubstraitCall { bool output_nullable() const { return output_nullable_; } bool is_hash() const { return is_hash_; } - bool HasEnumArg(uint32_t index) const; - Result> GetEnumArg(uint32_t index) const; - void SetEnumArg(uint32_t index, std::optional enum_arg); - Result GetValueArg(uint32_t index) const; - bool HasValueArg(uint32_t index) const; - void SetValueArg(uint32_t index, compute::Expression value_arg); - uint32_t size() const { return size_; } + bool HasEnumArg(int index) const; + Result GetEnumArg(int index) const; + void SetEnumArg(int index, std::string enum_arg); + Result GetValueArg(int index) const; + bool HasValueArg(int index) const; + void SetValueArg(int index, compute::Expression value_arg); + std::optional const*> GetOption( + std::string_view option_name) const; + void SetOption(std::string_view option_name, + const std::vector& option_preferences); + int size() const { return size_; } private: Id id_; @@ -134,9 +138,10 @@ class SubstraitCall { // Only needed when converting from Substrait -> Arrow aggregates. The // Arrow function name depends on whether or not there are any groups bool is_hash_; - std::unordered_map> enum_args_; - std::unordered_map value_args_; - uint32_t size_ = 0; + std::unordered_map enum_args_; + std::unordered_map value_args_; + std::unordered_map> options_; + int size_ = 0; }; /// Substrait identifies functions and custom data types using a (uri, name) pair. diff --git a/cpp/src/arrow/engine/substrait/function_test.cc b/cpp/src/arrow/engine/substrait/function_test.cc index a401011839fac..e7a26f7dc38fe 100644 --- a/cpp/src/arrow/engine/substrait/function_test.cc +++ b/cpp/src/arrow/engine/substrait/function_test.cc @@ -19,6 +19,7 @@ #include #include +#include #include #include "arrow/array.h" @@ -43,6 +44,7 @@ namespace engine { struct FunctionTestCase { Id function_id; std::vector arguments; + std::unordered_map> options; std::vector> data_types; // For a test case that should fail just use the empty string std::string expected_output; @@ -98,10 +100,11 @@ Result> PlanFromTestCase( const FunctionTestCase& test_case, std::shared_ptr* output_table) { ARROW_ASSIGN_OR_RAISE(std::shared_ptr
input_table, GetInputTable(test_case.arguments, test_case.data_types)); - ARROW_ASSIGN_OR_RAISE(std::shared_ptr substrait, - internal::CreateScanProjectSubstrait( - test_case.function_id, input_table, test_case.arguments, - test_case.data_types, *test_case.expected_output_type)); + ARROW_ASSIGN_OR_RAISE( + std::shared_ptr substrait, + internal::CreateScanProjectSubstrait( + test_case.function_id, input_table, test_case.arguments, test_case.options, + test_case.data_types, *test_case.expected_output_type)); std::shared_ptr consumer = std::make_shared(output_table, default_memory_pool()); @@ -144,6 +147,8 @@ void CheckValidTestCases(const std::vector& valid_cases) { void CheckErrorTestCases(const std::vector& error_cases) { for (const FunctionTestCase& test_case : error_cases) { + ARROW_SCOPED_TRACE("func=", test_case.function_id.uri, "#", + test_case.function_id.name); std::shared_ptr
output_table; ASSERT_OK_AND_ASSIGN(std::shared_ptr plan, PlanFromTestCase(test_case, &output_table)); @@ -152,165 +157,217 @@ void CheckErrorTestCases(const std::vector& error_cases) { } } +template +void CheckNonYetImplementedTestCase(const FunctionTestCase& test_case, + ErrorMatcher error_matcher) { + ARROW_SCOPED_TRACE("func=", test_case.function_id.uri, "#", test_case.function_id.name); + std::shared_ptr
output_table; + EXPECT_RAISES_WITH_MESSAGE_THAT(NotImplemented, error_matcher, + PlanFromTestCase(test_case, &output_table)); +} + +static const std::unordered_map> kNoOptions; + // These are not meant to be an exhaustive test of Substrait // conformance. Instead, we should test just enough to ensure // we are mapping to the correct function TEST(FunctionMapping, ValidCases) { const std::vector valid_test_cases = { {{kSubstraitArithmeticFunctionsUri, "add"}, - {"SILENT", "127", "10"}, - {nullptr, int8(), int8()}, + {"127", "10"}, + {{"overflow", {"SILENT", "ERROR"}}}, + {int8(), int8()}, "-119", int8()}, {{kSubstraitArithmeticFunctionsUri, "subtract"}, - {"SILENT", "-119", "10"}, - {nullptr, int8(), int8()}, + {"-119", "10"}, + {{"overflow", {"SILENT", "ERROR"}}}, + {int8(), int8()}, "127", int8()}, {{kSubstraitArithmeticFunctionsUri, "multiply"}, - {"SILENT", "10", "13"}, - {nullptr, int8(), int8()}, + {"10", "13"}, + {{"overflow", {"SILENT", "ERROR"}}}, + {int8(), int8()}, "-126", int8()}, {{kSubstraitArithmeticFunctionsUri, "divide"}, - {"SILENT", "-128", "-1"}, - {nullptr, int8(), int8()}, + {"-128", "-1"}, + {{"overflow", {"SILENT", "ERROR"}}}, + {int8(), int8()}, "0", int8()}, {{kSubstraitBooleanFunctionsUri, "or"}, {"1", ""}, + kNoOptions, {boolean(), boolean()}, "1", boolean()}, {{kSubstraitBooleanFunctionsUri, "and"}, {"1", ""}, + kNoOptions, {boolean(), boolean()}, "", boolean()}, {{kSubstraitBooleanFunctionsUri, "xor"}, {"1", "1"}, + kNoOptions, {boolean(), boolean()}, "0", boolean()}, - {{kSubstraitBooleanFunctionsUri, "not"}, {"1"}, {boolean()}, "0", boolean()}, + {{kSubstraitBooleanFunctionsUri, "not"}, + {"1"}, + kNoOptions, + {boolean()}, + "0", + boolean()}, {{kSubstraitComparisonFunctionsUri, "equal"}, {"57", "57"}, + kNoOptions, {int8(), int8()}, "1", boolean()}, - {{kSubstraitComparisonFunctionsUri, "is_null"}, {"abc"}, {utf8()}, "0", boolean()}, + {{kSubstraitComparisonFunctionsUri, "is_null"}, + {"abc"}, + kNoOptions, + {utf8()}, + "0", + boolean()}, {{kSubstraitComparisonFunctionsUri, "is_not_null"}, {"57"}, + kNoOptions, {int8()}, "1", boolean()}, {{kSubstraitComparisonFunctionsUri, "not_equal"}, {"57", "57"}, + kNoOptions, {int8(), int8()}, "0", boolean()}, {{kSubstraitComparisonFunctionsUri, "lt"}, {"57", "80"}, + kNoOptions, {int8(), int8()}, "1", boolean()}, {{kSubstraitComparisonFunctionsUri, "lt"}, {"57", "57"}, + kNoOptions, {int8(), int8()}, "0", boolean()}, {{kSubstraitComparisonFunctionsUri, "gt"}, {"57", "30"}, + kNoOptions, {int8(), int8()}, "1", boolean()}, {{kSubstraitComparisonFunctionsUri, "gt"}, {"57", "57"}, + kNoOptions, {int8(), int8()}, "0", boolean()}, {{kSubstraitComparisonFunctionsUri, "lte"}, {"57", "57"}, + kNoOptions, {int8(), int8()}, "1", boolean()}, {{kSubstraitComparisonFunctionsUri, "lte"}, {"50", "57"}, + kNoOptions, {int8(), int8()}, "1", boolean()}, {{kSubstraitComparisonFunctionsUri, "gte"}, {"57", "57"}, + kNoOptions, {int8(), int8()}, "1", boolean()}, {{kSubstraitComparisonFunctionsUri, "gte"}, {"60", "57"}, + kNoOptions, {int8(), int8()}, "1", boolean()}, {{kSubstraitDatetimeFunctionsUri, "extract"}, {"YEAR", "2022-07-15T14:33:14"}, + kNoOptions, {nullptr, timestamp(TimeUnit::MICRO)}, "2022", int64()}, {{kSubstraitDatetimeFunctionsUri, "extract"}, {"MONTH", "2022-07-15T14:33:14"}, + kNoOptions, {nullptr, timestamp(TimeUnit::MICRO)}, "7", int64()}, {{kSubstraitDatetimeFunctionsUri, "extract"}, {"DAY", "2022-07-15T14:33:14"}, + kNoOptions, {nullptr, timestamp(TimeUnit::MICRO)}, "15", int64()}, {{kSubstraitDatetimeFunctionsUri, "extract"}, {"SECOND", "2022-07-15T14:33:14"}, + kNoOptions, {nullptr, timestamp(TimeUnit::MICRO)}, "14", int64()}, {{kSubstraitDatetimeFunctionsUri, "extract"}, {"YEAR", "2022-07-15T14:33:14Z"}, + kNoOptions, {nullptr, timestamp(TimeUnit::MICRO, "UTC")}, "2022", int64()}, {{kSubstraitDatetimeFunctionsUri, "extract"}, {"MONTH", "2022-07-15T14:33:14Z"}, + kNoOptions, {nullptr, timestamp(TimeUnit::MICRO, "UTC")}, "7", int64()}, {{kSubstraitDatetimeFunctionsUri, "extract"}, {"DAY", "2022-07-15T14:33:14Z"}, + kNoOptions, {nullptr, timestamp(TimeUnit::MICRO, "UTC")}, "15", int64()}, {{kSubstraitDatetimeFunctionsUri, "extract"}, {"SECOND", "2022-07-15T14:33:14Z"}, + kNoOptions, {nullptr, timestamp(TimeUnit::MICRO, "UTC")}, "14", int64()}, {{kSubstraitDatetimeFunctionsUri, "lt"}, {"2022-07-15T14:33:14", "2022-07-15T14:33:20"}, + kNoOptions, {timestamp(TimeUnit::MICRO), timestamp(TimeUnit::MICRO)}, "1", boolean()}, {{kSubstraitDatetimeFunctionsUri, "lte"}, {"2022-07-15T14:33:14", "2022-07-15T14:33:14"}, + kNoOptions, {timestamp(TimeUnit::MICRO), timestamp(TimeUnit::MICRO)}, "1", boolean()}, {{kSubstraitDatetimeFunctionsUri, "gt"}, {"2022-07-15T14:33:30", "2022-07-15T14:33:14"}, + kNoOptions, {timestamp(TimeUnit::MICRO), timestamp(TimeUnit::MICRO)}, "1", boolean()}, {{kSubstraitDatetimeFunctionsUri, "gte"}, {"2022-07-15T14:33:14", "2022-07-15T14:33:14"}, + kNoOptions, {timestamp(TimeUnit::MICRO), timestamp(TimeUnit::MICRO)}, "1", boolean()}, {{kSubstraitStringFunctionsUri, "concat"}, {"abc", "def"}, + kNoOptions, {utf8(), utf8()}, "abcdef", utf8()}}; @@ -320,28 +377,55 @@ TEST(FunctionMapping, ValidCases) { TEST(FunctionMapping, ErrorCases) { const std::vector error_test_cases = { {{kSubstraitArithmeticFunctionsUri, "add"}, - {"ERROR", "127", "10"}, - {nullptr, int8(), int8()}, + {"127", "10"}, + {{"overflow", {"ERROR", "SILENT"}}}, + {int8(), int8()}, "", int8()}, {{kSubstraitArithmeticFunctionsUri, "subtract"}, - {"ERROR", "-119", "10"}, - {nullptr, int8(), int8()}, + {"-119", "10"}, + {{"overflow", {"ERROR", "SILENT"}}}, + {int8(), int8()}, "", int8()}, {{kSubstraitArithmeticFunctionsUri, "multiply"}, - {"ERROR", "10", "13"}, - {nullptr, int8(), int8()}, + {"10", "13"}, + {{"overflow", {"ERROR", "SILENT"}}}, + {int8(), int8()}, "", int8()}, {{kSubstraitArithmeticFunctionsUri, "divide"}, - {"ERROR", "-128", "-1"}, - {nullptr, int8(), int8()}, + {"-128", "-1"}, + {{"overflow", {"ERROR", "SILENT"}}}, + {int8(), int8()}, "", int8()}}; CheckErrorTestCases(error_test_cases); } +TEST(FunctionMapping, UnrecognizedOptions) { + CheckNonYetImplementedTestCase( + {{kSubstraitArithmeticFunctionsUri, "add"}, + {"-119", "10"}, + {{"overflow", {"NEW_OVERFLOW_TYPE", "SILENT"}}}, + {int8(), int8()}, + "", + int8()}, + ::testing::HasSubstr("The value NEW_OVERFLOW_TYPE is not an expected enum value")); + CheckNonYetImplementedTestCase( + {{kSubstraitArithmeticFunctionsUri, "add"}, + {"-119", "10"}, + {{"overflow", {"SATURATE"}}}, + {int8(), int8()}, + "", + int8()}, + ::testing::HasSubstr( + "During a call to a function with id " + + std::string(kSubstraitArithmeticFunctionsUri) + + "#add the plan requested the option overflow to be one of [SATURATE] but the " + "only supported options are [SILENT, ERROR]")); +} + // For each aggregate test case we take in three values. We compute the // aggregate both on the entire set (all three values) and on groups. The // first two rows will be in the first group and the last row will be in the diff --git a/cpp/src/arrow/engine/substrait/plan_internal.cc b/cpp/src/arrow/engine/substrait/plan_internal.cc index 18915868ee0ef..64f129101f4fb 100644 --- a/cpp/src/arrow/engine/substrait/plan_internal.cc +++ b/cpp/src/arrow/engine/substrait/plan_internal.cc @@ -17,6 +17,7 @@ #include "arrow/engine/substrait/plan_internal.h" +#include "arrow/config.h" #include "arrow/dataset/plan.h" #include "arrow/engine/substrait/relation_internal.h" #include "arrow/result.h" @@ -132,10 +133,29 @@ Result GetExtensionSetFromPlan(const substrait::Plan& plan, conversion_options, registry); } +namespace { + +// FIXME Is there some way to get these from the cmake files? +constexpr uint32_t kSubstraitMajorVersion = 0; +constexpr uint32_t kSubstraitMinorVersion = 19; +constexpr uint32_t kSubstraitPatchVersion = 0; + +std::unique_ptr CreateVersion() { + auto version = std::make_unique(); + version->set_major(kSubstraitMajorVersion); + version->set_minor(kSubstraitMinorVersion); + version->set_patch(kSubstraitPatchVersion); + version->set_producer("Acero " + GetBuildInfo().version_string); + return version; +} + +} // namespace + Result> PlanToProto( const compute::Declaration& declr, ExtensionSet* ext_set, const ConversionOptions& conversion_options) { auto subs_plan = std::make_unique(); + subs_plan->set_allocated_version(CreateVersion().release()); auto plan_rel = std::make_unique(); auto rel_root = std::make_unique(); ARROW_ASSIGN_OR_RAISE(auto rel, ToProto(declr, ext_set, conversion_options)); diff --git a/cpp/src/arrow/engine/substrait/serde.cc b/cpp/src/arrow/engine/substrait/serde.cc index f8c846c5a2389..b2cac3f82e45f 100644 --- a/cpp/src/arrow/engine/substrait/serde.cc +++ b/cpp/src/arrow/engine/substrait/serde.cc @@ -139,12 +139,22 @@ DeclarationFactory MakeWriteDeclarationFactory( }; } +// FIXME - Replace with actual version that includes the change +constexpr uint32_t kMinimumMajorVersion = 0; +constexpr uint32_t kMinimumMinorVersion = 19; + Result> DeserializePlans( const Buffer& buf, DeclarationFactory declaration_factory, const ExtensionIdRegistry* registry, ExtensionSet* ext_set_out, const ConversionOptions& conversion_options) { ARROW_ASSIGN_OR_RAISE(auto plan, ParseFromBuffer(buf)); + if (plan.version().major() < kMinimumMajorVersion && + plan.version().minor() < kMinimumMinorVersion) { + return Status::Invalid("Can only parse plans with a version >= ", + kMinimumMajorVersion, ".", kMinimumMinorVersion); + } + ARROW_ASSIGN_OR_RAISE(auto ext_set, GetExtensionSetFromPlan(plan, conversion_options, registry)); diff --git a/cpp/src/arrow/engine/substrait/serde_test.cc b/cpp/src/arrow/engine/substrait/serde_test.cc index 8831b9d4674ed..a74b491f872f5 100644 --- a/cpp/src/arrow/engine/substrait/serde_test.cc +++ b/cpp/src/arrow/engine/substrait/serde_test.cc @@ -837,6 +837,7 @@ TEST(Substrait, RelWithHint) { TEST(Substrait, ExtensionSetFromPlan) { std::string substrait_json = R"({ + "version": { "major": 9999, "minor": 9999, "patch": 9999 }, "relations": [ {"rel": { "read": { @@ -986,6 +987,7 @@ TEST(Substrait, ExtensionSetFromPlanExhaustedFactory) { TEST(Substrait, ExtensionSetFromPlanRegisterFunc) { std::string substrait_json = R"({ + "version": { "major": 9999, "minor": 9999, "patch": 9999 }, "relations": [], "extension_uris": [ { @@ -1036,6 +1038,7 @@ Result GetSubstraitJSON() { auto file_path = file_name->ToString(); std::string substrait_json = R"({ + "version": { "major": 9999, "minor": 9999, "patch": 9999 }, "relations": [ {"rel": { "read": { @@ -1167,6 +1170,7 @@ TEST(Substrait, GetRecordBatchReader) { TEST(Substrait, InvalidPlan) { std::string substrait_json = R"({ + "version": { "major": 9999, "minor": 9999, "patch": 9999 }, "relations": [ ] })"; @@ -1177,8 +1181,34 @@ TEST(Substrait, InvalidPlan) { }); } +TEST(Substrait, InvalidMinimumVersion) { + ASSERT_OK_AND_ASSIGN(auto buf, internal::SubstraitFromJSON("Plan", R"({ + "version": { "major": 0, "minor": 18, "patch": 0 }, + "relations": [{ + "rel": { + "read": { + "base_schema": { + "names": ["A"], + "struct": { + "types": [{ + "i32": {} + }] + } + }, + "named_table": { "names": ["x"] } + } + } + }], + "extensionUris": [], + "extensions": [], + })")); + + ASSERT_RAISES(Invalid, DeserializePlans(*buf, [] { return kNullConsumer; })); +} + TEST(Substrait, JoinPlanBasic) { std::string substrait_json = R"({ + "version": { "major": 9999, "minor": 9999, "patch": 9999 }, "relations": [{ "rel": { "join": { @@ -1324,6 +1354,7 @@ TEST(Substrait, JoinPlanBasic) { TEST(Substrait, JoinPlanInvalidKeyCmp) { std::string substrait_json = R"({ + "version": { "major": 9999, "minor": 9999, "patch": 9999 }, "relations": [{ "rel": { "join": { @@ -1442,6 +1473,7 @@ TEST(Substrait, JoinPlanInvalidKeyCmp) { TEST(Substrait, JoinPlanInvalidExpression) { ASSERT_OK_AND_ASSIGN(auto buf, internal::SubstraitFromJSON("Plan", R"({ + "version": { "major": 9999, "minor": 9999, "patch": 9999 }, "relations": [{ "rel": { "join": { @@ -1511,6 +1543,7 @@ TEST(Substrait, JoinPlanInvalidExpression) { TEST(Substrait, JoinPlanInvalidKeys) { ASSERT_OK_AND_ASSIGN(auto buf, internal::SubstraitFromJSON("Plan", R"({ + "version": { "major": 9999, "minor": 9999, "patch": 9999 }, "relations": [{ "rel": { "join": { @@ -1585,6 +1618,7 @@ TEST(Substrait, JoinPlanInvalidKeys) { TEST(Substrait, AggregateBasic) { ASSERT_OK_AND_ASSIGN(auto buf, internal::SubstraitFromJSON("Plan", R"({ + "version": { "major": 9999, "minor": 9999, "patch": 9999 }, "relations": [{ "rel": { "aggregate": { @@ -1680,6 +1714,7 @@ TEST(Substrait, AggregateBasic) { TEST(Substrait, AggregateInvalidRel) { ASSERT_OK_AND_ASSIGN(auto buf, internal::SubstraitFromJSON("Plan", R"({ + "version": { "major": 9999, "minor": 9999, "patch": 9999 }, "relations": [{ "rel": { "aggregate": { @@ -1706,6 +1741,7 @@ TEST(Substrait, AggregateInvalidRel) { TEST(Substrait, AggregateInvalidFunction) { ASSERT_OK_AND_ASSIGN(auto buf, internal::SubstraitFromJSON("Plan", R"({ + "version": { "major": 9999, "minor": 9999, "patch": 9999 }, "relations": [{ "rel": { "aggregate": { @@ -1769,6 +1805,7 @@ TEST(Substrait, AggregateInvalidFunction) { TEST(Substrait, AggregateInvalidAggFuncArgs) { ASSERT_OK_AND_ASSIGN(auto buf, internal::SubstraitFromJSON("Plan", R"({ + "version": { "major": 9999, "minor": 9999, "patch": 9999 }, "relations": [{ "rel": { "aggregate": { @@ -1810,7 +1847,7 @@ TEST(Substrait, AggregateInvalidAggFuncArgs) { "measures": [{ "measure": { "functionReference": 0, - "args": [], + "arguments": [], "sorts": [], "phase": "AGGREGATION_PHASE_INITIAL_TO_RESULT", "invocation": "AGGREGATION_INVOCATION_ALL", @@ -1842,6 +1879,7 @@ TEST(Substrait, AggregateInvalidAggFuncArgs) { TEST(Substrait, AggregateWithFilter) { ASSERT_OK_AND_ASSIGN(auto buf, internal::SubstraitFromJSON("Plan", R"({ + "version": { "major": 9999, "minor": 9999, "patch": 9999 }, "relations": [{ "rel": { "aggregate": { @@ -1883,7 +1921,7 @@ TEST(Substrait, AggregateWithFilter) { "measures": [{ "measure": { "functionReference": 0, - "args": [], + "arguments": [], "sorts": [], "phase": "AGGREGATION_PHASE_INITIAL_TO_RESULT", "invocation": "AGGREGATION_INVOCATION_ALL", @@ -1915,6 +1953,7 @@ TEST(Substrait, AggregateWithFilter) { TEST(Substrait, AggregateBadPhase) { ASSERT_OK_AND_ASSIGN(auto buf, internal::SubstraitFromJSON("Plan", R"({ + "version": { "major": 9999, "minor": 9999, "patch": 9999 }, "relations": [{ "rel": { "aggregate": { @@ -1956,7 +1995,7 @@ TEST(Substrait, AggregateBadPhase) { "measures": [{ "measure": { "functionReference": 0, - "args": [], + "arguments": [], "sorts": [], "phase": "AGGREGATION_PHASE_INITIAL_TO_RESULT", "invocation": "AGGREGATION_INVOCATION_DISTINCT", @@ -1985,7 +2024,7 @@ TEST(Substrait, AggregateBadPhase) { ASSERT_RAISES(NotImplemented, DeserializePlans(*buf, [] { return kNullConsumer; })); } -TEST(Substrait, BasicPlanRoundTripping) { +TEST(SubstraitRoundTrip, BasicPlan) { #ifdef _WIN32 GTEST_SKIP() << "ARROW-16392: Substrait File URI not supported for Windows"; #endif @@ -2099,7 +2138,7 @@ TEST(Substrait, BasicPlanRoundTripping) { } } -TEST(Substrait, BasicPlanRoundTrippingEndToEnd) { +TEST(SubstraitRoundTrip, BasicPlanEndToEnd) { #ifdef _WIN32 GTEST_SKIP() << "ARROW-16392: Substrait File URI not supported for Windows"; #endif @@ -2215,7 +2254,7 @@ TEST(Substrait, BasicPlanRoundTrippingEndToEnd) { EXPECT_TRUE(expected_table->Equals(*rnd_trp_table)); } -TEST(Substrait, ProjectRel) { +TEST(SubstraitRoundTrip, ProjectRel) { #ifdef _WIN32 GTEST_SKIP() << "ARROW-16392: Substrait File URI not supported for Windows"; #endif @@ -2234,6 +2273,7 @@ TEST(Substrait, ProjectRel) { ])"}); std::string substrait_json = R"({ + "version": { "major": 9999, "minor": 9999, "patch": 9999 }, "relations": [{ "rel": { "project": { @@ -2332,7 +2372,7 @@ TEST(Substrait, ProjectRel) { buf, {}, conversion_options); } -TEST(Substrait, ProjectRelOnFunctionWithEmit) { +TEST(SubstraitRoundTrip, ProjectRelOnFunctionWithEmit) { #ifdef _WIN32 GTEST_SKIP() << "ARROW-16392: Substrait File URI not supported for Windows"; #endif @@ -2351,6 +2391,7 @@ TEST(Substrait, ProjectRelOnFunctionWithEmit) { ])"}); std::string substrait_json = R"({ + "version": { "major": 9999, "minor": 9999, "patch": 9999 }, "relations": [{ "rel": { "project": { @@ -2453,7 +2494,7 @@ TEST(Substrait, ProjectRelOnFunctionWithEmit) { buf, {}, conversion_options); } -TEST(Substrait, ReadRelWithEmit) { +TEST(SubstraitRoundTrip, ReadRelWithEmit) { #ifdef _WIN32 GTEST_SKIP() << "ARROW-16392: Substrait File URI not supported for Windows"; #endif @@ -2468,6 +2509,7 @@ TEST(Substrait, ReadRelWithEmit) { ])"}); std::string substrait_json = R"({ + "version": { "major": 9999, "minor": 9999, "patch": 9999 }, "relations": [{ "rel": { "read": { @@ -2514,7 +2556,7 @@ TEST(Substrait, ReadRelWithEmit) { buf, {}, conversion_options); } -TEST(Substrait, FilterRelWithEmit) { +TEST(SubstraitRoundTrip, FilterRelWithEmit) { #ifdef _WIN32 GTEST_SKIP() << "ARROW-16392: Substrait File URI not supported for Windows"; #endif @@ -2534,6 +2576,7 @@ TEST(Substrait, FilterRelWithEmit) { ])"}); std::string substrait_json = R"({ + "version": { "major": 9999, "minor": 9999, "patch": 9999 }, "relations": [{ "rel": { "filter": { @@ -2634,7 +2677,7 @@ TEST(Substrait, FilterRelWithEmit) { buf, {}, conversion_options); } -TEST(Substrait, JoinRelEndToEnd) { +TEST(SubstraitRoundTrip, JoinRel) { #ifdef _WIN32 GTEST_SKIP() << "ARROW-16392: Substrait File URI not supported for Windows"; #endif @@ -2657,6 +2700,7 @@ TEST(Substrait, JoinRelEndToEnd) { ])"}); std::string substrait_json = R"({ + "version": { "major": 9999, "minor": 9999, "patch": 9999 }, "relations": [{ "rel": { "join": { @@ -2786,7 +2830,7 @@ TEST(Substrait, JoinRelEndToEnd) { buf, {}, conversion_options); } -TEST(Substrait, JoinRelWithEmit) { +TEST(SubstraitRoundTrip, JoinRelWithEmit) { #ifdef _WIN32 GTEST_SKIP() << "ARROW-16392: Substrait File URI not supported for Windows"; #endif @@ -2809,6 +2853,7 @@ TEST(Substrait, JoinRelWithEmit) { ])"}); std::string substrait_json = R"({ + "version": { "major": 9999, "minor": 9999, "patch": 9999 }, "relations": [{ "rel": { "join": { @@ -2940,7 +2985,7 @@ TEST(Substrait, JoinRelWithEmit) { buf, {}, conversion_options); } -TEST(Substrait, AggregateRel) { +TEST(SubstraitRoundTrip, AggregateRel) { #ifdef _WIN32 GTEST_SKIP() << "ARROW-16392: Substrait File URI not supported for Windows"; #endif @@ -2960,6 +3005,7 @@ TEST(Substrait, AggregateRel) { ])"}); std::string substrait_json = R"({ + "version": { "major": 9999, "minor": 9999, "patch": 9999 }, "relations": [{ "rel": { "aggregate": { @@ -3051,7 +3097,7 @@ TEST(Substrait, AggregateRel) { buf, {}, conversion_options); } -TEST(Substrait, AggregateRelEmit) { +TEST(SubstraitRoundTrip, AggregateRelEmit) { #ifdef _WIN32 GTEST_SKIP() << "ARROW-16392: Substrait File URI not supported for Windows"; #endif @@ -3072,6 +3118,7 @@ TEST(Substrait, AggregateRelEmit) { // TODO: fixme https://issues.apache.org/jira/browse/ARROW-17484 std::string substrait_json = R"({ + "version": { "major": 9999, "minor": 9999, "patch": 9999 }, "relations": [{ "rel": { "aggregate": { @@ -3171,10 +3218,8 @@ TEST(Substrait, AggregateRelEmit) { TEST(Substrait, IsthmusPlan) { // This is a plan generated from Isthmus // isthmus -c "CREATE TABLE T1(foo int)" "SELECT foo + 1 FROM T1" - // - // The plan had to be modified slightly to introduce the missing enum - // argument that isthmus did not put there. std::string substrait_json = R"({ + "version": { "major": 9999, "minor": 9999, "patch": 9999 }, "extensionUris": [{ "extensionUriAnchor": 1, "uri": "/functions_arithmetic.yaml" @@ -3183,7 +3228,7 @@ TEST(Substrait, IsthmusPlan) { "extensionFunction": { "extensionUriReference": 1, "functionAnchor": 0, - "name": "add:opt_i32_i32" + "name": "add:i32_i32" } }], "relations": [{ @@ -3222,7 +3267,6 @@ TEST(Substrait, IsthmusPlan) { "expressions": [{ "scalarFunction": { "functionReference": 0, - "args": [], "outputType": { "i32": { "typeVariationReference": 0, @@ -3230,10 +3274,6 @@ TEST(Substrait, IsthmusPlan) { } }, "arguments": [{ - "enum": { - "unspecified": {} - } - }, { "value": { "selection": { "directReference": { @@ -3300,7 +3340,7 @@ TEST(Substrait, ProjectWithMultiFieldExpressions) { "extensionFunction": { "extensionUriReference": 1, "functionAnchor": 0, - "name": "add:opt_i32_i32" + "name": "add:i32_i32" } }], "relations": [{ @@ -3379,7 +3419,6 @@ TEST(Substrait, ProjectWithMultiFieldExpressions) { },{ "scalarFunction": { "functionReference": 0, - "args": [], "outputType": { "i32": { "typeVariationReference": 0, @@ -3387,10 +3426,6 @@ TEST(Substrait, ProjectWithMultiFieldExpressions) { } }, "arguments": [{ - "enum": { - "unspecified": {} - } - }, { "value": { "selection": { "directReference": { @@ -3490,7 +3525,6 @@ TEST(Substrait, NestedProjectWithMultiFieldExpressions) { "functionReference": 2, "outputType": {"i32": {}}, "arguments": [ - {"enum": {"unspecified": {}}}, {"value": {"selection": {"directReference": {"structField": {"field": 0}}}}}, {"value": {"literal": {"fp64": 10}}} ] @@ -3579,7 +3613,6 @@ TEST(Substrait, NestedEmitProjectWithMultiFieldExpressions) { "functionReference": 2, "outputType": {"i32": {}}, "arguments": [ - {"enum": {"unspecified": {}}}, {"value": {"selection": {"directReference": {"structField": {"field": 0}}}}}, {"value": {"literal": {"fp64": 10}}} ] diff --git a/cpp/src/arrow/engine/substrait/test_plan_builder.cc b/cpp/src/arrow/engine/substrait/test_plan_builder.cc index d175006c63bce..79820672ed9de 100644 --- a/cpp/src/arrow/engine/substrait/test_plan_builder.cc +++ b/cpp/src/arrow/engine/substrait/test_plan_builder.cc @@ -67,6 +67,7 @@ void CreateDirectReference(int32_t index, substrait::Expression* expr) { Result> CreateProject( Id function_id, const std::vector& arguments, + const std::unordered_map> options, const std::vector>& arg_types, const DataType& output_type, ExtensionSet* ext_set) { auto project = std::make_unique(); @@ -88,16 +89,18 @@ Result> CreateProject( // If it doesn't have a type then it's an enum const std::string& enum_value = arguments[arg_index]; auto enum_ = std::make_unique(); - if (enum_value.size() > 0) { - enum_->set_specified(enum_value); - } else { - auto unspecified = std::make_unique(); - enum_->set_allocated_unspecified(unspecified.release()); - } + enum_->set_specified(enum_value); argument->set_allocated_enum_(enum_.release()); } arg_index++; } + for (const auto& opt : options) { + substrait::FunctionOption* option = call->add_options(); + option->set_name(opt.first); + for (const std::string& pref : opt.second) { + option->add_preference(pref); + } + } ARROW_ASSIGN_OR_RAISE( std::unique_ptr output_type_substrait, @@ -150,9 +153,19 @@ Result> CreateAgg(Id function_id, return agg; } +std::unique_ptr CreateTestVersion() { + auto version = std::make_unique(); + version->set_major(std::numeric_limits::max()); + version->set_minor(std::numeric_limits::max()); + version->set_patch(std::numeric_limits::max()); + version->set_producer("Arrow unit test"); + return version; +} + Result> CreatePlan(std::unique_ptr root, ExtensionSet* ext_set) { auto plan = std::make_unique(); + plan->set_allocated_version(CreateTestVersion().release()); substrait::PlanRel* plan_rel = plan->add_relations(); auto rel_root = std::make_unique(); @@ -166,6 +179,7 @@ Result> CreatePlan(std::unique_ptr> CreateScanProjectSubstrait( Id function_id, const std::shared_ptr
& input_table, const std::vector& arguments, + const std::unordered_map>& options, const std::vector>& data_types, const DataType& output_type) { ExtensionSet ext_set; @@ -173,7 +187,7 @@ Result> CreateScanProjectSubstrait( CreateRead(*input_table, &ext_set)); ARROW_ASSIGN_OR_RAISE( std::unique_ptr project, - CreateProject(function_id, arguments, data_types, output_type, &ext_set)); + CreateProject(function_id, arguments, options, data_types, output_type, &ext_set)); auto read_rel = std::make_unique(); read_rel->set_allocated_read(read.release()); diff --git a/cpp/src/arrow/engine/substrait/test_plan_builder.h b/cpp/src/arrow/engine/substrait/test_plan_builder.h index 9d2d97a8cc9cc..5f6629e9054ce 100644 --- a/cpp/src/arrow/engine/substrait/test_plan_builder.h +++ b/cpp/src/arrow/engine/substrait/test_plan_builder.h @@ -27,6 +27,7 @@ #include #include +#include #include #include "arrow/buffer.h" @@ -55,6 +56,7 @@ namespace internal { ARROW_ENGINE_EXPORT Result> CreateScanProjectSubstrait( Id function_id, const std::shared_ptr
& input_table, const std::vector& arguments, + const std::unordered_map>& options, const std::vector>& data_types, const DataType& output_type); diff --git a/python/pyarrow/tests/test_substrait.py b/python/pyarrow/tests/test_substrait.py index 030e4aad8203f..e6358666f44ad 100644 --- a/python/pyarrow/tests/test_substrait.py +++ b/python/pyarrow/tests/test_substrait.py @@ -43,6 +43,7 @@ def _write_dummy_data_to_disk(tmpdir, file_name, table): def test_run_serialized_query(tmpdir): substrait_query = """ { + "version": { "major": 9999 }, "relations": [ {"rel": { "read": { @@ -116,6 +117,7 @@ def test_invalid_plan(): def test_binary_conversion_with_json_options(tmpdir): substrait_query = """ { + "version": { "major": 9999 }, "relations": [ {"rel": { "read": { @@ -195,6 +197,7 @@ def table_provider(names): substrait_query = """ { + "version": { "major": 9999 }, "relations": [ {"rel": { "read": { @@ -236,6 +239,7 @@ def table_provider(names): substrait_query = """ { + "version": { "major": 9999 }, "relations": [ {"rel": { "read": { @@ -277,6 +281,7 @@ def table_provider(names): substrait_query = """ { + "version": { "major": 9999 }, "relations": [ {"rel": { "read": {