Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

GH-14875: [C++] C Data Interface: check imported buffer for non-null #14814

Merged
merged 2 commits into from Jan 17, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
11 changes: 7 additions & 4 deletions cpp/src/arrow/array/validate.cc
Expand Up @@ -459,14 +459,17 @@ struct ValidateArrayImpl {
if (buffer == nullptr) {
continue;
}
int64_t min_buffer_size = -1;
int64_t min_buffer_size = 0;
switch (spec.kind) {
case DataTypeLayout::BITMAP:
min_buffer_size = bit_util::BytesForBits(length_plus_offset);
// If length == 0, buffer size can be 0 regardless of offset
if (data.length > 0) {
min_buffer_size = bit_util::BytesForBits(length_plus_offset);
}
break;
case DataTypeLayout::FIXED_WIDTH:
if (MultiplyWithOverflow(length_plus_offset, spec.byte_width,
&min_buffer_size)) {
if (data.length > 0 && MultiplyWithOverflow(length_plus_offset, spec.byte_width,
&min_buffer_size)) {
return Status::Invalid("Array of type ", type.ToString(),
" has impossibly large length and offset");
}
Expand Down
40 changes: 31 additions & 9 deletions cpp/src/arrow/c/bridge.cc
Expand Up @@ -31,6 +31,7 @@
#include "arrow/c/util_internal.h"
#include "arrow/extension_type.h"
#include "arrow/memory_pool.h"
#include "arrow/memory_pool_internal.h" // for kZeroSizeArea
#include "arrow/record_batch.h"
#include "arrow/result.h"
#include "arrow/stl_allocator.h"
Expand Down Expand Up @@ -60,6 +61,8 @@ using internal::SchemaExportTraits;

using internal::ToChars;

using memory_pool::internal::kZeroSizeArea;

namespace {

Status ExportingNotImplemented(const DataType& type) {
Expand Down Expand Up @@ -1265,7 +1268,8 @@ class ImportedBuffer : public Buffer {
};

struct ArrayImporter {
explicit ArrayImporter(const std::shared_ptr<DataType>& type) : type_(type) {}
explicit ArrayImporter(const std::shared_ptr<DataType>& type)
: type_(type), zero_size_buffer_(std::make_shared<Buffer>(kZeroSizeArea, 0)) {}

Status Import(struct ArrowArray* src) {
if (ArrowArrayIsReleased(src)) {
Expand Down Expand Up @@ -1529,7 +1533,7 @@ struct ArrayImporter {
}

Status ImportNullBitmap(int32_t buffer_id = 0) {
RETURN_NOT_OK(ImportBitsBuffer(buffer_id));
RETURN_NOT_OK(ImportBitsBuffer(buffer_id, /*is_null_bitmap=*/true));
if (data_->null_count > 0 && data_->buffers[buffer_id] == nullptr) {
return Status::Invalid(
"ArrowArray struct has null bitmap buffer but non-zero null_count ",
Expand All @@ -1538,15 +1542,20 @@ struct ArrayImporter {
return Status::OK();
}

Status ImportBitsBuffer(int32_t buffer_id) {
Status ImportBitsBuffer(int32_t buffer_id, bool is_null_bitmap = false) {
// Compute visible size of buffer
int64_t buffer_size = bit_util::BytesForBits(c_struct_->length + c_struct_->offset);
return ImportBuffer(buffer_id, buffer_size);
int64_t buffer_size =
(c_struct_->length > 0)
? bit_util::BytesForBits(c_struct_->length + c_struct_->offset)
: 0;
return ImportBuffer(buffer_id, buffer_size, is_null_bitmap);
}

Status ImportFixedSizeBuffer(int32_t buffer_id, int64_t byte_width) {
// Compute visible size of buffer
int64_t buffer_size = byte_width * (c_struct_->length + c_struct_->offset);
int64_t buffer_size = (c_struct_->length > 0)
? byte_width * (c_struct_->length + c_struct_->offset)
: 0;
return ImportBuffer(buffer_id, buffer_size);
}

Expand All @@ -1563,17 +1572,27 @@ struct ArrayImporter {
int64_t byte_width = 1) {
auto offsets = data_->GetValues<OffsetType>(offsets_buffer_id);
// Compute visible size of buffer
int64_t buffer_size = byte_width * offsets[c_struct_->length];
int64_t buffer_size =
(c_struct_->length > 0) ? byte_width * offsets[c_struct_->length] : 0;
return ImportBuffer(buffer_id, buffer_size);
}

Status ImportBuffer(int32_t buffer_id, int64_t buffer_size) {
Status ImportBuffer(int32_t buffer_id, int64_t buffer_size,
bool is_null_bitmap = false) {
std::shared_ptr<Buffer>* out = &data_->buffers[buffer_id];
auto data = reinterpret_cast<const uint8_t*>(c_struct_->buffers[buffer_id]);
if (data != nullptr) {
*out = std::make_shared<ImportedBuffer>(data, buffer_size, import_);
} else {
} else if (is_null_bitmap) {
out->reset();
} else {
// Ensure that imported buffers are never null (except for the null bitmap)
if (buffer_size != 0) {
return Status::Invalid(
"ArrowArrayStruct contains null data pointer "
"for a buffer with non-zero computed size");
}
*out = zero_size_buffer_;
}
return Status::OK();
}
Expand All @@ -1585,6 +1604,9 @@ struct ArrayImporter {
std::shared_ptr<ImportedArrayData> import_;
std::shared_ptr<ArrayData> data_;
std::vector<ArrayImporter> child_importers_;

// For imported null buffer pointers
std::shared_ptr<Buffer> zero_size_buffer_;
};

} // namespace
Expand Down
132 changes: 129 additions & 3 deletions cpp/src/arrow/c/bridge_test.cc
Expand Up @@ -124,6 +124,24 @@ class ReleaseCallback {
using SchemaReleaseCallback = ReleaseCallback<SchemaExportTraits>;
using ArrayReleaseCallback = ReleaseCallback<ArrayExportTraits>;

// Whether c_struct or any of its descendents have non-null data pointers.
bool HasData(const ArrowArray* c_struct) {
for (int64_t i = 0; i < c_struct->n_buffers; ++i) {
if (c_struct->buffers[i] != nullptr) {
return true;
}
}
if (c_struct->dictionary && HasData(c_struct->dictionary)) {
return true;
}
for (int64_t i = 0; i < c_struct->n_children; ++i) {
if (HasData(c_struct->children[i])) {
return true;
}
}
return false;
}

static const std::vector<std::string> kMetadataKeys1{"key1", "key2"};
static const std::vector<std::string> kMetadataValues1{"", "bar"};

Expand Down Expand Up @@ -1659,6 +1677,8 @@ static const uint8_t bits_buffer1[] = {0xed, 0xed};
static const void* buffers_no_nulls_no_data[1] = {nullptr};
static const void* buffers_nulls_no_data1[1] = {bits_buffer1};

static const void* all_buffers_omitted[3] = {nullptr, nullptr, nullptr};

static const uint8_t data_buffer1[] = {1, 2, 3, 4, 5, 6, 7, 8,
9, 10, 11, 12, 13, 14, 15, 16};
static const uint8_t data_buffer2[] = "abcdefghijklmnopqrstuvwxyz";
Expand Down Expand Up @@ -1724,10 +1744,13 @@ static const uint8_t string_data_buffer1[] = "foobarquuxxyzzy";
static const int32_t string_offsets_buffer1[] = {0, 3, 3, 6, 10, 15};
static const void* string_buffers_no_nulls1[3] = {nullptr, string_offsets_buffer1,
string_data_buffer1};
static const void* string_buffers_omitted[3] = {nullptr, string_offsets_buffer1, nullptr};

static const int64_t large_string_offsets_buffer1[] = {0, 3, 3, 6, 10};
static const void* large_string_buffers_no_nulls1[3] = {
nullptr, large_string_offsets_buffer1, string_data_buffer1};
static const void* large_string_buffers_omitted[3] = {
nullptr, large_string_offsets_buffer1, nullptr};

static const int32_t list_offsets_buffer1[] = {0, 2, 2, 5, 6, 8};
static const void* list_buffers_no_nulls1[2] = {nullptr, list_offsets_buffer1};
Expand Down Expand Up @@ -1901,9 +1924,9 @@ class TestArrayImport : public ::testing::Test {
Reset(); // for further tests

ASSERT_OK(array->ValidateFull());
// Special case: Null array doesn't have any data, so it needn't
// keep the ArrowArray struct alive.
if (type->id() != Type::NA) {
// Special case: arrays without data (such as Null arrays) needn't keep
// the ArrowArray struct alive.
if (HasData(&c_struct_)) {
cb.AssertNotCalled();
}
AssertArraysEqual(*expected, *array, true);
Expand Down Expand Up @@ -1990,6 +2013,10 @@ TEST_F(TestArrayImport, Primitive) {
CheckImport(ArrayFromJSON(boolean(), "[true, null, false]"));
FillPrimitive(3, 1, 0, primitive_buffers_nulls1_8);
CheckImport(ArrayFromJSON(boolean(), "[true, null, false]"));

// Empty array with null data pointers
FillPrimitive(0, 0, 0, all_buffers_omitted);
CheckImport(ArrayFromJSON(int32(), "[]"));
}

TEST_F(TestArrayImport, Temporal) {
Expand Down Expand Up @@ -2070,6 +2097,12 @@ TEST_F(TestArrayImport, PrimitiveWithOffset) {

FillPrimitive(4, 0, 7, primitive_buffers_no_nulls1_8);
CheckImport(ArrayFromJSON(boolean(), "[false, false, true, false]"));

// Empty array with null data pointers
FillPrimitive(0, 0, 2, all_buffers_omitted);
CheckImport(ArrayFromJSON(int32(), "[]"));
FillPrimitive(0, 0, 3, all_buffers_omitted);
CheckImport(ArrayFromJSON(boolean(), "[]"));
}

TEST_F(TestArrayImport, NullWithOffset) {
Expand All @@ -2092,10 +2125,48 @@ TEST_F(TestArrayImport, String) {
FillStringLike(4, 0, 0, large_string_buffers_no_nulls1);
CheckImport(ArrayFromJSON(large_binary(), R"(["foo", "", "bar", "quux"])"));

// Empty array with null data pointers
FillStringLike(0, 0, 0, string_buffers_omitted);
CheckImport(ArrayFromJSON(utf8(), "[]"));
FillStringLike(0, 0, 0, large_string_buffers_omitted);
CheckImport(ArrayFromJSON(large_binary(), "[]"));
}

TEST_F(TestArrayImport, StringWithOffset) {
FillStringLike(3, 0, 1, string_buffers_no_nulls1);
CheckImport(ArrayFromJSON(utf8(), R"(["", "bar", "quux"])"));
FillStringLike(2, 0, 2, large_string_buffers_no_nulls1);
CheckImport(ArrayFromJSON(large_utf8(), R"(["bar", "quux"])"));

// Empty array with null data pointers
FillStringLike(0, 0, 1, string_buffers_omitted);
CheckImport(ArrayFromJSON(utf8(), "[]"));
}

TEST_F(TestArrayImport, FixedSizeBinary) {
FillPrimitive(2, 0, 0, primitive_buffers_no_nulls2);
CheckImport(ArrayFromJSON(fixed_size_binary(3), R"(["abc", "def"])"));
FillPrimitive(2, 0, 0, primitive_buffers_no_nulls3);
CheckImport(ArrayFromJSON(decimal(15, 4), R"(["12345.6789", "98765.4321"])"));

// Empty array with null data pointers
FillPrimitive(0, 0, 0, all_buffers_omitted);
CheckImport(ArrayFromJSON(fixed_size_binary(3), "[]"));
FillPrimitive(0, 0, 0, all_buffers_omitted);
CheckImport(ArrayFromJSON(decimal(15, 4), "[]"));
}

TEST_F(TestArrayImport, FixedSizeBinaryWithOffset) {
FillPrimitive(1, 0, 1, primitive_buffers_no_nulls2);
CheckImport(ArrayFromJSON(fixed_size_binary(3), R"(["def"])"));
FillPrimitive(1, 0, 1, primitive_buffers_no_nulls3);
CheckImport(ArrayFromJSON(decimal(15, 4), R"(["98765.4321"])"));

// Empty array with null data pointers
FillPrimitive(0, 0, 1, all_buffers_omitted);
CheckImport(ArrayFromJSON(fixed_size_binary(3), "[]"));
FillPrimitive(0, 0, 1, all_buffers_omitted);
CheckImport(ArrayFromJSON(decimal(15, 4), "[]"));
}

TEST_F(TestArrayImport, List) {
Expand All @@ -2117,6 +2188,11 @@ TEST_F(TestArrayImport, List) {
FillFixedSizeListLike(3, 0, 0, buffers_no_nulls_no_data);
CheckImport(
ArrayFromJSON(fixed_size_list(int8(), 3), "[[1, 2, 3], [4, 5, 6], [7, 8, 9]]"));

// Empty child array with null data pointers
FillPrimitive(AddChild(), 0, 0, 0, all_buffers_omitted);
FillFixedSizeListLike(0, 0, 0, buffers_no_nulls_no_data);
CheckImport(ArrayFromJSON(fixed_size_list(int8(), 3), "[]"));
}

TEST_F(TestArrayImport, NestedList) {
Expand Down Expand Up @@ -2205,6 +2281,15 @@ TEST_F(TestArrayImport, SparseUnion) {
FillUnionLike(UnionMode::SPARSE, 4, 0, 0, 2, sparse_union_buffers1_legacy,
/*legacy=*/true);
CheckImport(expected);

// Empty array with null data pointers
expected = ArrayFromJSON(type, "[]");
FillStringLike(AddChild(), 0, 0, 0, string_buffers_omitted);
FillPrimitive(AddChild(), 0, 0, 0, all_buffers_omitted);
FillUnionLike(UnionMode::SPARSE, 0, 0, 0, 2, all_buffers_omitted, /*legacy=*/false);
FillStringLike(AddChild(), 0, 0, 0, string_buffers_omitted);
FillPrimitive(AddChild(), 0, 0, 0, all_buffers_omitted);
FillUnionLike(UnionMode::SPARSE, 0, 0, 3, 2, all_buffers_omitted, /*legacy=*/false);
}

TEST_F(TestArrayImport, DenseUnion) {
Expand All @@ -2223,6 +2308,15 @@ TEST_F(TestArrayImport, DenseUnion) {
FillUnionLike(UnionMode::DENSE, 5, 0, 0, 2, dense_union_buffers1_legacy,
/*legacy=*/true);
CheckImport(expected);

// Empty array with null data pointers
expected = ArrayFromJSON(type, "[]");
FillStringLike(AddChild(), 0, 0, 0, string_buffers_omitted);
FillPrimitive(AddChild(), 0, 0, 0, all_buffers_omitted);
FillUnionLike(UnionMode::DENSE, 0, 0, 0, 2, all_buffers_omitted, /*legacy=*/false);
FillStringLike(AddChild(), 0, 0, 0, string_buffers_omitted);
FillPrimitive(AddChild(), 0, 0, 0, all_buffers_omitted);
FillUnionLike(UnionMode::DENSE, 0, 0, 3, 2, all_buffers_omitted, /*legacy=*/false);
}

TEST_F(TestArrayImport, StructWithOffset) {
Expand Down Expand Up @@ -2359,6 +2453,29 @@ TEST_F(TestArrayImport, PrimitiveError) {
// Zero null bitmap but non-zero null_count
FillPrimitive(3, 1, 0, primitive_buffers_no_nulls1_8);
CheckImportError(int8());

// Null data pointers with non-zero length
FillPrimitive(1, 0, 0, all_buffers_omitted);
CheckImportError(int8());
FillPrimitive(1, 0, 0, all_buffers_omitted);
CheckImportError(boolean());
FillPrimitive(1, 0, 0, all_buffers_omitted);
CheckImportError(fixed_size_binary(3));
}

TEST_F(TestArrayImport, StringError) {
// Bad number of buffers
FillStringLike(4, 0, 0, string_buffers_no_nulls1);
c_struct_.n_buffers = 2;
CheckImportError(utf8());

// Null data pointers with non-zero length
FillStringLike(4, 0, 0, string_buffers_omitted);
CheckImportError(utf8());

// Null offsets pointer
FillStringLike(0, 0, 0, all_buffers_omitted);
CheckImportError(utf8());
}

TEST_F(TestArrayImport, StructError) {
Expand All @@ -2369,6 +2486,13 @@ TEST_F(TestArrayImport, StructError) {
CheckImportError(struct_({field("strs", utf8())}));
}

TEST_F(TestArrayImport, ListError) {
// Null offsets pointer
FillPrimitive(AddChild(), 0, 0, 0, primitive_buffers_no_nulls1_8);
FillListLike(0, 0, 0, all_buffers_omitted);
CheckImportError(list(int8()));
}

TEST_F(TestArrayImport, MapError) {
// Bad number of (struct) children in map child
FillStringLike(AddChild(), 5, 0, 0, string_buffers_no_nulls1);
Expand Down Expand Up @@ -2859,8 +2983,10 @@ TEST_F(TestArrayRoundtrip, UnknownNullCount) {
TEST_F(TestArrayRoundtrip, List) {
TestWithJSON(list(int32()), "[]");
TestWithJSON(list(int32()), "[[4, 5], [6, null], null]");
TestWithJSON(fixed_size_list(int32(), 3), "[[4, 5, 6], null, [7, 8, null]]");

TestWithJSONSliced(list(int32()), "[[4, 5], [6, null], null]");
TestWithJSONSliced(fixed_size_list(int32(), 3), "[[4, 5, 6], null, [7, 8, null]]");
}

TEST_F(TestArrayRoundtrip, Struct) {
Expand Down