From 0648ed2d77e2079e33870c243ef0c6f64caafb11 Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Thu, 3 Mar 2016 17:04:52 -0800 Subject: [PATCH 1/7] Prototyping --- cpp/src/arrow/schema-test.cc | 7 +-- cpp/src/arrow/table/CMakeLists.txt | 38 ++++++++++++ cpp/src/arrow/table/column-test.cc | 67 +++++++++++++++++++++ cpp/src/arrow/table/column.h | 94 ++++++++++++++++++++++++++++++ cpp/src/arrow/type.cc | 12 ++++ cpp/src/arrow/type.h | 18 +++++- 6 files changed, 231 insertions(+), 5 deletions(-) create mode 100644 cpp/src/arrow/table/CMakeLists.txt create mode 100644 cpp/src/arrow/table/column-test.cc create mode 100644 cpp/src/arrow/table/column.h diff --git a/cpp/src/arrow/schema-test.cc b/cpp/src/arrow/schema-test.cc index 3debb9cec3c00..7c190d068c2a6 100644 --- a/cpp/src/arrow/schema-test.cc +++ b/cpp/src/arrow/schema-test.cc @@ -31,7 +31,7 @@ using std::vector; namespace arrow { TEST(TestField, Basics) { - shared_ptr ftype = std::make_shared(); + shared_ptr ftype = INT32; shared_ptr ftype_nn = std::make_shared(false); Field f0("f0", ftype); Field f0_nn("f0", ftype_nn); @@ -44,7 +44,7 @@ TEST(TestField, Basics) { } TEST(TestField, Equals) { - shared_ptr ftype = std::make_shared(); + shared_ptr ftype = INT32; shared_ptr ftype_nn = std::make_shared(false); Field f0("f0", ftype); @@ -61,8 +61,7 @@ class TestSchema : public ::testing::Test { }; TEST_F(TestSchema, Basics) { - auto f0 = std::make_shared("f0", std::make_shared()); - + auto f0 = std::make_shared("f0", INT32); auto f1 = std::make_shared("f1", std::make_shared(false)); auto f1_optional = std::make_shared("f1", std::make_shared()); diff --git a/cpp/src/arrow/table/CMakeLists.txt b/cpp/src/arrow/table/CMakeLists.txt new file mode 100644 index 0000000000000..89a36547e4a94 --- /dev/null +++ b/cpp/src/arrow/table/CMakeLists.txt @@ -0,0 +1,38 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +####################################### +# arrow_table +####################################### + +set(TABLE_SRCS +) + +set(TABLE_LIBS +) + +add_library(arrow_table STATIC + ${TABLE_SRCS} +) +target_link_libraries(arrow_table ${TABLE_LIBS}) +SET_TARGET_PROPERTIES(arrow_table PROPERTIES LINKER_LANGUAGE CXX) + +# Headers: top level +install(FILES + DESTINATION include/arrow/table) + +ADD_ARROW_TEST(column-test) diff --git a/cpp/src/arrow/table/column-test.cc b/cpp/src/arrow/table/column-test.cc new file mode 100644 index 0000000000000..7ea62c8d35207 --- /dev/null +++ b/cpp/src/arrow/table/column-test.cc @@ -0,0 +1,67 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include +#include +#include +#include +#include + +#include "arrow/field.h" +#include "arrow/schema.h" +#include "arrow/table/column.h" +#include "arrow/type.h" + +using std::shared_ptr; +using std::vector; + +namespace arrow { + +class TestColumn : public ::testing::Test { + public: + void SetUp() { + pool_ = GetDefaultMemoryPool(); + } + + std::shared_ptr MakeInt32Array(int32_t length, int32_t null_count = 0) { + + } + + protected: + MemoryPool* pool_; + + std::shared_ptr data_; + std::unique_ptr column_; +}; + +TEST_F(TestColumn, BasicAPI) { + ArrayVector arrays; + + arrays.push_back(MakeInt32Array(100)); + arrays.push_back(MakeInt32Array(100, 10)); + arrays.push_back(MakeInt32Array(100, 20)); + + auto field = std::make_shared("c0", + + column_.reset(new Column(arrays) +} + +TEST_F(TestColumn, ChunksInhomogeneous) { + +} + +} // namespace arrow diff --git a/cpp/src/arrow/table/column.h b/cpp/src/arrow/table/column.h new file mode 100644 index 0000000000000..5facb97adb8b7 --- /dev/null +++ b/cpp/src/arrow/table/column.h @@ -0,0 +1,94 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#ifndef ARROW_TABLE_COLUMN_H +#define ARROW_TABLE_COLUMN_H + +#include +#include +#include + +#include "arrow/array.h" +#include "arrow/field.h" + +namespace arrow { + +typedef std::vector > ArrayVector; + +// A data structure managing a list of primitive Arrow arrays logically as one +// large array +class ChunkedArray { + public: + ChunkedArray(const ArrayVector& chunks); + + // @returns: the total length of the chunked array; computed on construction + int64_t length() const { + return length_; + } + + int num_chunks() const { + return chunks_.size(); + } + + const std::shared_ptr& chunk(int i) const; + + int64_t total_null_count() const; + + protected: + ArrayVector chunks_; + int64_t length_; +}; + +// An immutable column data structure consisting of a field (type metadata) and +// a logical chunked data array (which can be validated as all being the same +// type). +class Column { + public: + Column(const std::shared_ptr& field, const ArrayVector& chunks); + Column(const std::shared_ptr& field, + const std::shared_ptr& data); + + int64_t length() const { + return data_->length(); + } + + // @returns: the column's name in the passed metadata + const std::string& name() const { + return field_->name(); + } + + // @returns: the column's type according to the metadata + const std::shared_ptr& type() const { + return field_->type; + } + + // @returns: the column's data as a chunked logical array + const std::shared_ptr& data() const { + return data_; + } + // Verify that the column's array data is consistent with the passed field's + // metadata + Status ValidateData(); + + protected: + std::shared_ptr field_; + std::shared_ptr data_; +}; + +} // namespace arrow + +#endif // ARROW_TABLE_COLUMN_H diff --git a/cpp/src/arrow/type.cc b/cpp/src/arrow/type.cc index 492eee52b04b1..ff145e2c1e3b4 100644 --- a/cpp/src/arrow/type.cc +++ b/cpp/src/arrow/type.cc @@ -19,4 +19,16 @@ namespace arrow { +const std::shared_ptr BOOL = std::make_shared(); +const std::shared_ptr UINT8 = std::make_shared(); +const std::shared_ptr UINT16 = std::make_shared(); +const std::shared_ptr UINT32 = std::make_shared(); +const std::shared_ptr UINT64 = std::make_shared(); +const std::shared_ptr INT8 = std::make_shared(); +const std::shared_ptr INT16 = std::make_shared(); +const std::shared_ptr INT32 = std::make_shared(); +const std::shared_ptr INT64 = std::make_shared(); +const std::shared_ptr FLOAT = std::make_shared(); +const std::shared_ptr DOUBLE = std::make_shared(); + } // namespace arrow diff --git a/cpp/src/arrow/type.h b/cpp/src/arrow/type.h index 04cdb52b535db..92cb47e13c403 100644 --- a/cpp/src/arrow/type.h +++ b/cpp/src/arrow/type.h @@ -133,7 +133,7 @@ struct LogicalType { }; }; -struct DataType { +struct DataType : public std::enable_shared_from_this { LogicalType::type type; bool nullable; @@ -141,6 +141,10 @@ struct DataType { type(type), nullable(nullable) {} + std::shared_ptr operator()() { + return shared_from_this(); + } + virtual bool Equals(const DataType* other) { return this == other || (this->type == other->type && this->nullable == other->nullable); @@ -244,6 +248,18 @@ struct DoubleType : public PrimitiveType { PRIMITIVE_DECL(DoubleType, double, DOUBLE, 8, "double"); }; +extern const std::shared_ptr BOOL; +extern const std::shared_ptr UINT8; +extern const std::shared_ptr UINT16; +extern const std::shared_ptr UINT32; +extern const std::shared_ptr UINT64; +extern const std::shared_ptr INT8; +extern const std::shared_ptr INT16; +extern const std::shared_ptr INT32; +extern const std::shared_ptr INT64; +extern const std::shared_ptr FLOAT; +extern const std::shared_ptr DOUBLE; + } // namespace arrow #endif // ARROW_TYPE_H From a565d2620a2db5524303d9f1b807449b684d24ca Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Thu, 3 Mar 2016 19:12:06 -0800 Subject: [PATCH 2/7] Add ChunkedArray / Column ctors, test passes --- cpp/CMakeLists.txt | 2 ++ cpp/src/arrow/table/CMakeLists.txt | 1 + cpp/src/arrow/table/column-test.cc | 15 +++++++--- cpp/src/arrow/table/column.cc | 44 ++++++++++++++++++++++++++++++ cpp/src/arrow/table/column.h | 2 +- 5 files changed, 59 insertions(+), 5 deletions(-) create mode 100644 cpp/src/arrow/table/column.cc diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index f425c5f310673..15afb1acf67cf 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -458,10 +458,12 @@ endif() add_subdirectory(src/arrow) add_subdirectory(src/arrow/util) +add_subdirectory(src/arrow/table) add_subdirectory(src/arrow/types) set(LINK_LIBS arrow_util + arrow_table arrow_types) set(ARROW_SRCS diff --git a/cpp/src/arrow/table/CMakeLists.txt b/cpp/src/arrow/table/CMakeLists.txt index 89a36547e4a94..a401622d2e0d7 100644 --- a/cpp/src/arrow/table/CMakeLists.txt +++ b/cpp/src/arrow/table/CMakeLists.txt @@ -20,6 +20,7 @@ ####################################### set(TABLE_SRCS + column.cc ) set(TABLE_LIBS diff --git a/cpp/src/arrow/table/column-test.cc b/cpp/src/arrow/table/column-test.cc index 7ea62c8d35207..8d8a26fa29b70 100644 --- a/cpp/src/arrow/table/column-test.cc +++ b/cpp/src/arrow/table/column-test.cc @@ -25,6 +25,10 @@ #include "arrow/schema.h" #include "arrow/table/column.h" #include "arrow/type.h" +#include "arrow/types/integer.h" +#include "arrow/util/buffer.h" +#include "arrow/util/memory-pool.h" +#include "arrow/util/status.h" using std::shared_ptr; using std::vector; @@ -38,7 +42,11 @@ class TestColumn : public ::testing::Test { } std::shared_ptr MakeInt32Array(int32_t length, int32_t null_count = 0) { - + auto data = std::make_shared(pool_); + auto nulls = std::make_shared(pool_); + data->Resize(400); + data->Resize(13); + return std::make_shared(100, data, 10, nulls); } protected: @@ -55,9 +63,8 @@ TEST_F(TestColumn, BasicAPI) { arrays.push_back(MakeInt32Array(100, 10)); arrays.push_back(MakeInt32Array(100, 20)); - auto field = std::make_shared("c0", - - column_.reset(new Column(arrays) + auto field = std::make_shared("c0", INT32); + column_.reset(new Column(field, arrays)); } TEST_F(TestColumn, ChunksInhomogeneous) { diff --git a/cpp/src/arrow/table/column.cc b/cpp/src/arrow/table/column.cc new file mode 100644 index 0000000000000..7d9951886a009 --- /dev/null +++ b/cpp/src/arrow/table/column.cc @@ -0,0 +1,44 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "arrow/table/column.h" + +#include + +#include "arrow/field.h" + +namespace arrow { + +ChunkedArray::ChunkedArray(const ArrayVector& chunks) : + chunks_(chunks) { + length_ = 0; + for (auto chunk : chunks) { + length_ += chunk->length(); + } +} + +Column::Column(const std::shared_ptr& field, const ArrayVector& chunks) : + field_(field) { + data_ = std::make_shared(chunks); +} + +Column::Column(const std::shared_ptr& field, + const std::shared_ptr& data) : + field_(field), + data_(data) {} + +} // namespace arrow diff --git a/cpp/src/arrow/table/column.h b/cpp/src/arrow/table/column.h index 5facb97adb8b7..fc487c7dfa409 100644 --- a/cpp/src/arrow/table/column.h +++ b/cpp/src/arrow/table/column.h @@ -68,7 +68,7 @@ class Column { // @returns: the column's name in the passed metadata const std::string& name() const { - return field_->name(); + return field_->name; } // @returns: the column's type according to the metadata From 7049314272f2f3ae1b6fde53787c127f7a79c8cf Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Thu, 3 Mar 2016 19:12:40 -0800 Subject: [PATCH 3/7] cpplint --- cpp/src/arrow/table/column-test.cc | 1 - cpp/src/arrow/table/column.h | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/cpp/src/arrow/table/column-test.cc b/cpp/src/arrow/table/column-test.cc index 8d8a26fa29b70..43769c3430585 100644 --- a/cpp/src/arrow/table/column-test.cc +++ b/cpp/src/arrow/table/column-test.cc @@ -68,7 +68,6 @@ TEST_F(TestColumn, BasicAPI) { } TEST_F(TestColumn, ChunksInhomogeneous) { - } } // namespace arrow diff --git a/cpp/src/arrow/table/column.h b/cpp/src/arrow/table/column.h index fc487c7dfa409..e834e2f3013b0 100644 --- a/cpp/src/arrow/table/column.h +++ b/cpp/src/arrow/table/column.h @@ -33,7 +33,7 @@ typedef std::vector > ArrayVector; // large array class ChunkedArray { public: - ChunkedArray(const ArrayVector& chunks); + explicit ChunkedArray(const ArrayVector& chunks); // @returns: the total length of the chunked array; computed on construction int64_t length() const { From de9ec701812103336f0174e881b90b3012a7714b Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Thu, 3 Mar 2016 19:15:26 -0800 Subject: [PATCH 4/7] Aggregate null counts too --- cpp/src/arrow/table/column-test.cc | 4 ++++ cpp/src/arrow/table/column.cc | 1 + cpp/src/arrow/table/column.h | 11 +++++++++-- 3 files changed, 14 insertions(+), 2 deletions(-) diff --git a/cpp/src/arrow/table/column-test.cc b/cpp/src/arrow/table/column-test.cc index 43769c3430585..a5ca9931a9624 100644 --- a/cpp/src/arrow/table/column-test.cc +++ b/cpp/src/arrow/table/column-test.cc @@ -65,6 +65,10 @@ TEST_F(TestColumn, BasicAPI) { auto field = std::make_shared("c0", INT32); column_.reset(new Column(field, arrays)); + + ASSERT_EQ(300, column_->length()); + ASSERT_EQ(30, column_->null_count()); + ASSERT_EQ(3, column_->data()->num_chunks()); } TEST_F(TestColumn, ChunksInhomogeneous) { diff --git a/cpp/src/arrow/table/column.cc b/cpp/src/arrow/table/column.cc index 7d9951886a009..bb0d833b36c32 100644 --- a/cpp/src/arrow/table/column.cc +++ b/cpp/src/arrow/table/column.cc @@ -28,6 +28,7 @@ ChunkedArray::ChunkedArray(const ArrayVector& chunks) : length_ = 0; for (auto chunk : chunks) { length_ += chunk->length(); + null_count_ += chunk->null_count(); } } diff --git a/cpp/src/arrow/table/column.h b/cpp/src/arrow/table/column.h index e834e2f3013b0..8460059b83301 100644 --- a/cpp/src/arrow/table/column.h +++ b/cpp/src/arrow/table/column.h @@ -40,17 +40,20 @@ class ChunkedArray { return length_; } + int64_t null_count() const { + return null_count_; + } + int num_chunks() const { return chunks_.size(); } const std::shared_ptr& chunk(int i) const; - int64_t total_null_count() const; - protected: ArrayVector chunks_; int64_t length_; + int64_t null_count_; }; // An immutable column data structure consisting of a field (type metadata) and @@ -66,6 +69,10 @@ class Column { return data_->length(); } + int64_t null_count() const { + return data_->null_count(); + } + // @returns: the column's name in the passed metadata const std::string& name() const { return field_->name; From 8a2e40e37619d5abe9d9c966f08cff58964a9bfc Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Thu, 3 Mar 2016 19:22:00 -0800 Subject: [PATCH 5/7] Remove unneeded operator()/shared_from_this experiment --- cpp/src/arrow/type.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/cpp/src/arrow/type.h b/cpp/src/arrow/type.h index 92cb47e13c403..e5504c4713d17 100644 --- a/cpp/src/arrow/type.h +++ b/cpp/src/arrow/type.h @@ -133,7 +133,7 @@ struct LogicalType { }; }; -struct DataType : public std::enable_shared_from_this { +struct DataType { LogicalType::type type; bool nullable; @@ -141,10 +141,6 @@ struct DataType : public std::enable_shared_from_this { type(type), nullable(nullable) {} - std::shared_ptr operator()() { - return shared_from_this(); - } - virtual bool Equals(const DataType* other) { return this == other || (this->type == other->type && this->nullable == other->nullable); From 988135c3901e41488ef4a2feffca8309bbeee21c Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Fri, 4 Mar 2016 14:25:32 -0800 Subject: [PATCH 6/7] Add Column chunk type validation function --- cpp/src/arrow/array.h | 1 - cpp/src/arrow/table/column-test.cc | 32 ++++++++++++++++++++++-------- cpp/src/arrow/table/column.cc | 17 ++++++++++++++++ cpp/src/arrow/table/column.h | 4 +++- cpp/src/arrow/type.h | 5 +++++ cpp/src/arrow/types/list.h | 2 +- cpp/src/arrow/types/primitive.h | 20 ++++++++++--------- cpp/src/arrow/util/bit-util.h | 4 ++++ 8 files changed, 65 insertions(+), 20 deletions(-) diff --git a/cpp/src/arrow/array.h b/cpp/src/arrow/array.h index 0632146637e59..85e853e2ae5e2 100644 --- a/cpp/src/arrow/array.h +++ b/cpp/src/arrow/array.h @@ -81,7 +81,6 @@ class Array { DISALLOW_COPY_AND_ASSIGN(Array); }; - typedef std::shared_ptr ArrayPtr; } // namespace arrow diff --git a/cpp/src/arrow/table/column-test.cc b/cpp/src/arrow/table/column-test.cc index a5ca9931a9624..15f554f46325d 100644 --- a/cpp/src/arrow/table/column-test.cc +++ b/cpp/src/arrow/table/column-test.cc @@ -24,8 +24,10 @@ #include "arrow/field.h" #include "arrow/schema.h" #include "arrow/table/column.h" +#include "arrow/test-util.h" #include "arrow/type.h" #include "arrow/types/integer.h" +#include "arrow/util/bit-util.h" #include "arrow/util/buffer.h" #include "arrow/util/memory-pool.h" #include "arrow/util/status.h" @@ -41,12 +43,13 @@ class TestColumn : public ::testing::Test { pool_ = GetDefaultMemoryPool(); } - std::shared_ptr MakeInt32Array(int32_t length, int32_t null_count = 0) { + template + std::shared_ptr MakeArray(int32_t length, int32_t null_count = 0) { auto data = std::make_shared(pool_); auto nulls = std::make_shared(pool_); - data->Resize(400); - data->Resize(13); - return std::make_shared(100, data, 10, nulls); + data->Resize(length * sizeof(typename ArrayType::value_type)); + nulls->Resize(util::bytes_for_bits(length)); + return std::make_shared(length, data, 10, nulls); } protected: @@ -58,20 +61,33 @@ class TestColumn : public ::testing::Test { TEST_F(TestColumn, BasicAPI) { ArrayVector arrays; - - arrays.push_back(MakeInt32Array(100)); - arrays.push_back(MakeInt32Array(100, 10)); - arrays.push_back(MakeInt32Array(100, 20)); + arrays.push_back(MakeArray(100)); + arrays.push_back(MakeArray(100, 10)); + arrays.push_back(MakeArray(100, 20)); auto field = std::make_shared("c0", INT32); column_.reset(new Column(field, arrays)); + ASSERT_EQ("c0", column_->name()); + ASSERT_TRUE(column_->type()->Equals(INT32)); ASSERT_EQ(300, column_->length()); ASSERT_EQ(30, column_->null_count()); ASSERT_EQ(3, column_->data()->num_chunks()); } TEST_F(TestColumn, ChunksInhomogeneous) { + ArrayVector arrays; + arrays.push_back(MakeArray(100)); + arrays.push_back(MakeArray(100, 10)); + + auto field = std::make_shared("c0", INT32); + column_.reset(new Column(field, arrays)); + + ASSERT_OK(column_->ValidateData()); + + arrays.push_back(MakeArray(100, 10)); + column_.reset(new Column(field, arrays)); + ASSERT_RAISES(Invalid, column_->ValidateData()); } } // namespace arrow diff --git a/cpp/src/arrow/table/column.cc b/cpp/src/arrow/table/column.cc index bb0d833b36c32..a66a1c11c5a01 100644 --- a/cpp/src/arrow/table/column.cc +++ b/cpp/src/arrow/table/column.cc @@ -18,8 +18,10 @@ #include "arrow/table/column.h" #include +#include #include "arrow/field.h" +#include "arrow/util/status.h" namespace arrow { @@ -42,4 +44,19 @@ Column::Column(const std::shared_ptr& field, field_(field), data_(data) {} +Status Column::ValidateData() { + for (int i = 0; i < data_->num_chunks(); ++i) { + const std::shared_ptr& type = data_->chunk(i)->type(); + if (!this->type()->Equals(type)) { + std::stringstream ss; + ss << "In chunk " << i << " expected type " + << this->type()->ToString() + << " but saw " + << type->ToString(); + return Status::Invalid(ss.str()); + } + } + return Status::OK(); +} + } // namespace arrow diff --git a/cpp/src/arrow/table/column.h b/cpp/src/arrow/table/column.h index 8460059b83301..9e9064e86545d 100644 --- a/cpp/src/arrow/table/column.h +++ b/cpp/src/arrow/table/column.h @@ -48,7 +48,9 @@ class ChunkedArray { return chunks_.size(); } - const std::shared_ptr& chunk(int i) const; + const std::shared_ptr& chunk(int i) const { + return chunks_[i]; + } protected: ArrayVector chunks_; diff --git a/cpp/src/arrow/type.h b/cpp/src/arrow/type.h index e5504c4713d17..4193a0e8bc851 100644 --- a/cpp/src/arrow/type.h +++ b/cpp/src/arrow/type.h @@ -142,10 +142,15 @@ struct DataType { nullable(nullable) {} virtual bool Equals(const DataType* other) { + // Call with a pointer so more friendly to subclasses return this == other || (this->type == other->type && this->nullable == other->nullable); } + bool Equals(const std::shared_ptr& other) { + return Equals(other.get()); + } + virtual std::string ToString() const = 0; }; diff --git a/cpp/src/arrow/types/list.h b/cpp/src/arrow/types/list.h index 1fc83536db8c6..f39fe5c4d811b 100644 --- a/cpp/src/arrow/types/list.h +++ b/cpp/src/arrow/types/list.h @@ -132,7 +132,7 @@ class ListBuilder : public Int32Builder { // // If passed, null_bytes is of equal length to values, and any nonzero byte // will be considered as a null for that slot - Status Append(T* values, int32_t length, uint8_t* null_bytes = nullptr) { + Status Append(value_type* values, int32_t length, uint8_t* null_bytes = nullptr) { if (length_ + length > capacity_) { int32_t new_capacity = util::next_power2(length_ + length); RETURN_NOT_OK(Resize(new_capacity)); diff --git a/cpp/src/arrow/types/primitive.h b/cpp/src/arrow/types/primitive.h index 49040fb66268f..09d43e7ec8b80 100644 --- a/cpp/src/arrow/types/primitive.h +++ b/cpp/src/arrow/types/primitive.h @@ -60,7 +60,7 @@ class PrimitiveArray : public Array { template class PrimitiveArrayImpl : public PrimitiveArray { public: - typedef typename TypeClass::c_type T; + typedef typename TypeClass::c_type value_type; PrimitiveArrayImpl() : PrimitiveArray() {} @@ -81,9 +81,11 @@ class PrimitiveArrayImpl : public PrimitiveArray { return PrimitiveArray::Equals(*static_cast(&other)); } - const T* raw_data() const { return reinterpret_cast(raw_data_);} + const value_type* raw_data() const { + return reinterpret_cast(raw_data_); + } - T Value(int i) const { + value_type Value(int i) const { return raw_data()[i]; } @@ -96,12 +98,12 @@ class PrimitiveArrayImpl : public PrimitiveArray { template class PrimitiveBuilder : public ArrayBuilder { public: - typedef typename Type::c_type T; + typedef typename Type::c_type value_type; explicit PrimitiveBuilder(MemoryPool* pool, const TypePtr& type) : ArrayBuilder(pool, type), values_(nullptr) { - elsize_ = sizeof(T); + elsize_ = sizeof(value_type); } virtual ~PrimitiveBuilder() {} @@ -141,7 +143,7 @@ class PrimitiveBuilder : public ArrayBuilder { } // Scalar append - Status Append(T val, bool is_null = false) { + Status Append(value_type val, bool is_null = false) { if (length_ == capacity_) { // If the capacity was not already a multiple of 2, do so here RETURN_NOT_OK(Resize(util::next_power2(capacity_ + 1))); @@ -158,7 +160,7 @@ class PrimitiveBuilder : public ArrayBuilder { // // If passed, null_bytes is of equal length to values, and any nonzero byte // will be considered as a null for that slot - Status Append(const T* values, int32_t length, + Status Append(const value_type* values, int32_t length, const uint8_t* null_bytes = nullptr) { if (length_ + length > capacity_) { int32_t new_capacity = util::next_power2(length_ + length); @@ -215,8 +217,8 @@ class PrimitiveBuilder : public ArrayBuilder { return Status::OK(); } - T* raw_buffer() { - return reinterpret_cast(values_->mutable_data()); + value_type* raw_buffer() { + return reinterpret_cast(values_->mutable_data()); } std::shared_ptr buffer() const { diff --git a/cpp/src/arrow/util/bit-util.h b/cpp/src/arrow/util/bit-util.h index 841f617a3139c..5e7197f901222 100644 --- a/cpp/src/arrow/util/bit-util.h +++ b/cpp/src/arrow/util/bit-util.h @@ -33,6 +33,10 @@ static inline int64_t ceil_byte(int64_t size) { return (size + 7) & ~7; } +static inline int64_t bytes_for_bits(int64_t size) { + return ceil_byte(size) / 8; +} + static inline int64_t ceil_2bytes(int64_t size) { return (size + 15) & ~15; } From 1835d33b80820d3247d2c3e037466ee219284a89 Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Fri, 4 Mar 2016 14:27:18 -0800 Subject: [PATCH 7/7] Don't use auto --- cpp/src/arrow/table/column.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/arrow/table/column.cc b/cpp/src/arrow/table/column.cc index a66a1c11c5a01..82750cf4d4306 100644 --- a/cpp/src/arrow/table/column.cc +++ b/cpp/src/arrow/table/column.cc @@ -28,7 +28,7 @@ namespace arrow { ChunkedArray::ChunkedArray(const ArrayVector& chunks) : chunks_(chunks) { length_ = 0; - for (auto chunk : chunks) { + for (const std::shared_ptr& chunk : chunks) { length_ += chunk->length(); null_count_ += chunk->null_count(); }