Skip to content

Commit

Permalink
apacheGH-33936: [Go] C Data Interface: export dummy buffer for nil bu…
Browse files Browse the repository at this point in the history
…ffers (apache#33951)

### Rationale for this change

While apache#14805 clarified that NULL buffers are valid in the C Data Interface, not all implementations handle this correctly (or were fixed, but earlier versions still exist).

### What changes are included in this PR?

Export a non-NULL dummy buffer for nil/empty buffers to ensure compatibility.

### Are these changes tested?

A regression test is included.
* Closes: apache#33936

Authored-by: David Li <li.davidm96@gmail.com>
Signed-off-by: David Li <li.davidm96@gmail.com>
  • Loading branch information
lidavidm authored and Mike Hancock committed Feb 17, 2023
1 parent 173574f commit da86dc7
Show file tree
Hide file tree
Showing 2 changed files with 57 additions and 2 deletions.
14 changes: 12 additions & 2 deletions go/arrow/cdata/cdata_exports.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,16 @@
package cdata

// #include <errno.h>
// #include <stdint.h>
// #include <stdlib.h>
// #include "arrow/c/abi.h"
// #include "arrow/c/helpers.h"
//
// extern void releaseExportedSchema(struct ArrowSchema* schema);
// extern void releaseExportedArray(struct ArrowArray* array);
//
// const uint8_t kGoCdataZeroRegion[8] = {0};
//
// void goReleaseArray(struct ArrowArray* array) {
// releaseExportedArray(array);
// }
Expand All @@ -45,6 +48,7 @@ import (
"github.com/apache/arrow/go/v12/arrow"
"github.com/apache/arrow/go/v12/arrow/array"
"github.com/apache/arrow/go/v12/arrow/endian"
"github.com/apache/arrow/go/v12/arrow/internal"
"github.com/apache/arrow/go/v12/arrow/ipc"
)

Expand Down Expand Up @@ -376,7 +380,7 @@ func exportArray(arr arrow.Array, out *CArrowArray, outSchema *CArrowSchema) {
// unions don't have validity bitmaps, but we keep them shifted
// to make processing easier in other contexts. This means that
// we have to adjust for union arrays
if arr.DataType().ID() == arrow.DENSE_UNION || arr.DataType().ID() == arrow.SPARSE_UNION {
if !internal.DefaultHasValidityBitmap(arr.DataType().ID()) {
out.n_buffers--
nbuffers--
bufs = bufs[1:]
Expand All @@ -385,7 +389,13 @@ func exportArray(arr arrow.Array, out *CArrowArray, outSchema *CArrowSchema) {
for i := range bufs {
buf := bufs[i]
if buf == nil || buf.Len() == 0 {
buffers[i] = nil
if i > 0 || !internal.DefaultHasValidityBitmap(arr.DataType().ID()) {
// apache/arrow#33936: export a dummy buffer to be friendly to
// implementations that don't import NULL properly
buffers[i] = (*C.void)(unsafe.Pointer(&C.kGoCdataZeroRegion))
} else {
buffers[i] = nil
}
continue
}

Expand Down
45 changes: 45 additions & 0 deletions go/arrow/cdata/cdata_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -795,6 +795,51 @@ func TestEmptyDictExport(t *testing.T) {
assert.Nil(t, out.dictionary.dictionary)
}

func TestEmptyStringExport(t *testing.T) {
// apache/arrow#33936: regression test
bldr := array.NewBuilder(memory.DefaultAllocator, &arrow.StringType{})
defer bldr.Release()

arr := bldr.NewArray()
defer arr.Release()

var out CArrowArray
var sc CArrowSchema
ExportArrowArray(arr, &out, &sc)

assert.EqualValues(t, 'u', *sc.format)
assert.Zero(t, sc.n_children)
assert.Nil(t, sc.dictionary)

assert.EqualValues(t, 3, out.n_buffers)
buffers := (*[3]unsafe.Pointer)(unsafe.Pointer(out.buffers))
assert.EqualValues(t, unsafe.Pointer(nil), buffers[0])
assert.NotEqualValues(t, unsafe.Pointer(nil), buffers[1])
assert.NotEqualValues(t, unsafe.Pointer(nil), buffers[2])
}

func TestEmptyUnionExport(t *testing.T) {
// apache/arrow#33936: regression test
bldr := array.NewBuilder(memory.DefaultAllocator, arrow.SparseUnionOf([]arrow.Field{
{Name: "child", Type: &arrow.Int64Type{}},
}, []arrow.UnionTypeCode{0}))
defer bldr.Release()

arr := bldr.NewArray()
defer arr.Release()

var out CArrowArray
var sc CArrowSchema
ExportArrowArray(arr, &out, &sc)

assert.EqualValues(t, 1, sc.n_children)
assert.Nil(t, sc.dictionary)

assert.EqualValues(t, 1, out.n_buffers)
buffers := (*[1]unsafe.Pointer)(unsafe.Pointer(out.buffers))
assert.NotEqualValues(t, unsafe.Pointer(nil), buffers[0])
}

func TestRecordReaderExport(t *testing.T) {
// Regression test for apache/arrow#33767
reclist := arrdata.Records["primitives"]
Expand Down

0 comments on commit da86dc7

Please sign in to comment.