Skip to content

Commit

Permalink
Workaround compute-sanitizer memcheck bug (#15259)
Browse files Browse the repository at this point in the history
Provides a workaround for the compute-sanitizer issue described in #15258 causing memcheck failures in nightly builds.
An environment variable is introduced `LIBCUDF_MEMCHECK_ENABLED` so test code can bypass specific tests that cause the compute-sanitizer error. The env var is set only during memcheck tests since the failure does not occur in normal testing.
The failure only occurs for some `int16` or `uint16` reduction tests so managing these few tests is reasonable.

Other possible workarounds include
1. Reverting the compute-sanitizer to 11.8
   Using the latest version is more desirable since the fix will likely not be back ported.
2. Adding an exclude filter to the CUB Reduce kernel
   This disables checking for almost all reduction kernels

Authors:
  - David Wendt (https://github.com/davidwendt)
  - Bradley Dice (https://github.com/bdice)

Approvers:
  - Bradley Dice (https://github.com/bdice)
  - Vyas Ramasubramani (https://github.com/vyasr)
  - Jake Awe (https://github.com/AyodeAwe)

URL: #15259
  • Loading branch information
davidwendt committed Mar 11, 2024
1 parent c4f1a26 commit a09c215
Show file tree
Hide file tree
Showing 4 changed files with 49 additions and 6 deletions.
3 changes: 3 additions & 0 deletions ci/run_cudf_memcheck_ctests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ trap "EXITCODE=1" ERR
cd "${INSTALL_PREFIX:-${CONDA_PREFIX:-/usr}}/bin/gtests/libcudf/";

export GTEST_CUDF_RMM_MODE=cuda
# compute-sanitizer bug 4553815
export LIBCUDF_MEMCHECK_ENABLED=1
for gt in ./*_TEST ; do
test_name=$(basename ${gt})
# Run gtests with compute-sanitizer
Expand All @@ -20,5 +22,6 @@ for gt in ./*_TEST ; do
compute-sanitizer --tool memcheck ${gt} "$@"
done
unset GTEST_CUDF_RMM_MODE
unset LIBCUDF_MEMCHECK_ENABLED

exit ${EXITCODE}
16 changes: 14 additions & 2 deletions cpp/tests/iterator/value_iterator_test_numeric.cu
Original file line number Diff line number Diff line change
Expand Up @@ -23,5 +23,17 @@ template <typename T>
struct NumericValueIteratorTest : public IteratorTest<T> {};

TYPED_TEST_SUITE(NumericValueIteratorTest, TestingTypes);
TYPED_TEST(NumericValueIteratorTest, non_null_iterator) { non_null_iterator(*this); }
TYPED_TEST(NumericValueIteratorTest, null_iterator) { null_iterator(*this); }
TYPED_TEST(NumericValueIteratorTest, non_null_iterator)
{
if constexpr (std::is_same_v<TypeParam, int16_t> || std::is_same_v<TypeParam, uint16_t>) {
if (getenv("LIBCUDF_MEMCHECK_ENABLED")) { return; }
}
non_null_iterator(*this);
}
TYPED_TEST(NumericValueIteratorTest, null_iterator)
{
if constexpr (std::is_same_v<TypeParam, int16_t> || std::is_same_v<TypeParam, uint16_t>) {
if (getenv("LIBCUDF_MEMCHECK_ENABLED")) { return; }
}
null_iterator(*this);
}
10 changes: 7 additions & 3 deletions cpp/tests/reductions/reduction_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ struct ReductionTest : public cudf::test::BaseFixture {
template <typename T>
struct MinMaxReductionTest : public ReductionTest<T> {};

using MinMaxTypes = cudf::test::Types<int16_t, int32_t, float, double>;
using MinMaxTypes = cudf::test::Types<int32_t, int64_t, float, double>;
TYPED_TEST_SUITE(MinMaxReductionTest, MinMaxTypes);

// ------------------------------------------------------------------------
Expand Down Expand Up @@ -299,6 +299,10 @@ TYPED_TEST_SUITE(ReductionTest, cudf::test::NumericTypes);
TYPED_TEST(ReductionTest, Product)
{
using T = TypeParam;
if constexpr (std::is_same_v<T, int16_t> || std::is_same_v<T, uint16_t>) {
if (getenv("LIBCUDF_MEMCHECK_ENABLED")) { return; }
}

std::vector<int> int_values({5, -1, 1, 0, 3, 2, 4});
std::vector<bool> host_bools({1, 1, 0, 0, 1, 1, 1});
std::vector<TypeParam> v = convert_values<TypeParam>(int_values);
Expand Down Expand Up @@ -2272,7 +2276,7 @@ TEST_P(DictionaryStringReductionTest, MinMax)

template <typename T>
struct DictionaryAnyAllTest : public ReductionTest<bool> {};
using DictionaryAnyAllTypes = cudf::test::Types<int16_t, int32_t, float, double, bool>;
using DictionaryAnyAllTypes = cudf::test::Types<int32_t, int64_t, float, double, bool>;
TYPED_TEST_SUITE(DictionaryAnyAllTest, cudf::test::NumericTypes);
TYPED_TEST(DictionaryAnyAllTest, AnyAll)
{
Expand Down Expand Up @@ -2328,7 +2332,7 @@ TYPED_TEST(DictionaryAnyAllTest, AnyAll)
template <typename T>
struct DictionaryReductionTest : public ReductionTest<T> {};

using DictionaryTypes = cudf::test::Types<int16_t, int32_t, float, double>;
using DictionaryTypes = cudf::test::Types<int32_t, int64_t, float, double>;
TYPED_TEST_SUITE(DictionaryReductionTest, DictionaryTypes);
TYPED_TEST(DictionaryReductionTest, Sum)
{
Expand Down
26 changes: 25 additions & 1 deletion cpp/tests/reductions/segmented_reduction_tests.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2022-2023, NVIDIA CORPORATION.
* Copyright (c) 2022-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -87,6 +87,10 @@ TYPED_TEST(SegmentedReductionTest, SumExcludeNulls)

TYPED_TEST(SegmentedReductionTest, ProductExcludeNulls)
{
if constexpr (std::is_same_v<TypeParam, int16_t> || std::is_same_v<TypeParam, uint16_t>) {
if (getenv("LIBCUDF_MEMCHECK_ENABLED")) { return; }
}

// [1, 3, 5], [null, 3, 5], [1], [null], [null, null], []
// values: {1, 3, 5, XXX, 3, 5, 1, XXX, XXX, XXX}
// offsets: {0, 3, 6, 7, 8, 10, 10}
Expand Down Expand Up @@ -137,6 +141,10 @@ TYPED_TEST(SegmentedReductionTest, ProductExcludeNulls)

TYPED_TEST(SegmentedReductionTest, MaxExcludeNulls)
{
if constexpr (std::is_same_v<TypeParam, int16_t> || std::is_same_v<TypeParam, uint16_t>) {
if (getenv("LIBCUDF_MEMCHECK_ENABLED")) { return; }
}

// [1, 2, 3], [1, null, 3], [1], [null], [null, null], []
// values: {1, 2, 3, 1, XXX, 3, 1, XXX, XXX, XXX}
// offsets: {0, 3, 6, 7, 8, 10, 10}
Expand Down Expand Up @@ -185,6 +193,10 @@ TYPED_TEST(SegmentedReductionTest, MaxExcludeNulls)

TYPED_TEST(SegmentedReductionTest, MinExcludeNulls)
{
if constexpr (std::is_same_v<TypeParam, int16_t> || std::is_same_v<TypeParam, uint16_t>) {
if (getenv("LIBCUDF_MEMCHECK_ENABLED")) { return; }
}

// [1, 2, 3], [1, null, 3], [1], [null], [null, null], []
// values: {1, 2, 3, 1, XXX, 3, 1, XXX, XXX, XXX}
// offsets: {0, 3, 6, 7, 8, 10, 10}
Expand Down Expand Up @@ -376,6 +388,10 @@ TYPED_TEST(SegmentedReductionTest, SumIncludeNulls)

TYPED_TEST(SegmentedReductionTest, ProductIncludeNulls)
{
if constexpr (std::is_same_v<TypeParam, int16_t> || std::is_same_v<TypeParam, uint16_t>) {
if (getenv("LIBCUDF_MEMCHECK_ENABLED")) { return; }
}

// [1, 3, 5], [null, 3, 5], [1], [null], [null, null], []
// values: {1, 3, 5, XXX, 3, 5, 1, XXX, XXX, XXX}
// offsets: {0, 3, 6, 7, 8, 10, 10}
Expand Down Expand Up @@ -429,6 +445,10 @@ TYPED_TEST(SegmentedReductionTest, ProductIncludeNulls)

TYPED_TEST(SegmentedReductionTest, MaxIncludeNulls)
{
if constexpr (std::is_same_v<TypeParam, int16_t> || std::is_same_v<TypeParam, uint16_t>) {
if (getenv("LIBCUDF_MEMCHECK_ENABLED")) { return; }
}

// [1, 2, 3], [1, null, 3], [1], [null], [null, null], []
// values: {1, 2, 3, 1, XXX, 3, 1, XXX, XXX, XXX}
// offsets: {0, 3, 6, 7, 8, 10, 10}
Expand Down Expand Up @@ -480,6 +500,10 @@ TYPED_TEST(SegmentedReductionTest, MaxIncludeNulls)

TYPED_TEST(SegmentedReductionTest, MinIncludeNulls)
{
if constexpr (std::is_same_v<TypeParam, int16_t> || std::is_same_v<TypeParam, uint16_t>) {
if (getenv("LIBCUDF_MEMCHECK_ENABLED")) { return; }
}

// [1, 2, 3], [1, null, 3], [1], [null], [null, null], []
// values: {1, 2, 3, 1, XXX, 3, 1, XXX, XXX}
// offsets: {0, 3, 6, 7, 8, 10, 10}
Expand Down

0 comments on commit a09c215

Please sign in to comment.