Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix mean computation for the geometric distribution in the data generator #15282

Merged
12 changes: 6 additions & 6 deletions cpp/benchmarks/common/generate_input.cu
Original file line number Diff line number Diff line change
Expand Up @@ -77,14 +77,14 @@ double get_distribution_mean(distribution_params<T> const& dist)
case distribution_id::NORMAL:
case distribution_id::UNIFORM: return (dist.lower_bound / 2.) + (dist.upper_bound / 2.);
case distribution_id::GEOMETRIC: {
auto const range_size = dist.lower_bound < dist.upper_bound
? dist.upper_bound - dist.lower_bound
: dist.lower_bound - dist.upper_bound;
auto const p = geometric_dist_p(range_size);
// In the current implementation, the geometric distribution is
// approximated by the absolute value of a normal distribution (half-normal)
auto const gauss_std_dev = geometric_as_gauss_std_dev(dist.lower_bound, dist.upper_bound);
auto const half_gauss_mean = gauss_std_dev * sqrt(2. / M_PI);
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

if (dist.lower_bound < dist.upper_bound)
return dist.lower_bound + (1. / p);
return dist.lower_bound + half_gauss_mean;
else
return dist.lower_bound - (1. / p);
return dist.lower_bound - half_gauss_mean;
}
default: CUDF_FAIL("Unsupported distribution type.");
}
Expand Down
23 changes: 19 additions & 4 deletions cpp/benchmarks/common/random_distribution_factory.cuh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2022, NVIDIA CORPORATION.
* Copyright (c) 2020-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -76,6 +76,18 @@ double geometric_dist_p(T range_size)
return p ? p : std::numeric_limits<double>::epsilon();
}

/**
 * @brief Computes the standard deviation of the normal distribution whose absolute value is used
 * to approximate a geometric distribution over the given range.
 *
 * The result is one quarter of the distance between the two bounds and does not depend on the
 * order in which the bounds are passed.
 *
 * @tparam T Arithmetic type of the bounds
 * @param lower_bound One end of the value range
 * @param upper_bound The other end of the value range
 * @return Non-negative standard deviation, equal to `|upper_bound - lower_bound| / 4`
 */
template <typename T>
constexpr double geometric_as_gauss_std_dev(T lower_bound, T upper_bound)
{
  // Fraction of the range width that is used as the standard deviation
  constexpr double std_dev_scale = 0.25;

  // Scale each bound before subtracting so that the difference cannot overflow `T`
  auto const scaled_lower = lower_bound * std_dev_scale;
  auto const scaled_upper = upper_bound * std_dev_scale;

  // Order the operands by hand instead of calling std::abs: std::abs is not constexpr before
  // C++23, which would prevent this function from being evaluated in a constant expression
  return scaled_lower < scaled_upper ? scaled_upper - scaled_lower : scaled_lower - scaled_upper;
}

/**
* @brief Generates a geometric distribution between lower_bound and upper_bound.
* This distribution is an approximation generated using normal distribution.
Expand All @@ -92,7 +104,7 @@ class geometric_distribution : public thrust::random::normal_distribution<integr
public:
using result_type = T;
__host__ __device__ explicit geometric_distribution(T lower_bound, T upper_bound)
: super_t(0, std::labs(upper_bound - lower_bound) / 4.0),
: super_t(0, geometric_as_gauss_std_dev(lower_bound, upper_bound)),
_lower_bound(lower_bound),
_upper_bound(upper_bound)
{
Expand All @@ -101,8 +113,11 @@ class geometric_distribution : public thrust::random::normal_distribution<integr
template <typename UniformRandomNumberGenerator>
__host__ __device__ result_type operator()(UniformRandomNumberGenerator& urng)
{
return _lower_bound < _upper_bound ? std::abs(super_t::operator()(urng)) + _lower_bound
: _lower_bound - std::abs(super_t::operator()(urng));
// Distribution always biases towards lower_bound
realType const result = _lower_bound < _upper_bound
? std::abs(super_t::operator()(urng)) + _lower_bound
: _lower_bound - std::abs(super_t::operator()(urng));
return std::round(result);
}
};

Expand Down