Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
90 changes: 54 additions & 36 deletions include/xgboost/host_device_vector.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*!
* Copyright 2017-2019 XGBoost contributors
/**
* Copyright 2017-2026, XGBoost contributors
*/

/**
Expand Down Expand Up @@ -64,7 +64,8 @@ namespace xgboost {
void SetCudaSetDeviceHandler(void (*handler)(int));
#endif // __CUDACC__

template <typename T> struct HostDeviceVectorImpl;
template <typename T>
struct HostDeviceVectorImpl;

/*!
* \brief Controls data access from the GPU.
Expand All @@ -78,7 +79,8 @@ template <typename T> struct HostDeviceVectorImpl;
* - Data is being manipulated on the host. Host has write access, device doesn't have access.
*/
enum GPUAccess {
// NOTE(review): diff view — `kNone, kRead,` is presumably the removed one-line spelling and
// the two lines after it the added one-enumerator-per-line spelling of the same values.
// kNone: presumably the device holds no access; kRead: presumably read-only device access
// — confirm against the full description of this enum.
kNone, kRead,
kNone,
kRead,
// write implies read
kWrite
};
Expand All @@ -88,9 +90,15 @@ class HostDeviceVector {
static_assert(std::is_standard_layout_v<T>, "HostDeviceVector admits only POD types");

public:
explicit HostDeviceVector(size_t size = 0, T v = T(), DeviceOrd device = DeviceOrd::CPU());
HostDeviceVector(std::initializer_list<T> init, DeviceOrd device = DeviceOrd::CPU());
explicit HostDeviceVector(const std::vector<T>& init, DeviceOrd device = DeviceOrd::CPU());
using value_type = T; // NOLINT

public:
explicit HostDeviceVector(size_t size = 0, T v = T(), DeviceOrd device = DeviceOrd::CPU(),
Context const* ctx = nullptr);
HostDeviceVector(std::initializer_list<T> init, DeviceOrd device = DeviceOrd::CPU(),
Context const* ctx = nullptr);
explicit HostDeviceVector(const std::vector<T>& init, DeviceOrd device = DeviceOrd::CPU(),
Context const* ctx = nullptr);
~HostDeviceVector();

HostDeviceVector(const HostDeviceVector<T>&) = delete;
Expand All @@ -103,44 +111,54 @@ class HostDeviceVector {
[[nodiscard]] std::size_t Size() const;
[[nodiscard]] std::size_t SizeBytes() const { return this->Size() * sizeof(T); }
[[nodiscard]] DeviceOrd Device() const;
common::Span<T> DeviceSpan();
common::Span<const T> ConstDeviceSpan() const;
common::Span<const T> DeviceSpan() const { return ConstDeviceSpan(); }
T* DevicePointer();
const T* ConstDevicePointer() const;
const T* DevicePointer() const { return ConstDevicePointer(); }

T* HostPointer() { return HostVector().data(); }
common::Span<T> HostSpan() { return common::Span<T>{HostVector()}; }
common::Span<T const> HostSpan() const { return common::Span<T const>{HostVector()}; }
common::Span<T const> ConstHostSpan() const { return HostSpan(); }
const T* ConstHostPointer() const { return ConstHostVector().data(); }
const T* HostPointer() const { return ConstHostPointer(); }

void Fill(T v);
void Copy(const HostDeviceVector<T>& other);
void Copy(const std::vector<T>& other);
void Copy(std::initializer_list<T> other);

void Extend(const HostDeviceVector<T>& other);

std::vector<T>& HostVector();
const std::vector<T>& ConstHostVector() const;
const std::vector<T>& HostVector() const {return ConstHostVector(); }
// Device-side accessors. Every accessor takes an optional Context pointer that defaults to
// nullptr; the inline overloads below only forward it to the out-of-line declarations.
common::Span<T> DeviceSpan(Context const* ctx = nullptr);
common::Span<const T> ConstDeviceSpan(Context const* ctx = nullptr) const;
// Const overload of DeviceSpan: forwards to ConstDeviceSpan(ctx).
common::Span<const T> DeviceSpan(Context const* ctx = nullptr) const {
return ConstDeviceSpan(ctx);
}
T* DevicePointer(Context const* ctx = nullptr);
const T* ConstDevicePointer(Context const* ctx = nullptr) const;
// Const overload of DevicePointer: forwards to ConstDevicePointer(ctx).
const T* DevicePointer(Context const* ctx = nullptr) const { return ConstDevicePointer(ctx); }

// Host-side accessors, all expressed in terms of HostVector(ctx) / ConstHostVector(ctx).
// Pointer to the first element of the host vector.
T* HostPointer(Context const* ctx = nullptr) { return HostVector(ctx).data(); }
// Mutable span constructed from the host vector.
common::Span<T> HostSpan(Context const* ctx = nullptr) {
return common::Span<T>{HostVector(ctx)};
}
// Read-only span constructed from the host vector (const overload).
common::Span<T const> HostSpan(Context const* ctx = nullptr) const {
return common::Span<T const>{HostVector(ctx)};
}
// Explicitly-named read-only variant: forwards to the const HostSpan overload.
common::Span<T const> ConstHostSpan(Context const* ctx = nullptr) const { return HostSpan(ctx); }
// Read-only pointer to the first element of the host vector.
const T* ConstHostPointer(Context const* ctx = nullptr) const {
return ConstHostVector(ctx).data();
}
// Const overload of HostPointer: forwards to ConstHostPointer(ctx).
const T* HostPointer(Context const* ctx = nullptr) const { return ConstHostPointer(ctx); }

void Fill(T v, Context const* ctx = nullptr);
void Copy(const HostDeviceVector<T>& other, Context const* ctx = nullptr);
void Copy(const std::vector<T>& other, Context const* ctx = nullptr);
void Copy(std::initializer_list<T> other, Context const* ctx = nullptr);

void Extend(const HostDeviceVector<T>& other, Context const* ctx = nullptr);

std::vector<T>& HostVector(Context const* ctx = nullptr);
const std::vector<T>& ConstHostVector(Context const* ctx = nullptr) const;
const std::vector<T>& HostVector(Context const* ctx = nullptr) const {
return ConstHostVector(ctx);
}

[[nodiscard]] bool HostCanRead() const;
[[nodiscard]] bool HostCanWrite() const;
[[nodiscard]] bool DeviceCanRead() const;
[[nodiscard]] bool DeviceCanWrite() const;
[[nodiscard]] GPUAccess DeviceAccess() const;

void SetDevice(DeviceOrd device) const;
// FIXME(jiamingy): Until we can fully unify the context, we will have both ctx and device here.
void SetDevice(DeviceOrd device, Context const* ctx = nullptr) const;

void Resize(std::size_t new_size);
/** @brief Resize and initialize the data if the new size is larger than the old size. */
void Resize(std::size_t new_size, T v);
void Resize(Context const* ctx, std::size_t new_size);

using value_type = T; // NOLINT
/** @brief Resize and initialize the data if the new size is larger than the old size. */
void Resize(Context const* ctx, std::size_t new_size, T v);

private:
HostDeviceVectorImpl<T>* impl_;
Expand Down
65 changes: 35 additions & 30 deletions include/xgboost/linalg.h
Original file line number Diff line number Diff line change
Expand Up @@ -595,13 +595,13 @@ auto MakeTensorView(Context const *ctx, Order order, common::Span<T, ext> data,

// Build a tensor view over the storage of a HostDeviceVector: the host span is taken when
// ctx->IsCPU() holds, otherwise the device span. The span is then handed, together with
// ctx->Device() and the perfectly-forwarded shape arguments, to another MakeTensorView overload.
// NOTE(review): diff view — of the two `auto span` lines, the first is presumably the removed
// form and the second (passing ctx) the added form.
template <typename T, typename... S>
auto MakeTensorView(Context const *ctx, HostDeviceVector<T> *data, S &&...shape) {
auto span = ctx->IsCPU() ? data->HostSpan() : data->DeviceSpan();
auto span = ctx->IsCPU() ? data->HostSpan(ctx) : data->DeviceSpan(ctx);
return MakeTensorView(ctx->Device(), span, std::forward<S>(shape)...);
}

// Read-only counterpart for a const HostDeviceVector: picks ConstHostSpan when ctx->IsCPU()
// holds, otherwise ConstDeviceSpan, and forwards the span with ctx->Device() and the shape
// arguments to another MakeTensorView overload.
// NOTE(review): diff view — of the two `auto span` lines, the first is presumably the removed
// form and the second (passing ctx) the added form.
template <typename T, typename... S>
auto MakeTensorView(Context const *ctx, HostDeviceVector<T> const *data, S &&...shape) {
auto span = ctx->IsCPU() ? data->ConstHostSpan() : data->ConstDeviceSpan();
auto span = ctx->IsCPU() ? data->ConstHostSpan(ctx) : data->ConstDeviceSpan(ctx);
return MakeTensorView(ctx->Device(), span, std::forward<S>(shape)...);
}

Expand Down Expand Up @@ -768,15 +768,15 @@ class Tensor {
Order order_{Order::kC};

// Record the shape of a tensor whose data_ has already been filled, and optionally move the
// data to a device.
// NOTE(review): diff view — where two near-identical lines are adjacent (signature,
// SetDevice, ConstDevicePointer), the first is presumably the removed form and the
// ctx-taking one the added form.
template <typename I, std::int32_t D>
void Initialize(I const (&shape)[D], DeviceOrd device) {
void Initialize(I const (&shape)[D], DeviceOrd device, Context const *ctx = nullptr) {
// The supplied shape may have fewer dimensions than the tensor, never more.
static_assert(D <= kDim, "Invalid shape.");
std::copy(shape, shape + D, shape_);
// Dimensions not covered by `shape` get extent 1.
for (auto i = D; i < kDim; ++i) {
shape_[i] = 1;
}
if (!device.IsCPU()) {
data_.SetDevice(device);
data_.ConstDevicePointer(); // Pull to device
data_.SetDevice(device, ctx);
data_.ConstDevicePointer(ctx); // Pull to device
}
// The element count of data_ must agree with the size computed from the final shape.
CHECK_EQ(data_.Size(), detail::CalcSize(shape_));
}
Expand All @@ -791,11 +791,13 @@ class Tensor {
* See \ref TensorView for parameters of this constructor.
*/
// Construct from a C-array shape: wraps `shape` in a fixed-extent span and delegates to the
// span-based constructor.
// NOTE(review): diff view — the first signature/initializer pair is presumably the removed
// form; the second pair, which adds and forwards `ctx`, the added form.
template <typename I, int32_t D>
explicit Tensor(I const (&shape)[D], DeviceOrd device, Order order = kC)
: Tensor{common::Span<I const, D>{shape}, device, order} {}
explicit Tensor(I const (&shape)[D], DeviceOrd device, Order order = kC,
Context const *ctx = nullptr)
: Tensor{common::Span<I const, D>{shape}, device, order, ctx} {}

template <typename I, size_t D>
explicit Tensor(common::Span<I const, D> shape, DeviceOrd device, Order order = kC)
explicit Tensor(common::Span<I const, D> shape, DeviceOrd device, Order order = kC,
Context const *ctx = nullptr)
: order_{order} {
// No device unroll as this is a host only function.
std::copy(shape.data(), shape.data() + D, shape_);
Expand All @@ -804,33 +806,34 @@ class Tensor {
}
auto size = detail::CalcSize(shape_);
if (!device.IsCPU()) {
data_.SetDevice(device);
data_.SetDevice(device, ctx);
}
data_.Resize(size);
data_.Resize(ctx, size);
if (!device.IsCPU()) {
data_.DevicePointer(); // Pull to device
data_.DevicePointer(ctx); // Pull to device
}
}
/**
* Initialize from 2 host iterators.
*/
// NOTE(review): diff view — the first signature line and the first Initialize call are
// presumably the removed form; the ctx-taking versions the added form.
template <typename It, typename I, int32_t D>
explicit Tensor(It begin, It end, I const (&shape)[D], DeviceOrd device, Order order = kC)
explicit Tensor(It begin, It end, I const (&shape)[D], DeviceOrd device, Order order = kC,
Context const *ctx = nullptr)
: order_{order} {
// Copy [begin, end) into the host-side storage first.
// NOTE(review): HostVector() is called without ctx here although ctx is forwarded to
// Initialize below — confirm this is intentional.
auto &h_vec = data_.HostVector();
h_vec.insert(h_vec.begin(), begin, end);
// shape
this->Initialize(shape, device);
this->Initialize(shape, device, ctx);
}

// Construct from an initializer list of values plus a C-array shape.
// NOTE(review): diff view — the first `Order order = kC)` line and the first Initialize call
// are presumably the removed form; the ctx-taking versions the added form.
template <typename I, int32_t D>
explicit Tensor(std::initializer_list<T> data, I const (&shape)[D], DeviceOrd device,
Order order = kC)
Order order = kC, Context const *ctx = nullptr)
: order_{order} {
// Assign the values to the host-side storage first.
// NOTE(review): HostVector() is called without ctx here although ctx is forwarded to
// Initialize below — confirm this is intentional.
auto &h_vec = data_.HostVector();
h_vec = data;
// shape
this->Initialize(shape, device);
this->Initialize(shape, device, ctx);
}
/**
* \brief Index operator. Not thread safe, should not be used in performance critical
Expand All @@ -852,29 +855,29 @@ class Tensor {
/**
* @brief Get a @ref TensorView for this tensor.
*/
// NOTE(review): diff view — where two near-identical lines are adjacent, the first is
// presumably the removed form and the ctx-taking one the added form.
auto View(DeviceOrd device) {
auto View(DeviceOrd device, Context const *ctx = nullptr) {
if (device.IsCPU()) {
// CPU request: view over the host span.
auto span = data_.HostSpan();
auto span = data_.HostSpan(ctx);
return TensorView<T, kDim>{span, shape_, device, order_};
} else {
// Non-CPU request: set the device on data_ first, then view over the device span.
data_.SetDevice(device);
auto span = data_.DeviceSpan();
data_.SetDevice(device, ctx);
auto span = data_.DeviceSpan(ctx);
return TensorView<T, kDim>{span, shape_, device, order_};
}
}
// Const overload: returns a view with element type `T const`.
// NOTE(review): diff view — where two near-identical lines are adjacent, the first is
// presumably the removed form and the ctx-taking one the added form.
auto View(DeviceOrd device) const {
auto View(DeviceOrd device, Context const *ctx = nullptr) const {
if (device.IsCPU()) {
// CPU request: view over the read-only host span.
auto span = data_.ConstHostSpan();
auto span = data_.ConstHostSpan(ctx);
return TensorView<T const, kDim>{span, shape_, device, order_};
} else {
// Non-CPU request: SetDevice is still invoked on data_ from this const method, then the
// read-only device span is used.
data_.SetDevice(device);
auto span = data_.ConstDeviceSpan();
data_.SetDevice(device, ctx);
auto span = data_.ConstDeviceSpan(ctx);
return TensorView<T const, kDim>{span, shape_, device, order_};
}
}

// Shorthand for View() with DeviceOrd::CPU(), in mutable and const flavours.
// NOTE(review): diff view — the first two lines are presumably the removed forms and the
// last two (taking and forwarding ctx) the added forms.
auto HostView() { return this->View(DeviceOrd::CPU()); }
auto HostView() const { return this->View(DeviceOrd::CPU()); }
auto HostView(Context const *ctx = nullptr) { return this->View(DeviceOrd::CPU(), ctx); }
auto HostView(Context const *ctx = nullptr) const { return this->View(DeviceOrd::CPU(), ctx); }

[[nodiscard]] std::size_t Size() const { return data_.Size(); }
[[nodiscard]] bool Empty() const { return Size() == 0; }
Expand Down Expand Up @@ -950,7 +953,9 @@ class Tensor {
/**
* \brief Set device ordinal for this tensor.
*/
// Forwards to data_.SetDevice; callable on a const tensor.
// NOTE(review): diff view — the one-line form is presumably the removed version and the
// three-line form (taking and forwarding ctx) the added version.
void SetDevice(DeviceOrd device) const { data_.SetDevice(device); }
void SetDevice(DeviceOrd device, Context const *ctx = nullptr) const {
data_.SetDevice(device, ctx);
}
// Device currently reported by the underlying data_ vector.
[[nodiscard]] DeviceOrd Device() const { return data_.Device(); }
};

Expand All @@ -966,7 +971,7 @@ using Vector = Tensor<T, 1>;
// Create a tensor on ctx's device with the given extents; no fill value is written here.
// NOTE(review): diff view — of the two SetDevice lines, the first is presumably the removed
// form and the ctx-taking one the added form.
template <typename T, typename... Index>
auto Empty(Context const *ctx, Index &&...index) {
// The tensor's dimensionality is the number of extents passed in.
Tensor<T, sizeof...(Index)> t;
t.SetDevice(ctx->Device());
t.SetDevice(ctx->Device(), ctx);
t.Reshape(index...);
return t;
}
Expand All @@ -977,7 +982,7 @@ auto Empty(Context const *ctx, Index &&...index) {
// Create a tensor on ctx's device that has the same shape as `in`; the contents of `in` are
// not read.
// NOTE(review): diff view — of the two SetDevice lines, the first is presumably the removed
// form and the ctx-taking one the added form.
template <typename T, std::int32_t kDim>
auto EmptyLike(Context const *ctx, Tensor<T, kDim> const &in) {
Tensor<T, kDim> t;
t.SetDevice(ctx->Device());
t.SetDevice(ctx->Device(), ctx);
t.Reshape(in.Shape());
return t;
}
Expand All @@ -988,9 +993,9 @@ auto EmptyLike(Context const *ctx, Tensor<T, kDim> const &in) {
// Create a tensor on ctx's device with the given extents and fill its data with `v`.
// NOTE(review): diff view — of each adjacent SetDevice / Fill pair, the first line is
// presumably the removed form and the ctx-taking one the added form.
template <typename T, typename... Index>
auto Constant(Context const *ctx, T v, Index &&...index) {
// The tensor's dimensionality is the number of extents passed in.
Tensor<T, sizeof...(Index)> t;
t.SetDevice(ctx->Device());
t.SetDevice(ctx->Device(), ctx);
t.Reshape(index...);
// Fill is applied to the tensor's underlying data container after the reshape.
t.Data()->Fill(std::move(v));
t.Data()->Fill(std::move(v), ctx);
return t;
}

Expand Down
6 changes: 3 additions & 3 deletions include/xgboost/multi_target_tree_model.h
Original file line number Diff line number Diff line change
Expand Up @@ -90,12 +90,12 @@ class MultiTargetTree : public Model {
* @param weight The weight vector for the root node.
* @param sum_hess The sum of hessians for the root node (coverage).
*/
void SetRoot(linalg::VectorView<float const> weight, float sum_hess);
void SetRoot(Context const* ctx, linalg::VectorView<float const> weight, float sum_hess);
/**
* @brief Expand a leaf into split node.
*/
void Expand(bst_node_t nidx, bst_feature_t split_idx, float split_cond, bool default_left,
linalg::VectorView<float const> base_weight,
void Expand(Context const* ctx, bst_node_t nidx, bst_feature_t split_idx, float split_cond,
bool default_left, linalg::VectorView<float const> base_weight,
linalg::VectorView<float const> left_weight,
linalg::VectorView<float const> right_weight, float loss_chg, float sum_hess,
float left_sum, float right_sum);
Expand Down
10 changes: 5 additions & 5 deletions include/xgboost/tree_model.h
Original file line number Diff line number Diff line change
Expand Up @@ -315,8 +315,8 @@ class RegTree : public Model {
* @param left_sum The sum of hessians for the left child (coverage).
* @param right_sum The sum of hessians for the right child (coverage).
*/
void ExpandNode(bst_node_t nidx, bst_feature_t split_index, float split_cond, bool default_left,
linalg::VectorView<float const> base_weight,
void ExpandNode(Context const* ctx, bst_node_t nidx, bst_feature_t split_index, float split_cond,
bool default_left, linalg::VectorView<float const> base_weight,
linalg::VectorView<float const> left_weight,
linalg::VectorView<float const> right_weight, float loss_chg, float sum_hess,
float left_sum, float right_sum);
Expand Down Expand Up @@ -355,7 +355,7 @@ class RegTree : public Model {
/**
* @brief Expands a leaf node with categories for a multi-target tree.
*/
void ExpandCategorical(bst_node_t nidx, bst_feature_t split_index,
void ExpandCategorical(Context const* ctx, bst_node_t nidx, bst_feature_t split_index,
common::Span<const uint32_t> split_cat, bool default_left,
linalg::VectorView<float const> base_weight,
linalg::VectorView<float const> left_weight,
Expand Down Expand Up @@ -414,9 +414,9 @@ class RegTree : public Model {
* @param weight Internal split weight, with size equal to the number of reduced targets.
* @param sum_hess The sum of hessians for the root node (coverage).
*/
// NOTE(review): diff view — the first signature line and the first return line are presumably
// the removed form; the ctx-taking versions the added form.
void SetRoot(linalg::VectorView<float const> weight, float sum_hess) {
void SetRoot(Context const* ctx, linalg::VectorView<float const> weight, float sum_hess) {
// Only valid when IsMultiTarget() holds; the call is forwarded to p_mt_tree_.
CHECK(IsMultiTarget());
return this->p_mt_tree_->SetRoot(weight, sum_hess);
return this->p_mt_tree_->SetRoot(ctx, weight, sum_hess);
}
/**
* @brief Get the maximum depth.
Expand Down
Loading