dmlc · trivialfis · Mar 23, 2026 · Mar 23, 2026 · Mar 23, 2026 · Mar 23, 2026
diff --git a/include/xgboost/host_device_vector.h b/include/xgboost/host_device_vector.h
@@ -1,5 +1,5 @@
-/*!
- * Copyright 2017-2019 XGBoost contributors
+/**
+ * Copyright 2017-2026, XGBoost contributors
  */
 
 /**
@@ -64,7 +64,8 @@ namespace xgboost {
 void SetCudaSetDeviceHandler(void (*handler)(int));
 #endif  // __CUDACC__
 
-template <typename T> struct HostDeviceVectorImpl;
+template <typename T>
+struct HostDeviceVectorImpl;
 
 /*!
  * \brief Controls data access from the GPU.
@@ -78,7 +79,8 @@ template <typename T> struct HostDeviceVectorImpl;
  *   - Data is being manipulated on the host. Host has write access, device doesn't have access.
  */
 enum GPUAccess {
-  kNone, kRead,
+  kNone,
+  kRead,
   // write implies read
   kWrite
 };
@@ -88,9 +90,15 @@ class HostDeviceVector {
   static_assert(std::is_standard_layout_v<T>, "HostDeviceVector admits only POD types");
 
  public:
-  explicit HostDeviceVector(size_t size = 0, T v = T(), DeviceOrd device = DeviceOrd::CPU());
-  HostDeviceVector(std::initializer_list<T> init, DeviceOrd device = DeviceOrd::CPU());
-  explicit HostDeviceVector(const std::vector<T>& init, DeviceOrd device = DeviceOrd::CPU());
+  using value_type = T;  // NOLINT
+
+ public:
+  explicit HostDeviceVector(size_t size = 0, T v = T(), DeviceOrd device = DeviceOrd::CPU(),
+                            Context const* ctx = nullptr);
+  HostDeviceVector(std::initializer_list<T> init, DeviceOrd device = DeviceOrd::CPU(),
+                   Context const* ctx = nullptr);
+  explicit HostDeviceVector(const std::vector<T>& init, DeviceOrd device = DeviceOrd::CPU(),
+                            Context const* ctx = nullptr);
   ~HostDeviceVector();
 
   HostDeviceVector(const HostDeviceVector<T>&) = delete;
@@ -103,44 +111,54 @@ class HostDeviceVector {
   [[nodiscard]] std::size_t Size() const;
   [[nodiscard]] std::size_t SizeBytes() const { return this->Size() * sizeof(T); }
   [[nodiscard]] DeviceOrd Device() const;
-  common::Span<T> DeviceSpan();
-  common::Span<const T> ConstDeviceSpan() const;
-  common::Span<const T> DeviceSpan() const { return ConstDeviceSpan(); }
-  T* DevicePointer();
-  const T* ConstDevicePointer() const;
-  const T* DevicePointer() const { return ConstDevicePointer(); }
-
-  T* HostPointer() { return HostVector().data(); }
-  common::Span<T> HostSpan() { return common::Span<T>{HostVector()}; }
-  common::Span<T const> HostSpan() const { return common::Span<T const>{HostVector()}; }
-  common::Span<T const> ConstHostSpan() const { return HostSpan(); }
-  const T* ConstHostPointer() const { return ConstHostVector().data(); }
-  const T* HostPointer() const { return ConstHostPointer(); }
-
-  void Fill(T v);
-  void Copy(const HostDeviceVector<T>& other);
-  void Copy(const std::vector<T>& other);
-  void Copy(std::initializer_list<T> other);
-
-  void Extend(const HostDeviceVector<T>& other);
-
-  std::vector<T>& HostVector();
-  const std::vector<T>& ConstHostVector() const;
-  const std::vector<T>& HostVector() const {return ConstHostVector(); }
+  common::Span<T> DeviceSpan(Context const* ctx = nullptr);
+  common::Span<const T> ConstDeviceSpan(Context const* ctx = nullptr) const;
+  common::Span<const T> DeviceSpan(Context const* ctx = nullptr) const {
+    return ConstDeviceSpan(ctx);
+  }
+  T* DevicePointer(Context const* ctx = nullptr);
+  const T* ConstDevicePointer(Context const* ctx = nullptr) const;
+  const T* DevicePointer(Context const* ctx = nullptr) const { return ConstDevicePointer(ctx); }
+
+  T* HostPointer(Context const* ctx = nullptr) { return HostVector(ctx).data(); }
+  common::Span<T> HostSpan(Context const* ctx = nullptr) {
+    return common::Span<T>{HostVector(ctx)};
+  }
+  common::Span<T const> HostSpan(Context const* ctx = nullptr) const {
+    return common::Span<T const>{HostVector(ctx)};
+  }
+  common::Span<T const> ConstHostSpan(Context const* ctx = nullptr) const { return HostSpan(ctx); }
+  const T* ConstHostPointer(Context const* ctx = nullptr) const {
+    return ConstHostVector(ctx).data();
+  }
+  const T* HostPointer(Context const* ctx = nullptr) const { return ConstHostPointer(ctx); }
+
+  void Fill(T v, Context const* ctx = nullptr);
+  void Copy(const HostDeviceVector<T>& other, Context const* ctx = nullptr);
+  void Copy(const std::vector<T>& other, Context const* ctx = nullptr);
+  void Copy(std::initializer_list<T> other, Context const* ctx = nullptr);
+
+  void Extend(const HostDeviceVector<T>& other, Context const* ctx = nullptr);
+
+  std::vector<T>& HostVector(Context const* ctx = nullptr);
+  const std::vector<T>& ConstHostVector(Context const* ctx = nullptr) const;
+  const std::vector<T>& HostVector(Context const* ctx = nullptr) const {
+    return ConstHostVector(ctx);
+  }
 
   [[nodiscard]] bool HostCanRead() const;
   [[nodiscard]] bool HostCanWrite() const;
   [[nodiscard]] bool DeviceCanRead() const;
   [[nodiscard]] bool DeviceCanWrite() const;
   [[nodiscard]] GPUAccess DeviceAccess() const;
-
-  void SetDevice(DeviceOrd device) const;
+  // FIXME(jiamingy): Until we can fully unify the context, we will have both ctx and device here.
+  void SetDevice(DeviceOrd device, Context const* ctx = nullptr) const;
 
   void Resize(std::size_t new_size);
-  /** @brief Resize and initialize the data if the new size is larger than the old size. */
-  void Resize(std::size_t new_size, T v);
+  void Resize(Context const* ctx, std::size_t new_size);
 
-  using value_type = T;  // NOLINT
+  /** @brief Resize; if the new size is larger than the old size, initialize the data with v. */
+  void Resize(Context const* ctx, std::size_t new_size, T v);
 
  private:
   HostDeviceVectorImpl<T>* impl_;

diff --git a/include/xgboost/linalg.h b/include/xgboost/linalg.h
@@ -595,13 +595,13 @@ auto MakeTensorView(Context const *ctx, Order order, common::Span<T, ext> data,
 
 template <typename T, typename... S>
 auto MakeTensorView(Context const *ctx, HostDeviceVector<T> *data, S &&...shape) {
-  auto span = ctx->IsCPU() ? data->HostSpan() : data->DeviceSpan();
+  auto span = ctx->IsCPU() ? data->HostSpan(ctx) : data->DeviceSpan(ctx);
   return MakeTensorView(ctx->Device(), span, std::forward<S>(shape)...);
 }
 
 template <typename T, typename... S>
 auto MakeTensorView(Context const *ctx, HostDeviceVector<T> const *data, S &&...shape) {
-  auto span = ctx->IsCPU() ? data->ConstHostSpan() : data->ConstDeviceSpan();
+  auto span = ctx->IsCPU() ? data->ConstHostSpan(ctx) : data->ConstDeviceSpan(ctx);
   return MakeTensorView(ctx->Device(), span, std::forward<S>(shape)...);
 }
 
@@ -768,15 +768,15 @@ class Tensor {
   Order order_{Order::kC};
 
   template <typename I, std::int32_t D>
-  void Initialize(I const (&shape)[D], DeviceOrd device) {
+  void Initialize(I const (&shape)[D], DeviceOrd device, Context const *ctx = nullptr) {
     static_assert(D <= kDim, "Invalid shape.");
     std::copy(shape, shape + D, shape_);
     for (auto i = D; i < kDim; ++i) {
       shape_[i] = 1;
     }
     if (!device.IsCPU()) {
-      data_.SetDevice(device);
-      data_.ConstDevicePointer();  // Pull to device;
+      data_.SetDevice(device, ctx);
+      data_.ConstDevicePointer(ctx);  // Pull to device;
     }
     CHECK_EQ(data_.Size(), detail::CalcSize(shape_));
   }
@@ -791,11 +791,13 @@ class Tensor {
    * See \ref TensorView for parameters of this constructor.
    */
   template <typename I, int32_t D>
-  explicit Tensor(I const (&shape)[D], DeviceOrd device, Order order = kC)
-      : Tensor{common::Span<I const, D>{shape}, device, order} {}
+  explicit Tensor(I const (&shape)[D], DeviceOrd device, Order order = kC,
+                  Context const *ctx = nullptr)
+      : Tensor{common::Span<I const, D>{shape}, device, order, ctx} {}
 
   template <typename I, size_t D>
-  explicit Tensor(common::Span<I const, D> shape, DeviceOrd device, Order order = kC)
+  explicit Tensor(common::Span<I const, D> shape, DeviceOrd device, Order order = kC,
+                  Context const *ctx = nullptr)
       : order_{order} {
     // No device unroll as this is a host only function.
     std::copy(shape.data(), shape.data() + D, shape_);
@@ -804,33 +806,34 @@ class Tensor {
     }
     auto size = detail::CalcSize(shape_);
     if (!device.IsCPU()) {
-      data_.SetDevice(device);
+      data_.SetDevice(device, ctx);
     }
-    data_.Resize(size);
+    data_.Resize(ctx, size);
     if (!device.IsCPU()) {
-      data_.DevicePointer();  // Pull to device
+      data_.DevicePointer(ctx);  // Pull to device
     }
   }
   /**
    * Initialize from 2 host iterators.
    */
   template <typename It, typename I, int32_t D>
-  explicit Tensor(It begin, It end, I const (&shape)[D], DeviceOrd device, Order order = kC)
+  explicit Tensor(It begin, It end, I const (&shape)[D], DeviceOrd device, Order order = kC,
+                  Context const *ctx = nullptr)
       : order_{order} {
     auto &h_vec = data_.HostVector();
     h_vec.insert(h_vec.begin(), begin, end);
     // shape
-    this->Initialize(shape, device);
+    this->Initialize(shape, device, ctx);
   }
 
   template <typename I, int32_t D>
   explicit Tensor(std::initializer_list<T> data, I const (&shape)[D], DeviceOrd device,
-                  Order order = kC)
+                  Order order = kC, Context const *ctx = nullptr)
       : order_{order} {
     auto &h_vec = data_.HostVector();
     h_vec = data;
     // shape
-    this->Initialize(shape, device);
+    this->Initialize(shape, device, ctx);
   }
   /**
    * \brief Index operator. Not thread safe, should not be used in performance critical
@@ -852,29 +855,29 @@ class Tensor {
   /**
    * @brief Get a @ref TensorView for this tensor.
    */
-  auto View(DeviceOrd device) {
+  auto View(DeviceOrd device, Context const *ctx = nullptr) {
     if (device.IsCPU()) {
-      auto span = data_.HostSpan();
+      auto span = data_.HostSpan(ctx);
       return TensorView<T, kDim>{span, shape_, device, order_};
     } else {
-      data_.SetDevice(device);
-      auto span = data_.DeviceSpan();
+      data_.SetDevice(device, ctx);
+      auto span = data_.DeviceSpan(ctx);
       return TensorView<T, kDim>{span, shape_, device, order_};
     }
   }
-  auto View(DeviceOrd device) const {
+  auto View(DeviceOrd device, Context const *ctx = nullptr) const {
     if (device.IsCPU()) {
-      auto span = data_.ConstHostSpan();
+      auto span = data_.ConstHostSpan(ctx);
       return TensorView<T const, kDim>{span, shape_, device, order_};
     } else {
-      data_.SetDevice(device);
-      auto span = data_.ConstDeviceSpan();
+      data_.SetDevice(device, ctx);
+      auto span = data_.ConstDeviceSpan(ctx);
       return TensorView<T const, kDim>{span, shape_, device, order_};
     }
   }
 
-  auto HostView() { return this->View(DeviceOrd::CPU()); }
-  auto HostView() const { return this->View(DeviceOrd::CPU()); }
+  auto HostView(Context const *ctx = nullptr) { return this->View(DeviceOrd::CPU(), ctx); }
+  auto HostView(Context const *ctx = nullptr) const { return this->View(DeviceOrd::CPU(), ctx); }
 
   [[nodiscard]] std::size_t Size() const { return data_.Size(); }
   [[nodiscard]] bool Empty() const { return Size() == 0; }
@@ -950,7 +953,9 @@ class Tensor {
   /**
    * \brief Set device ordinal for this tensor.
    */
-  void SetDevice(DeviceOrd device) const { data_.SetDevice(device); }
+  void SetDevice(DeviceOrd device, Context const *ctx = nullptr) const {
+    data_.SetDevice(device, ctx);
+  }
   [[nodiscard]] DeviceOrd Device() const { return data_.Device(); }
 };
 
@@ -966,7 +971,7 @@ using Vector = Tensor<T, 1>;
 template <typename T, typename... Index>
 auto Empty(Context const *ctx, Index &&...index) {
   Tensor<T, sizeof...(Index)> t;
-  t.SetDevice(ctx->Device());
+  t.SetDevice(ctx->Device(), ctx);
   t.Reshape(index...);
   return t;
 }
@@ -977,7 +982,7 @@ auto Empty(Context const *ctx, Index &&...index) {
 template <typename T, std::int32_t kDim>
 auto EmptyLike(Context const *ctx, Tensor<T, kDim> const &in) {
   Tensor<T, kDim> t;
-  t.SetDevice(ctx->Device());
+  t.SetDevice(ctx->Device(), ctx);
   t.Reshape(in.Shape());
   return t;
 }
@@ -988,9 +993,9 @@ auto EmptyLike(Context const *ctx, Tensor<T, kDim> const &in) {
 template <typename T, typename... Index>
 auto Constant(Context const *ctx, T v, Index &&...index) {
   Tensor<T, sizeof...(Index)> t;
-  t.SetDevice(ctx->Device());
+  t.SetDevice(ctx->Device(), ctx);
   t.Reshape(index...);
-  t.Data()->Fill(std::move(v));
+  t.Data()->Fill(std::move(v), ctx);
   return t;
 }
 

diff --git a/include/xgboost/multi_target_tree_model.h b/include/xgboost/multi_target_tree_model.h
@@ -90,12 +90,12 @@ class MultiTargetTree : public Model {
    * @param weight   The weight vector for the root node.
    * @param sum_hess The sum of hessians for the root node (coverage).
    */
-  void SetRoot(linalg::VectorView<float const> weight, float sum_hess);
+  void SetRoot(Context const* ctx, linalg::VectorView<float const> weight, float sum_hess);
   /**
    * @brief Expand a leaf into split node.
    */
-  void Expand(bst_node_t nidx, bst_feature_t split_idx, float split_cond, bool default_left,
-              linalg::VectorView<float const> base_weight,
+  void Expand(Context const* ctx, bst_node_t nidx, bst_feature_t split_idx, float split_cond,
+              bool default_left, linalg::VectorView<float const> base_weight,
               linalg::VectorView<float const> left_weight,
               linalg::VectorView<float const> right_weight, float loss_chg, float sum_hess,
               float left_sum, float right_sum);

diff --git a/include/xgboost/tree_model.h b/include/xgboost/tree_model.h
@@ -315,8 +315,8 @@ class RegTree : public Model {
    * @param left_sum  The sum of hessians for the left child (coverage).
    * @param right_sum The sum of hessians for the right child (coverage).
    */
-  void ExpandNode(bst_node_t nidx, bst_feature_t split_index, float split_cond, bool default_left,
-                  linalg::VectorView<float const> base_weight,
+  void ExpandNode(Context const* ctx, bst_node_t nidx, bst_feature_t split_index, float split_cond,
+                  bool default_left, linalg::VectorView<float const> base_weight,
                   linalg::VectorView<float const> left_weight,
                   linalg::VectorView<float const> right_weight, float loss_chg, float sum_hess,
                   float left_sum, float right_sum);
@@ -355,7 +355,7 @@ class RegTree : public Model {
   /**
    * @brief Expands a leaf node with categories for a multi-target tree.
    */
-  void ExpandCategorical(bst_node_t nidx, bst_feature_t split_index,
+  void ExpandCategorical(Context const* ctx, bst_node_t nidx, bst_feature_t split_index,
                          common::Span<const uint32_t> split_cat, bool default_left,
                          linalg::VectorView<float const> base_weight,
                          linalg::VectorView<float const> left_weight,
@@ -414,9 +414,9 @@ class RegTree : public Model {
    * @param weight   Internal split weight, with size equals to reduced targets.
    * @param sum_hess The sum of hessians for the root node (coverage).
    */
-  void SetRoot(linalg::VectorView<float const> weight, float sum_hess) {
+  void SetRoot(Context const* ctx, linalg::VectorView<float const> weight, float sum_hess) {
     CHECK(IsMultiTarget());
-    return this->p_mt_tree_->SetRoot(weight, sum_hess);
+    return this->p_mt_tree_->SetRoot(ctx, weight, sum_hess);
   }
   /**
    * @brief Get the maximum depth.