Skip to content
This repository was archived by the owner on Apr 2, 2026. It is now read-only.

Commit 04e4c7a

Browse files
authored
Merge pull request #7 from reddit/v2.6.6-patched-251123-trace
Additional Tracing -for Perf Debugging
2 parents 99d5a2e + b8eef32 commit 04e4c7a

6 files changed

Lines changed: 73 additions & 1 deletion

File tree

internal/core/src/exec/Task.cpp

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,8 @@ Task::Terminate(TaskState state) {
138138

139139
RowVectorPtr
140140
Task::Next(ContinueFuture* future) {
141+
milvus::tracer::AddEvent("task_next_start");
142+
141143
// NOTE: Task::Next is single-threaded execution
142144
AssertInfo(plan_fragment_.execution_strategy_ ==
143145
plan::ExecutionStrategy::kUngrouped,
@@ -151,24 +153,30 @@ Task::Next(ContinueFuture* future) {
151153
consumer_supplier_ == nullptr,
152154
"Single-threaded execution doesn't support delivering results to a "
153155
"callback");
154-
156+
milvus::tracer::AddEvent("before_local_planner");
157+
155158
LocalPlanner::Plan(plan_fragment_,
156159
nullptr,
157160
&driver_factories_,
158161
*query_context_->query_config(),
159162
1);
163+
milvus::tracer::AddEvent("after_local_planner");
160164

165+
milvus::tracer::AddEvent("before_driver_factory_loop");
161166
for (const auto& factory : driver_factories_) {
162167
assert(factory->SupportSingleThreadExecution());
163168
num_ungrouped_drivers_ += factory->num_drivers_;
164169
num_total_drivers_ += factory->num_total_drivers_;
165170
}
171+
milvus::tracer::AddEvent("after_driver_factory_loop");
166172

167173
auto self = shared_from_this();
168174
std::vector<std::shared_ptr<Driver>> drivers;
169175

176+
milvus::tracer::AddEvent("before_create_drivers");
170177
drivers.reserve(num_ungrouped_drivers_);
171178
CreateDriversLocked(self, kUngroupedGroupId, drivers);
179+
milvus::tracer::AddEvent("after_create_drivers");
172180

173181
drivers_ = std::move(drivers);
174182
}
@@ -177,6 +185,7 @@ Task::Next(ContinueFuture* future) {
177185

178186
std::vector<ContinueFuture> futures;
179187
futures.resize(num_drivers);
188+
milvus::tracer::AddEvent("before_driver_execution_loop");
180189

181190
for (;;) {
182191
int runnable_drivers = 0;
@@ -232,6 +241,7 @@ Task::Next(ContinueFuture* future) {
232241
return nullptr;
233242
}
234243
}
244+
milvus::tracer::AddEvent("after_driver_execution_loop");
235245
}
236246

237247
} // namespace exec

internal/core/src/exec/operator/FilterBitsNode.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,8 @@ PhyFilterBitsNode::IsFinished() {
5959

6060
RowVectorPtr
6161
PhyFilterBitsNode::GetOutput() {
62+
milvus::tracer::AddEvent("filter_bits_node_start");
63+
6264
if (AllInputProcessed()) {
6365
return nullptr;
6466
}
@@ -70,6 +72,8 @@ PhyFilterBitsNode::GetOutput() {
7072

7173
TargetBitmap bitset;
7274
TargetBitmap valid_bitset;
75+
76+
milvus::tracer::AddEvent("filter_bits_before_eval");
7377
while (num_processed_rows_ < need_process_rows_) {
7478
exprs_->Eval(0, 1, true, eval_ctx, results_);
7579

@@ -96,6 +100,8 @@ PhyFilterBitsNode::GetOutput() {
96100
"PhyFilterBitsNode result should be ColumnVector");
97101
}
98102
}
103+
milvus::tracer::AddEvent("filter_bits_after_eval");
104+
99105
bitset.flip();
100106
AssertInfo(bitset.size() == need_process_rows_,
101107
"bitset size: {}, need_process_rows_: {}",
@@ -109,6 +115,7 @@ PhyFilterBitsNode::GetOutput() {
109115
std::vector<VectorPtr> col_res;
110116
col_res.push_back(std::make_shared<ColumnVector>(std::move(bitset),
111117
std::move(valid_bitset)));
118+
milvus::tracer::AddEvent("filter_bits_node_end");
112119
std::chrono::high_resolution_clock::time_point scalar_end =
113120
std::chrono::high_resolution_clock::now();
114121
double scalar_cost =

internal/core/src/exec/operator/MvccNode.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
// limitations under the License.
1616

1717
#include "MvccNode.h"
18+
#include "common/Tracer.h"
1819

1920
namespace milvus {
2021
namespace exec {
@@ -43,6 +44,8 @@ PhyMvccNode::AddInput(RowVectorPtr& input) {
4344

4445
RowVectorPtr
4546
PhyMvccNode::GetOutput() {
47+
milvus::tracer::AddEvent("mvcc_node_start");
48+
4649
if (is_finished_) {
4750
return nullptr;
4851
}
@@ -55,6 +58,8 @@ PhyMvccNode::GetOutput() {
5558
is_finished_ = true;
5659
return nullptr;
5760
}
61+
62+
milvus::tracer::AddEvent("mvcc_before_bitmap_creation");
5863
// the first vector is filtering result and second bitset is a valid bitset
5964
// if valid_bitset[i]==false, means result[i] is null
6065
auto col_input = is_source_node_ ? std::make_shared<ColumnVector>(
@@ -63,10 +68,16 @@ PhyMvccNode::GetOutput() {
6368
: GetColumnVector(input_);
6469

6570
TargetBitmapView data(col_input->GetRawData(), col_input->size());
71+
72+
milvus::tracer::AddEvent("mvcc_before_mask_timestamps");
6673
// need to expose null?
6774
segment_->mask_with_timestamps(
6875
data, query_timestamp_, collection_ttl_timestamp_);
76+
milvus::tracer::AddEvent("mvcc_after_mask_timestamps");
77+
6978
segment_->mask_with_delete(data, active_count_, query_timestamp_);
79+
milvus::tracer::AddEvent("mvcc_after_mask_delete");
80+
7081
is_finished_ = true;
7182

7283
// input_ have already been updated

internal/core/src/query/ExecPlanNodeVisitor.cpp

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,10 +44,16 @@ ExecPlanNodeVisitor::ExecuteTask(
4444
plan.plan_node_->ToString(),
4545
query_context->get_active_count(),
4646
query_context->get_query_timestamp());
47+
48+
milvus::tracer::AddEvent("execute_task_before_create");
4749
auto task =
4850
milvus::exec::Task::Create(DEFAULT_TASK_ID, plan, 0, query_context);
51+
milvus::tracer::AddEvent("execute_task_after_create");
52+
4953
int64_t processed_num = 0;
5054
BitsetType bitset_holder;
55+
56+
milvus::tracer::AddEvent("execute_task_before_loop");
5157
for (;;) {
5258
auto result = task->Next();
5359
if (!result) {
@@ -66,6 +72,7 @@ ExecPlanNodeVisitor::ExecuteTask(
6672
ThrowInfo(UnexpectedError, "expr return type not matched");
6773
}
6874
}
75+
milvus::tracer::AddEvent("execute_task_after_loop");
6976
return bitset_holder;
7077
}
7178

@@ -85,9 +92,13 @@ ExecPlanNodeVisitor::VectorVisitorImpl(VectorPlanNode& node) {
8592
return;
8693
}
8794

95+
milvus::tracer::AddEvent("visitor_before_plan_fragment");
96+
8897
// Construct plan fragment
8998
auto plan = plan::PlanFragment(node.plannodes_);
9099

100+
milvus::tracer::AddEvent("visitor_before_query_context");
101+
91102
// Set query context
92103
auto query_context =
93104
std::make_shared<milvus::exec::QueryContext>(DEAFULT_QUERY_ID,
@@ -105,8 +116,12 @@ ExecPlanNodeVisitor::VectorVisitorImpl(VectorPlanNode& node) {
105116
auto op_context = milvus::OpContext();
106117
query_context->set_op_context(&op_context);
107118

119+
milvus::tracer::AddEvent("visitor_before_execute_task");
120+
108121
// Do plan fragment task work
109122
auto result = ExecuteTask(plan, query_context);
123+
124+
milvus::tracer::AddEvent("visitor_after_execute_task");
110125

111126
// Store result
112127
search_result_opt_ = std::move(query_context->get_search_result());

internal/core/src/segcore/SegmentInterface.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,14 +97,26 @@ SegmentInternalInterface::Search(
9797
Timestamp timestamp,
9898
int32_t consistency_level,
9999
Timestamp collection_ttl) const {
100+
milvus::tracer::AddEvent("segment_search_start");
100101
std::shared_lock lck(mutex_);
101102
milvus::tracer::AddEvent("obtained_segment_lock_mutex");
103+
104+
milvus::tracer::AddEvent("before_check_search");
102105
check_search(plan);
106+
milvus::tracer::AddEvent("after_check_search");
107+
108+
milvus::tracer::AddEvent("before_create_visitor");
103109
query::ExecPlanNodeVisitor visitor(
104110
*this, timestamp, placeholder_group, consistency_level, collection_ttl);
111+
milvus::tracer::AddEvent("after_create_visitor");
112+
105113
auto results = std::make_unique<SearchResult>();
114+
milvus::tracer::AddEvent("before_visitor_get_result");
106115
*results = visitor.get_moved_result(*plan->plan_node_);
116+
milvus::tracer::AddEvent("after_visitor_get_result");
117+
107118
results->segment_ = (void*)this;
119+
milvus::tracer::AddEvent("segment_search_end");
108120
return results;
109121
}
110122

internal/core/src/segcore/segment_c.cpp

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,11 +122,14 @@ AsyncSearch(CTraceContext c_trace,
122122
uint64_t timestamp,
123123
int32_t consistency_level,
124124
uint64_t collection_ttl) {
125+
milvus::tracer::AddEvent("async_search_cgo_entry");
126+
125127
auto segment = (milvus::segcore::SegmentInterface*)c_segment;
126128
auto plan = (milvus::query::Plan*)c_plan;
127129
auto phg_ptr = reinterpret_cast<const milvus::query::PlaceholderGroup*>(
128130
c_placeholder_group);
129131

132+
milvus::tracer::AddEvent("before_future_async");
130133
auto future = milvus::futures::Future<milvus::SearchResult>::async(
131134
milvus::futures::getGlobalCPUExecutor(),
132135
milvus::futures::ExecutePriority::HIGH,
@@ -137,27 +140,41 @@ AsyncSearch(CTraceContext c_trace,
137140
timestamp,
138141
consistency_level,
139142
collection_ttl](milvus::futures::CancellationToken cancel_token) {
143+
milvus::tracer::AddEvent("async_lambda_start");
144+
140145
// save trace context into search_info
141146
auto& trace_ctx = plan->plan_node_->search_info_.trace_ctx_;
142147
trace_ctx.traceID = c_trace.traceID;
143148
trace_ctx.spanID = c_trace.spanID;
144149
trace_ctx.traceFlags = c_trace.traceFlags;
145150

151+
milvus::tracer::AddEvent("before_span_start");
146152
auto span = milvus::tracer::StartSpan("SegCoreSearch", &trace_ctx);
147153
milvus::tracer::SetRootSpan(span);
154+
milvus::tracer::AddEvent("after_span_start");
148155

156+
milvus::tracer::AddEvent("before_lazy_check_schema");
149157
segment->LazyCheckSchema(plan->schema_);
158+
milvus::tracer::AddEvent("after_lazy_check_schema");
150159

160+
milvus::tracer::AddEvent("before_segment_search");
151161
auto search_result = segment->Search(
152162
plan, phg_ptr, timestamp, consistency_level, collection_ttl);
163+
milvus::tracer::AddEvent("after_segment_search");
164+
153165
if (!milvus::PositivelyRelated(
154166
plan->plan_node_->search_info_.metric_type_)) {
167+
milvus::tracer::AddEvent("before_distance_negation");
155168
for (auto& dis : search_result->distances_) {
156169
dis *= -1;
157170
}
171+
milvus::tracer::AddEvent("after_distance_negation");
158172
}
173+
174+
milvus::tracer::AddEvent("before_span_end");
159175
span->End();
160176
milvus::tracer::CloseRootSpan();
177+
milvus::tracer::AddEvent("async_lambda_end");
161178
return search_result.release();
162179
});
163180
return static_cast<CFuture*>(static_cast<void*>(

0 commit comments

Comments
 (0)