Skip to content

Commit ddc72f3

Browse files
committed
cli/debug: narrow scope of --include-goroutine-stacks to just debug=2
Release note: none. Epic: none.
1 parent 7be14aa commit ddc72f3

File tree

4 files changed

+38
-31
lines changed

4 files changed

+38
-31
lines changed

pkg/cli/cliflags/flags.go

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1825,12 +1825,13 @@ Labs support.
18251825
ZipIncludeGoroutineStacks = FlagInfo{
18261826
Name: "include-goroutine-stacks",
18271827
Description: `
1828-
Fetch stack traces for all goroutines running on each targeted node in nodes/*/stacks.txt
1829-
and nodes/*/stacks_with_labels.txt files. Note that fetching stack traces for all goroutines is
1830-
a "stop-the-world" operation, which can momentarily have negative impacts on SQL service
1831-
latency. Note that any periodic goroutine dumps previously taken on the node will still be
1832-
included in nodes/*/goroutines/*.txt.gz, as these would have already been generated and don't
1833-
require any additional stop-the-world operations to be collected.
1828+
Fetch full stack traces for all goroutines running on each targeted node in nodes/*/stacks.txt files.
1829+
Note that fetching text stack traces for all goroutines incurs a brief "stop-the-world" pause of each
1830+
node which can momentarily have negative impacts on SQL service latency. This flag only controls
1831+
collection of new full dump of all current goroutine stacks -- any previously recorded, periodic
1832+
goroutine dumps retained in the logs directories are still included (in nodes/*/goroutines/*.txt.gz)
1833+
and collection of aggregate counts of current goroutine stacks -- which does not incur a stop-the-world
1834+
pause -- remains enabled regardless of this flag's value.
18341835
`,
18351836
}
18361837

pkg/cli/context.go

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -359,10 +359,9 @@ type zipContext struct {
359359
includeRangeInfo bool
360360

361361
// includeStacks fetches all goroutines running on each targeted node in
362-
// nodes/*/stacks.txt and nodes/*/stacks_with_labels.txt files. Note that
363-
// fetching stack traces for all goroutines is a temporary "stop the world"
364-
// operation, which can momentarily have negative impacts on SQL service
365-
// latency.
362+
// nodes/*/stacks.txt. Note that fetching debug=2 stack traces for all
363+
// goroutines incurs a temporary "stop the world" operation, which can
364+
// momentarily have negative impacts on SQL service latency.
366365
includeStacks bool
367366

368367
// includeRunningJobTraces includes the active traces of each running

pkg/cli/testdata/zip/testzip_exclude_goroutine_stacks

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ debug zip --concurrency=1 --cpu-profile-duration=1s --validate-zip-file=false --
2525
[node 1] requesting data for debug/nodes/1/details... received response... writing JSON output: debug/nodes/1/details.json... done
2626
[node 1] requesting data for debug/nodes/1/gossip... received response... writing JSON output: debug/nodes/1/gossip.json... done
2727
[node 1] Skipping fetching goroutine stacks. Enable via the --include-goroutine-stacks flag.
28+
[node 1] requesting stacks with labels... received response... writing binary output: debug/nodes/1/stacks_with_labels.txt... done
2829
[node 1] requesting heap profile... received response... writing binary output: debug/nodes/1/heap.pprof... done
2930
[node 1] requesting engine stats... received response... writing binary output: debug/nodes/1/lsm.txt... done
3031
[node 1] requesting heap profile list... received response... done

pkg/cli/zip_per_node.go

Lines changed: 27 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -579,6 +579,12 @@ func (zc *debugZipContext) getCurrentHeapProfile(
579579
func (zc *debugZipContext) getStackInformation(
580580
ctx context.Context, nodePrinter *zipReporter, id string, prefix string,
581581
) error {
582+
// zipCtx.includeStacks controls specifically the inclusion of stacks.txt, as
583+
// the debug=2 collection scheme has a "stop the world" implementation that
584+
// makes it potentially disruptive and thus has an option to skip it. This
585+
// option does _not_ need to skip collection of stack groups (debug=1) or the
586+
// binary goroutine profile (debug=3 or debug=0), as these do not have that
587+
// same stop-the-world disruption.
582588
if zipCtx.includeStacks {
583589
if zipCtx.files.shouldIncludeFile(stacksFileName) {
584590
var stacksData []byte
@@ -600,32 +606,32 @@ func (zc *debugZipContext) getStackInformation(
600606
} else {
601607
nodePrinter.info("skipping %s due to file filters", stacksFileName)
602608
}
609+
} else {
610+
nodePrinter.info("Skipping fetching goroutine stacks. Enable via the --%s flag.", cliflags.ZipIncludeGoroutineStacks.Name)
611+
}
603612

604-
var stacksDataWithLabels []byte
605-
if zipCtx.files.shouldIncludeFile(stacksWithLabelFileName) {
606-
s := nodePrinter.start("requesting stacks with labels")
607-
requestErr := zc.runZipFn(ctx, s,
608-
func(ctx context.Context) error {
609-
stacks, err := zc.status.Stacks(ctx, &serverpb.StacksRequest{
610-
NodeId: id,
611-
Type: serverpb.StacksType_GOROUTINE_STACKS_DEBUG_1,
612-
})
613-
if err == nil {
614-
stacksDataWithLabels = stacks.Data
615-
}
616-
return err
613+
var stacksDataWithLabels []byte
614+
if zipCtx.files.shouldIncludeFile(stacksWithLabelFileName) {
615+
s := nodePrinter.start("requesting stacks with labels")
616+
requestErr := zc.runZipFn(ctx, s,
617+
func(ctx context.Context) error {
618+
stacks, err := zc.status.Stacks(ctx, &serverpb.StacksRequest{
619+
NodeId: id,
620+
Type: serverpb.StacksType_GOROUTINE_STACKS_DEBUG_1,
617621
})
618-
if zipCtx.redact {
619-
stacksDataWithLabels = redactStackTrace(stacksDataWithLabels)
620-
}
621-
if err := zc.z.createRawOrError(s, prefix+"/"+stacksWithLabelFileName, stacksDataWithLabels, requestErr); err != nil {
622+
if err == nil {
623+
stacksDataWithLabels = stacks.Data
624+
}
622625
return err
623-
}
624-
} else {
625-
nodePrinter.info("skipping %s due to file filters", stacksWithLabelFileName)
626+
})
627+
if zipCtx.redact {
628+
stacksDataWithLabels = redactStackTrace(stacksDataWithLabels)
629+
}
630+
if err := zc.z.createRawOrError(s, prefix+"/"+stacksWithLabelFileName, stacksDataWithLabels, requestErr); err != nil {
631+
return err
626632
}
627633
} else {
628-
nodePrinter.info("Skipping fetching goroutine stacks. Enable via the --%s flag.", cliflags.ZipIncludeGoroutineStacks.Name)
634+
nodePrinter.info("skipping %s due to file filters", stacksWithLabelFileName)
629635
}
630636
return nil
631637
}

0 commit comments

Comments
 (0)