Skip to content

Commit 75cd29b

Browse files
[MemProf] Add option to emit full call context for matched allocations (#170516)
Add the -memprof-print-matched-alloc-stack option to enable emitting the full allocation call context (of stack ids) for each matched allocation reported by -memprof-print-match-info. The new option is a no-op when -memprof-print-match-info is not enabled.
1 parent ca6eb2f commit 75cd29b

File tree

2 files changed

+83
-39
lines changed

2 files changed

+83
-39
lines changed

llvm/lib/Transforms/Instrumentation/MemProfUse.cpp

Lines changed: 64 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,12 @@ static cl::opt<bool>
6262
"context in this module's profiles"),
6363
cl::Hidden, cl::init(false));
6464

65+
static cl::opt<bool> PrintMatchedAllocStack(
66+
"memprof-print-matched-alloc-stack",
67+
cl::desc("Print full stack context for matched "
68+
"allocations with -memprof-print-match-info."),
69+
cl::Hidden, cl::init(false));
70+
6571
static cl::opt<bool>
6672
PrintFunctionGuids("memprof-print-function-guids",
6773
cl::desc("Print function GUIDs computed for matching"),
@@ -227,9 +233,26 @@ static void HandleUnsupportedAnnotationKinds(GlobalVariable &GVar,
227233
<< Reason << ".\n");
228234
}
229235

236+
// Structure for tracking info about matched allocation contexts for use with
237+
// -memprof-print-match-info and -memprof-print-matched-alloc-stack.
230238
struct AllocMatchInfo {
239+
// Total size in bytes of matched context.
231240
uint64_t TotalSize = 0;
241+
// Matched allocation's type.
232242
AllocationType AllocType = AllocationType::None;
243+
// Number of frames matched to the allocation itself (values will be >1 in
244+
// cases where allocation was already inlined). Use a set because there can
245+
// be multiple inlined instances and each may have a different inline depth.
246+
// Use std::set to iterate in sorted order when printing.
247+
std::set<unsigned> MatchedFramesSet;
248+
// The full call stack of the allocation, for cases where requested via
249+
// -memprof-print-matched-alloc-stack.
250+
std::vector<Frame> CallStack;
251+
252+
// The caller is responsible for inserting the matched frames and the call stack when
253+
// appropriate.
254+
AllocMatchInfo(uint64_t TotalSize, AllocationType AllocType)
255+
: TotalSize(TotalSize), AllocType(AllocType) {}
233256
};
234257

235258
DenseMap<uint64_t, SmallVector<CallEdgeTy, 0>>
@@ -407,13 +430,11 @@ static void addVPMetadata(Module &M, Instruction &I,
407430
}
408431
}
409432

410-
static void
411-
handleAllocSite(Instruction &I, CallBase *CI,
412-
ArrayRef<uint64_t> InlinedCallStack, LLVMContext &Ctx,
413-
OptimizationRemarkEmitter &ORE, uint64_t MaxColdSize,
414-
const std::set<const AllocationInfo *> &AllocInfoSet,
415-
std::map<std::pair<uint64_t, unsigned>, AllocMatchInfo>
416-
&FullStackIdToAllocMatchInfo) {
433+
static void handleAllocSite(
434+
Instruction &I, CallBase *CI, ArrayRef<uint64_t> InlinedCallStack,
435+
LLVMContext &Ctx, OptimizationRemarkEmitter &ORE, uint64_t MaxColdSize,
436+
const std::set<const AllocationInfo *> &AllocInfoSet,
437+
std::map<uint64_t, AllocMatchInfo> &FullStackIdToAllocMatchInfo) {
417438
// TODO: Remove this once the profile creation logic deduplicates contexts
418439
// that are the same other than the IsInlineFrame bool. Until then, keep the
419440
// largest.
@@ -455,9 +476,15 @@ handleAllocSite(Instruction &I, CallBase *CI,
455476
// was requested.
456477
if (ClPrintMemProfMatchInfo) {
457478
assert(FullStackId != 0);
458-
FullStackIdToAllocMatchInfo[std::make_pair(FullStackId,
459-
InlinedCallStack.size())] = {
460-
AllocInfo->Info.getTotalSize(), AllocType};
479+
auto [Iter, Inserted] = FullStackIdToAllocMatchInfo.try_emplace(
480+
FullStackId,
481+
AllocMatchInfo(AllocInfo->Info.getTotalSize(), AllocType));
482+
// Always insert the new matched frame count, since it may differ.
483+
Iter->second.MatchedFramesSet.insert(InlinedCallStack.size());
484+
if (Inserted && PrintMatchedAllocStack)
485+
Iter->second.CallStack.insert(Iter->second.CallStack.begin(),
486+
AllocInfo->CallStack.begin(),
487+
AllocInfo->CallStack.end());
461488
}
462489
ORE.emit(
463490
OptimizationRemark(DEBUG_TYPE, "MemProfUse", CI)
@@ -564,14 +591,13 @@ static void handleCallSite(Instruction &I, const Function *CalledFunction,
564591
addVPMetadata(M, I, CalleeGuids.getArrayRef());
565592
}
566593

567-
static void readMemprof(Module &M, Function &F,
568-
IndexedInstrProfReader *MemProfReader,
569-
const TargetLibraryInfo &TLI,
570-
std::map<std::pair<uint64_t, unsigned>, AllocMatchInfo>
571-
&FullStackIdToAllocMatchInfo,
572-
std::set<std::vector<uint64_t>> &MatchedCallSites,
573-
DenseMap<uint64_t, LocToLocMap> &UndriftMaps,
574-
OptimizationRemarkEmitter &ORE, uint64_t MaxColdSize) {
594+
static void
595+
readMemprof(Module &M, Function &F, IndexedInstrProfReader *MemProfReader,
596+
const TargetLibraryInfo &TLI,
597+
std::map<uint64_t, AllocMatchInfo> &FullStackIdToAllocMatchInfo,
598+
std::set<std::vector<uint64_t>> &MatchedCallSites,
599+
DenseMap<uint64_t, LocToLocMap> &UndriftMaps,
600+
OptimizationRemarkEmitter &ORE, uint64_t MaxColdSize) {
575601
auto &Ctx = M.getContext();
576602
// Previously we used getIRPGOFuncName() here. If F is local linkage,
577603
// getIRPGOFuncName() returns FuncName with prefix 'FileName;'. But
@@ -799,11 +825,11 @@ PreservedAnalyses MemProfUsePass::run(Module &M, ModuleAnalysisManager &AM) {
799825
if (SalvageStaleProfile)
800826
UndriftMaps = computeUndriftMap(M, MemProfReader.get(), TLI);
801827

802-
// Map from the stack hash and matched frame count of each allocation context
803-
// in the function profiles to the total profiled size (bytes) and allocation
804-
// type.
805-
std::map<std::pair<uint64_t, unsigned>, AllocMatchInfo>
806-
FullStackIdToAllocMatchInfo;
828+
// Map from the stack hash of each matched allocation context in the function
829+
// profiles to match info such as the total profiled size (bytes), allocation
830+
// type, number of frames matched to the allocation itself, and the full array
831+
// of call stack ids.
832+
std::map<uint64_t, AllocMatchInfo> FullStackIdToAllocMatchInfo;
807833

808834
// Set of the matched call sites, each expressed as a sequence of an inline
809835
// call stack.
@@ -824,11 +850,21 @@ PreservedAnalyses MemProfUsePass::run(Module &M, ModuleAnalysisManager &AM) {
824850
}
825851

826852
if (ClPrintMemProfMatchInfo) {
827-
for (const auto &[IdLengthPair, Info] : FullStackIdToAllocMatchInfo) {
828-
auto [Id, Length] = IdLengthPair;
829-
errs() << "MemProf " << getAllocTypeAttributeString(Info.AllocType)
830-
<< " context with id " << Id << " has total profiled size "
831-
<< Info.TotalSize << " is matched with " << Length << " frames\n";
853+
for (const auto &[Id, Info] : FullStackIdToAllocMatchInfo) {
854+
for (auto Frames : Info.MatchedFramesSet) {
855+
// TODO: To reduce verbosity, should we change the existing message
856+
// so that we emit a list of matched frame counts in a single message
857+
// about the context (instead of one message per frame count)?
858+
errs() << "MemProf " << getAllocTypeAttributeString(Info.AllocType)
859+
<< " context with id " << Id << " has total profiled size "
860+
<< Info.TotalSize << " is matched with " << Frames << " frames";
861+
if (PrintMatchedAllocStack) {
862+
errs() << " and call stack";
863+
for (auto &F : Info.CallStack)
864+
errs() << " " << computeStackId(F);
865+
}
866+
errs() << "\n";
867+
}
832868
}
833869

834870
for (const auto &CallStack : MatchedCallSites) {

llvm/test/Transforms/PGOProfile/memprof-dump-matched-alloc-site.ll

Lines changed: 19 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,13 @@
2626
; REQUIRES: x86_64-linux
2727
; RUN: split-file %s %t
2828
; RUN: llvm-profdata merge %t/memprof-dump-matched-alloc-site.yaml -o %t/memprof-dump-matched-alloc-site.memprofdata
29-
; RUN: opt < %t/memprof-dump-matched-alloc-site.ll -passes='memprof-use<profile-filename=%t/memprof-dump-matched-alloc-site.memprofdata>' -memprof-print-match-info -memprof-print-function-guids -S -pass-remarks=memprof 2>&1 | FileCheck %s
29+
; RUN: opt < %t/memprof-dump-matched-alloc-site.ll -passes='memprof-use<profile-filename=%t/memprof-dump-matched-alloc-site.memprofdata>' -memprof-print-match-info -memprof-print-function-guids -S -pass-remarks=memprof 2>&1 | FileCheck %s --check-prefixes=MATCH,FUNCGUID,REMARK
30+
;; Test that -memprof-print-matched-alloc-stack enables reporting of the full
31+
;; matched stack.
32+
; RUN: opt < %t/memprof-dump-matched-alloc-site.ll -passes='memprof-use<profile-filename=%t/memprof-dump-matched-alloc-site.memprofdata>' -memprof-print-match-info -memprof-print-matched-alloc-stack -S 2>&1 | FileCheck %s --check-prefixes=MATCH,STACK
33+
;; Test that -memprof-print-matched-alloc-stack without -memprof-print-match-info
34+
;; is a noop.
35+
; RUN: opt < %t/memprof-dump-matched-alloc-site.ll -passes='memprof-use<profile-filename=%t/memprof-dump-matched-alloc-site.memprofdata>' -memprof-print-matched-alloc-stack -S 2>&1 | FileCheck %s --implicit-check-not="context with id" --implicit-check-not="and call stack"
3036

3137
;--- memprof-dump-matched-alloc-site.yaml
3238
---
@@ -79,17 +85,19 @@ HeapProfileRecords:
7985
;--- memprof-dump-matched-alloc-site.ll
8086

8187
;; From -pass-remarks=memprof and -memprof-print-function-guids
82-
; CHECK: MemProf: Function GUID 4708092051066754107 is _Z2f1v
83-
; CHECK: remark: memprof-dump-matched-alloc-site.cc:1:21: call in function _Z2f1v matched alloc context with alloc type notcold total size 3 full context id 5736731103568718490 frame count 1
84-
; CHECK: MemProf: Function GUID 14255129117669598641 is _Z2f2v
85-
; CHECK: remark: memprof-dump-matched-alloc-site.cc:1:21: call in function _Z2f2v matched alloc context with alloc type notcold total size 3 full context id 5736731103568718490 frame count 2
86-
; CHECK: MemProf: Function GUID 2771528421763978342 is _Z2f3v
87-
; CHECK: remark: memprof-dump-matched-alloc-site.cc:1:21: call in function _Z2f3v matched alloc context with alloc type notcold total size 3 full context id 5736731103568718490 frame count 3
88+
; FUNCGUID: MemProf: Function GUID 4708092051066754107 is _Z2f1v
89+
; REMARK: remark: memprof-dump-matched-alloc-site.cc:1:21: call in function _Z2f1v matched alloc context with alloc type notcold total size 3 full context id 5736731103568718490 frame count 1
90+
; FUNCGUID: MemProf: Function GUID 14255129117669598641 is _Z2f2v
91+
; REMARK: remark: memprof-dump-matched-alloc-site.cc:1:21: call in function _Z2f2v matched alloc context with alloc type notcold total size 3 full context id 5736731103568718490 frame count 2
92+
; FUNCGUID: MemProf: Function GUID 2771528421763978342 is _Z2f3v
93+
; REMARK: remark: memprof-dump-matched-alloc-site.cc:1:21: call in function _Z2f3v matched alloc context with alloc type notcold total size 3 full context id 5736731103568718490 frame count 3
8894

89-
;; From -memprof-print-match-info
90-
; CHECK: MemProf notcold context with id 5736731103568718490 has total profiled size 3 is matched with 1 frames
91-
; CHECK: MemProf notcold context with id 5736731103568718490 has total profiled size 3 is matched with 2 frames
92-
; CHECK: MemProf notcold context with id 5736731103568718490 has total profiled size 3 is matched with 3 frames
95+
; MATCH: MemProf notcold context with id 5736731103568718490 has total profiled size 3 is matched with 1 frames
96+
; STACK-SAME: and call stack 16675831946704128299 1244320836757332728 8373967866436022208 5401059281181789382
97+
; MATCH: MemProf notcold context with id 5736731103568718490 has total profiled size 3 is matched with 2 frames
98+
; STACK-SAME: and call stack 16675831946704128299 1244320836757332728 8373967866436022208 5401059281181789382
99+
; MATCH: MemProf notcold context with id 5736731103568718490 has total profiled size 3 is matched with 3 frames
100+
; STACK-SAME: and call stack 16675831946704128299 1244320836757332728 8373967866436022208 5401059281181789382
93101

94102
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
95103
target triple = "x86_64-unknown-linux-gnu"

0 commit comments

Comments
 (0)