268 lines
11 KiB
C
268 lines
11 KiB
C
|
//===-- ProfileGenerator.h - Profile Generator -----------------*- C++ -*-===//
|
||
|
//
|
||
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||
|
// See https://llvm.org/LICENSE.txt for license information.
|
||
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||
|
//
|
||
|
//===----------------------------------------------------------------------===//
|
||
|
|
||
|
#ifndef LLVM_TOOLS_LLVM_PROGEN_PROFILEGENERATOR_H
|
||
|
#define LLVM_TOOLS_LLVM_PROGEN_PROFILEGENERATOR_H
|
||
|
#include "ErrorHandling.h"
|
||
|
#include "PerfReader.h"
|
||
|
#include "ProfiledBinary.h"
|
||
|
#include "llvm/ProfileData/SampleProfWriter.h"
|
||
|
|
||
|
using namespace llvm;
|
||
|
using namespace sampleprof;
|
||
|
|
||
|
namespace llvm {
|
||
|
namespace sampleprof {
|
||
|
|
||
|
class ProfileGenerator {
|
||
|
|
||
|
public:
|
||
|
ProfileGenerator(){};
|
||
|
virtual ~ProfileGenerator() = default;
|
||
|
static std::unique_ptr<ProfileGenerator>
|
||
|
create(const BinarySampleCounterMap &BinarySampleCounters,
|
||
|
enum PerfScriptType SampleType);
|
||
|
virtual void generateProfile() = 0;
|
||
|
// Use SampleProfileWriter to serialize profile map
|
||
|
virtual void write(std::unique_ptr<SampleProfileWriter> Writer,
|
||
|
StringMap<FunctionSamples> &ProfileMap);
|
||
|
void write();
|
||
|
|
||
|
protected:
|
||
|
/*
|
||
|
For each region boundary point, mark if it is begin or end (or both) of
|
||
|
the region. Boundary points are inclusive. Log the sample count as well
|
||
|
so we can use it when we compute the sample count of each disjoint region
|
||
|
later. Note that there might be multiple ranges with different sample
|
||
|
count that share same begin/end point. We need to accumulate the sample
|
||
|
count for the boundary point for such case, because for the example
|
||
|
below,
|
||
|
|
||
|
|<--100-->|
|
||
|
|<------200------>|
|
||
|
A B C
|
||
|
|
||
|
sample count for disjoint region [A,B] would be 300.
|
||
|
*/
|
||
|
void findDisjointRanges(RangeSample &DisjointRanges,
|
||
|
const RangeSample &Ranges);
|
||
|
|
||
|
// Used by SampleProfileWriter
|
||
|
StringMap<FunctionSamples> ProfileMap;
|
||
|
};
|
||
|
|
||
|
class CSProfileGenerator : public ProfileGenerator {
|
||
|
protected:
|
||
|
const BinarySampleCounterMap &BinarySampleCounters;
|
||
|
|
||
|
public:
|
||
|
CSProfileGenerator(const BinarySampleCounterMap &Counters)
|
||
|
: BinarySampleCounters(Counters){};
|
||
|
|
||
|
public:
|
||
|
void generateProfile() override {
|
||
|
for (const auto &BI : BinarySampleCounters) {
|
||
|
ProfiledBinary *Binary = BI.first;
|
||
|
for (const auto &CI : BI.second) {
|
||
|
const StringBasedCtxKey *CtxKey =
|
||
|
dyn_cast<StringBasedCtxKey>(CI.first.getPtr());
|
||
|
StringRef ContextId(CtxKey->Context);
|
||
|
// Get or create function profile for the range
|
||
|
FunctionSamples &FunctionProfile =
|
||
|
getFunctionProfileForContext(ContextId);
|
||
|
|
||
|
// Fill in function body samples
|
||
|
populateFunctionBodySamples(FunctionProfile, CI.second.RangeCounter,
|
||
|
Binary);
|
||
|
// Fill in boundary sample counts as well as call site samples for calls
|
||
|
populateFunctionBoundarySamples(ContextId, FunctionProfile,
|
||
|
CI.second.BranchCounter, Binary);
|
||
|
}
|
||
|
}
|
||
|
// Fill in call site value sample for inlined calls and also use context to
|
||
|
// infer missing samples. Since we don't have call count for inlined
|
||
|
// functions, we estimate it from inlinee's profile using the entry of the
|
||
|
// body sample.
|
||
|
populateInferredFunctionSamples();
|
||
|
}
|
||
|
|
||
|
// Remove adjacent repeated context sequences up to a given sequence length,
|
||
|
// -1 means no size limit. Note that repeated sequences are identified based
|
||
|
// on the exact call site, this is finer granularity than function recursion.
|
||
|
template <typename T>
|
||
|
static void compressRecursionContext(SmallVectorImpl<T> &Context,
|
||
|
int32_t CSize = MaxCompressionSize) {
|
||
|
uint32_t I = 1;
|
||
|
uint32_t HS = static_cast<uint32_t>(Context.size() / 2);
|
||
|
uint32_t MaxDedupSize =
|
||
|
CSize == -1 ? HS : std::min(static_cast<uint32_t>(CSize), HS);
|
||
|
auto BeginIter = Context.begin();
|
||
|
// Use an in-place algorithm to save memory copy
|
||
|
// End indicates the end location of current iteration's data
|
||
|
uint32_t End = 0;
|
||
|
// Deduplicate from length 1 to the max possible size of a repeated
|
||
|
// sequence.
|
||
|
while (I <= MaxDedupSize) {
|
||
|
// This is a linear algorithm that deduplicates adjacent repeated
|
||
|
// sequences of size I. The deduplication detection runs on a sliding
|
||
|
// window whose size is 2*I and it keeps sliding the window to deduplicate
|
||
|
// the data inside. Once duplication is detected, deduplicate it by
|
||
|
// skipping the right half part of the window, otherwise just copy back
|
||
|
// the new one by appending them at the back of End pointer(for the next
|
||
|
// iteration).
|
||
|
//
|
||
|
// For example:
|
||
|
// Input: [a1, a2, b1, b2]
|
||
|
// (Added index to distinguish the same char, the origin is [a, a, b,
|
||
|
// b], the size of the dedup window is 2(I = 1) at the beginning)
|
||
|
//
|
||
|
// 1) The initial status is a dummy window[null, a1], then just copy the
|
||
|
// right half of the window(End = 0), then slide the window.
|
||
|
// Result: [a1], a2, b1, b2 (End points to the element right before ],
|
||
|
// after ] is the data of the previous iteration)
|
||
|
//
|
||
|
// 2) Next window is [a1, a2]. Since a1 == a2, then skip the right half of
|
||
|
// the window i.e the duplication happen. Only slide the window.
|
||
|
// Result: [a1], a2, b1, b2
|
||
|
//
|
||
|
// 3) Next window is [a2, b1], copy the right half of the window(b1 is
|
||
|
// new) to the End and slide the window.
|
||
|
// Result: [a1, b1], b1, b2
|
||
|
//
|
||
|
// 4) Next window is [b1, b2], same to 2), skip b2.
|
||
|
// Result: [a1, b1], b1, b2
|
||
|
// After resize, it will be [a, b]
|
||
|
|
||
|
// Use pointers like below to do comparison inside the window
|
||
|
// [a b c a b c]
|
||
|
// | | | | |
|
||
|
// LeftBoundary Left Right Left+I Right+I
|
||
|
// A duplication found if Left < LeftBoundry.
|
||
|
|
||
|
int32_t Right = I - 1;
|
||
|
End = I;
|
||
|
int32_t LeftBoundary = 0;
|
||
|
while (Right + I < Context.size()) {
|
||
|
// To avoids scanning a part of a sequence repeatedly, it finds out
|
||
|
// the common suffix of two hald in the window. The common suffix will
|
||
|
// serve as the common prefix of next possible pair of duplicate
|
||
|
// sequences. The non-common part will be ignored and never scanned
|
||
|
// again.
|
||
|
|
||
|
// For example.
|
||
|
// Input: [a, b1], c1, b2, c2
|
||
|
// I = 2
|
||
|
//
|
||
|
// 1) For the window [a, b1, c1, b2], non-common-suffix for the right
|
||
|
// part is 'c1', copy it and only slide the window 1 step.
|
||
|
// Result: [a, b1, c1], b2, c2
|
||
|
//
|
||
|
// 2) Next window is [b1, c1, b2, c2], so duplication happen.
|
||
|
// Result after resize: [a, b, c]
|
||
|
|
||
|
int32_t Left = Right;
|
||
|
while (Left >= LeftBoundary && Context[Left] == Context[Left + I]) {
|
||
|
// Find the longest suffix inside the window. When stops, Left points
|
||
|
// at the diverging point in the current sequence.
|
||
|
Left--;
|
||
|
}
|
||
|
|
||
|
bool DuplicationFound = (Left < LeftBoundary);
|
||
|
// Don't need to recheck the data before Right
|
||
|
LeftBoundary = Right + 1;
|
||
|
if (DuplicationFound) {
|
||
|
// Duplication found, skip right half of the window.
|
||
|
Right += I;
|
||
|
} else {
|
||
|
// Copy the non-common-suffix part of the adjacent sequence.
|
||
|
std::copy(BeginIter + Right + 1, BeginIter + Left + I + 1,
|
||
|
BeginIter + End);
|
||
|
End += Left + I - Right;
|
||
|
// Only slide the window by the size of non-common-suffix
|
||
|
Right = Left + I;
|
||
|
}
|
||
|
}
|
||
|
// Don't forget the remaining part that's not scanned.
|
||
|
std::copy(BeginIter + Right + 1, Context.end(), BeginIter + End);
|
||
|
End += Context.size() - Right - 1;
|
||
|
I++;
|
||
|
Context.resize(End);
|
||
|
MaxDedupSize = std::min(static_cast<uint32_t>(End / 2), MaxDedupSize);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
protected:
|
||
|
// Lookup or create FunctionSamples for the context
|
||
|
FunctionSamples &getFunctionProfileForContext(StringRef ContextId);
|
||
|
// Merge cold context profile whose total sample is below threshold
|
||
|
// into base profile.
|
||
|
void mergeAndTrimColdProfile(StringMap<FunctionSamples> &ProfileMap);
|
||
|
void write(std::unique_ptr<SampleProfileWriter> Writer,
|
||
|
StringMap<FunctionSamples> &ProfileMap) override;
|
||
|
|
||
|
private:
|
||
|
// Helper function for updating body sample for a leaf location in
|
||
|
// FunctionProfile
|
||
|
void updateBodySamplesforFunctionProfile(FunctionSamples &FunctionProfile,
|
||
|
const FrameLocation &LeafLoc,
|
||
|
uint64_t Count);
|
||
|
void populateFunctionBodySamples(FunctionSamples &FunctionProfile,
|
||
|
const RangeSample &RangeCounters,
|
||
|
ProfiledBinary *Binary);
|
||
|
void populateFunctionBoundarySamples(StringRef ContextId,
|
||
|
FunctionSamples &FunctionProfile,
|
||
|
const BranchSample &BranchCounters,
|
||
|
ProfiledBinary *Binary);
|
||
|
void populateInferredFunctionSamples();
|
||
|
|
||
|
public:
|
||
|
// Deduplicate adjacent repeated context sequences up to a given sequence
|
||
|
// length. -1 means no size limit.
|
||
|
static int32_t MaxCompressionSize;
|
||
|
};
|
||
|
|
||
|
using ProbeCounterMap = std::unordered_map<const PseudoProbe *, uint64_t>;
|
||
|
|
||
|
class PseudoProbeCSProfileGenerator : public CSProfileGenerator {
|
||
|
|
||
|
public:
|
||
|
PseudoProbeCSProfileGenerator(const BinarySampleCounterMap &Counters)
|
||
|
: CSProfileGenerator(Counters) {}
|
||
|
void generateProfile() override;
|
||
|
|
||
|
private:
|
||
|
// Go through each address from range to extract the top frame probe by
|
||
|
// looking up in the Address2ProbeMap
|
||
|
void extractProbesFromRange(const RangeSample &RangeCounter,
|
||
|
ProbeCounterMap &ProbeCounter,
|
||
|
ProfiledBinary *Binary);
|
||
|
// Fill in function body samples from probes
|
||
|
void
|
||
|
populateBodySamplesWithProbes(const RangeSample &RangeCounter,
|
||
|
SmallVectorImpl<std::string> &ContextStrStack,
|
||
|
ProfiledBinary *Binary);
|
||
|
// Fill in boundary samples for a call probe
|
||
|
void populateBoundarySamplesWithProbes(
|
||
|
const BranchSample &BranchCounter,
|
||
|
SmallVectorImpl<std::string> &ContextStrStack, ProfiledBinary *Binary);
|
||
|
// Helper function to get FunctionSamples for the leaf inlined context
|
||
|
FunctionSamples &
|
||
|
getFunctionProfileForLeafProbe(SmallVectorImpl<std::string> &ContextStrStack,
|
||
|
const PseudoProbeFuncDesc *LeafFuncDesc);
|
||
|
// Helper function to get FunctionSamples for the leaf probe
|
||
|
FunctionSamples &
|
||
|
getFunctionProfileForLeafProbe(SmallVectorImpl<std::string> &ContextStrStack,
|
||
|
const PseudoProbe *LeafProbe,
|
||
|
ProfiledBinary *Binary);
|
||
|
};
|
||
|
|
||
|
} // end namespace sampleprof
|
||
|
} // end namespace llvm
|
||
|
|
||
|
#endif
|