363 lines
11 KiB
C++
363 lines
11 KiB
C++
|
//===---------------------------- GCNILPSched.cpp - -----------------------===//
|
||
|
//
|
||
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||
|
// See https://llvm.org/LICENSE.txt for license information.
|
||
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||
|
//
|
||
|
//===----------------------------------------------------------------------===//
|
||
|
//
|
||
|
/// \file
|
||
|
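/// This file implements GCNILPScheduler, a bottom-up list scheduler that
/// orders the SUnits of a ScheduleDAG using Sethi-Ullman numbers, node
/// height/depth and latency as priority heuristics.
//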
|
||
|
//===----------------------------------------------------------------------===//
|
||
|
|
||
|
#include "llvm/CodeGen/ScheduleDAG.h"
|
||
|
|
||
|
using namespace llvm;
|
||
|
|
||
|
#define DEBUG_TYPE "machine-scheduler"
|
||
|
|
||
|
namespace {
|
||
|
|
||
|
class GCNILPScheduler {
|
||
|
struct Candidate : ilist_node<Candidate> {
|
||
|
SUnit *SU;
|
||
|
|
||
|
Candidate(SUnit *SU_)
|
||
|
: SU(SU_) {}
|
||
|
};
|
||
|
|
||
|
SpecificBumpPtrAllocator<Candidate> Alloc;
|
||
|
typedef simple_ilist<Candidate> Queue;
|
||
|
Queue PendingQueue;
|
||
|
Queue AvailQueue;
|
||
|
unsigned CurQueueId = 0;
|
||
|
|
||
|
std::vector<unsigned> SUNumbers;
|
||
|
|
||
|
/// CurCycle - The current scheduler state corresponds to this cycle.
|
||
|
unsigned CurCycle = 0;
|
||
|
|
||
|
unsigned getNodePriority(const SUnit *SU) const;
|
||
|
|
||
|
const SUnit *pickBest(const SUnit *left, const SUnit *right);
|
||
|
Candidate* pickCandidate();
|
||
|
|
||
|
void releasePending();
|
||
|
void advanceToCycle(unsigned NextCycle);
|
||
|
void releasePredecessors(const SUnit* SU);
|
||
|
|
||
|
public:
|
||
|
std::vector<const SUnit*> schedule(ArrayRef<const SUnit*> TopRoots,
|
||
|
const ScheduleDAG &DAG);
|
||
|
};
|
||
|
} // namespace
|
||
|
|
||
|
/// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number.
|
||
|
/// Smaller number is the higher priority.
|
||
|
static unsigned
|
||
|
CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) {
|
||
|
unsigned &SethiUllmanNumber = SUNumbers[SU->NodeNum];
|
||
|
if (SethiUllmanNumber != 0)
|
||
|
return SethiUllmanNumber;
|
||
|
|
||
|
unsigned Extra = 0;
|
||
|
for (const SDep &Pred : SU->Preds) {
|
||
|
if (Pred.isCtrl()) continue; // ignore chain preds
|
||
|
SUnit *PredSU = Pred.getSUnit();
|
||
|
unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU, SUNumbers);
|
||
|
if (PredSethiUllman > SethiUllmanNumber) {
|
||
|
SethiUllmanNumber = PredSethiUllman;
|
||
|
Extra = 0;
|
||
|
}
|
||
|
else if (PredSethiUllman == SethiUllmanNumber)
|
||
|
++Extra;
|
||
|
}
|
||
|
|
||
|
SethiUllmanNumber += Extra;
|
||
|
|
||
|
if (SethiUllmanNumber == 0)
|
||
|
SethiUllmanNumber = 1;
|
||
|
|
||
|
return SethiUllmanNumber;
|
||
|
}
|
||
|
|
||
|
// Lower priority means schedule further down. For bottom-up scheduling, lower
|
||
|
// priority SUs are scheduled before higher priority SUs.
|
||
|
unsigned GCNILPScheduler::getNodePriority(const SUnit *SU) const {
|
||
|
assert(SU->NodeNum < SUNumbers.size());
|
||
|
if (SU->NumSuccs == 0 && SU->NumPreds != 0)
|
||
|
// If SU does not have a register use, i.e. it doesn't produce a value
|
||
|
// that would be consumed (e.g. store), then it terminates a chain of
|
||
|
// computation. Give it a large SethiUllman number so it will be
|
||
|
// scheduled right before its predecessors that it doesn't lengthen
|
||
|
// their live ranges.
|
||
|
return 0xffff;
|
||
|
|
||
|
if (SU->NumPreds == 0 && SU->NumSuccs != 0)
|
||
|
// If SU does not have a register def, schedule it close to its uses
|
||
|
// because it does not lengthen any live ranges.
|
||
|
return 0;
|
||
|
|
||
|
return SUNumbers[SU->NodeNum];
|
||
|
}
|
||
|
|
||
|
/// closestSucc - Returns the scheduled cycle of the successor which is
|
||
|
/// closest to the current cycle.
|
||
|
static unsigned closestSucc(const SUnit *SU) {
|
||
|
unsigned MaxHeight = 0;
|
||
|
for (const SDep &Succ : SU->Succs) {
|
||
|
if (Succ.isCtrl()) continue; // ignore chain succs
|
||
|
unsigned Height = Succ.getSUnit()->getHeight();
|
||
|
// If there are bunch of CopyToRegs stacked up, they should be considered
|
||
|
// to be at the same position.
|
||
|
if (Height > MaxHeight)
|
||
|
MaxHeight = Height;
|
||
|
}
|
||
|
return MaxHeight;
|
||
|
}
|
||
|
|
||
|
/// calcMaxScratches - Returns an cost estimate of the worse case requirement
|
||
|
/// for scratch registers, i.e. number of data dependencies.
|
||
|
static unsigned calcMaxScratches(const SUnit *SU) {
|
||
|
unsigned Scratches = 0;
|
||
|
for (const SDep &Pred : SU->Preds) {
|
||
|
if (Pred.isCtrl()) continue; // ignore chain preds
|
||
|
Scratches++;
|
||
|
}
|
||
|
return Scratches;
|
||
|
}
|
||
|
|
||
|
// Return -1 if left has higher priority, 1 if right has higher priority.
|
||
|
// Return 0 if latency-based priority is equivalent.
|
||
|
static int BUCompareLatency(const SUnit *left, const SUnit *right) {
|
||
|
// Scheduling an instruction that uses a VReg whose postincrement has not yet
|
||
|
// been scheduled will induce a copy. Model this as an extra cycle of latency.
|
||
|
int LHeight = (int)left->getHeight();
|
||
|
int RHeight = (int)right->getHeight();
|
||
|
|
||
|
// If either node is scheduling for latency, sort them by height/depth
|
||
|
// and latency.
|
||
|
|
||
|
// If neither instruction stalls (!LStall && !RStall) and HazardRecognizer
|
||
|
// is enabled, grouping instructions by cycle, then its height is already
|
||
|
// covered so only its depth matters. We also reach this point if both stall
|
||
|
// but have the same height.
|
||
|
if (LHeight != RHeight)
|
||
|
return LHeight > RHeight ? 1 : -1;
|
||
|
|
||
|
int LDepth = left->getDepth();
|
||
|
int RDepth = right->getDepth();
|
||
|
if (LDepth != RDepth) {
|
||
|
LLVM_DEBUG(dbgs() << " Comparing latency of SU (" << left->NodeNum
|
||
|
<< ") depth " << LDepth << " vs SU (" << right->NodeNum
|
||
|
<< ") depth " << RDepth << "\n");
|
||
|
return LDepth < RDepth ? 1 : -1;
|
||
|
}
|
||
|
if (left->Latency != right->Latency)
|
||
|
return left->Latency > right->Latency ? 1 : -1;
|
||
|
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
/// Chooses which of two ready nodes should be emitted next.
///
/// The heuristics below are tried in order; the first one that tells the
/// nodes apart decides:
///   1. depth, if the depths differ by more than the reorder window
///      (greater depth wins);
///   2. height, if the heights differ by more than the reorder window
///      (smaller height wins);
///   3. getNodePriority() (smaller number wins);
///   4. closestSucc() (larger value wins);
///   5. calcMaxScratches() (smaller value wins);
///   6. BUCompareLatency().
/// If nothing distinguishes the nodes, \p left is returned.
const SUnit *GCNILPScheduler::pickBest(const SUnit *left, const SUnit *right)
{
  // TODO: add register pressure lowering checks

  // Compile-time switches for the individual heuristics.
  const bool DisableSchedCriticalPath = false;
  const bool DisableSchedHeight = false;
  const bool DisableSchedCycles = false;
  // Depth/height differences up to this size are left to later heuristics.
  const int MaxReorderWindow = 6;

  if (!DisableSchedCriticalPath) {
    const unsigned LeftDepth = left->getDepth();
    const unsigned RightDepth = right->getDepth();
    const int DepthSpread = (int)LeftDepth - (int)RightDepth;
    if (std::abs(DepthSpread) > MaxReorderWindow) {
      LLVM_DEBUG(dbgs() << "Depth of SU(" << left->NodeNum << "): "
                        << LeftDepth << " != SU(" << right->NodeNum
                        << "): " << RightDepth << "\n");
      return LeftDepth < RightDepth ? right : left;
    }
  }

  if (!DisableSchedHeight) {
    const unsigned LeftHeight = left->getHeight();
    const unsigned RightHeight = right->getHeight();
    if (LeftHeight != RightHeight) {
      const int HeightSpread = (int)LeftHeight - (int)RightHeight;
      if (std::abs(HeightSpread) > MaxReorderWindow)
        return LeftHeight > RightHeight ? right : left;
    }
  }

  // Prioritize by Sethi-Ullman number and push CopyToReg nodes down.
  const unsigned LeftPriority = getNodePriority(left);
  const unsigned RightPriority = getNodePriority(right);
  if (LeftPriority != RightPriority)
    return LeftPriority > RightPriority ? right : left;

  // With equal Sethi-Ullman numbers, try to keep a def close to its use.
  // Suppose these two instructions have already been placed
  //
  //   t1 = op t2, c1
  //   t3 = op t4, c2
  //
  // and both of the following are ready:
  //
  //   t2 = op c3
  //   t4 = op c4
  //
  // Picking "t2 = op c3" first yields the final order
  //
  //   t4 = op c4
  //   t2 = op c3
  //   t1 = op t2, c1
  //   t3 = op t4, c2
  //
  // which creates more short live intervals.
  const unsigned LeftDist = closestSucc(left);
  const unsigned RightDist = closestSucc(right);
  if (LeftDist != RightDist)
    return LeftDist < RightDist ? right : left;

  // How many registers become live when the node is scheduled.
  const unsigned LeftScratch = calcMaxScratches(left);
  const unsigned RightScratch = calcMaxScratches(right);
  if (LeftScratch != RightScratch)
    return LeftScratch > RightScratch ? right : left;

  if (!DisableSchedCycles) {
    // A latency tie (0) keeps the left node.
    const int LatencyOrder = BUCompareLatency(left, right);
    return LatencyOrder > 0 ? right : left;
  }

  // Everything below only runs when the latency comparison is switched off.
  const unsigned LeftHeight = left->getHeight();
  const unsigned RightHeight = right->getHeight();
  if (LeftHeight != RightHeight)
    return (LeftHeight > RightHeight) ? right : left;

  const unsigned LeftDepth = left->getDepth();
  const unsigned RightDepth = right->getDepth();
  if (LeftDepth != RightDepth)
    return (LeftDepth < RightDepth) ? right : left;

  assert(left->NodeQueueId && right->NodeQueueId &&
         "NodeQueueId cannot be zero");
  return (left->NodeQueueId > right->NodeQueueId) ? right : left;
}
|
||
|
|
||
|
/// Returns the candidate in AvailQueue that pickBest() prefers over all
/// others, or nullptr if the queue is empty. The candidate stays linked in
/// the queue; unlinking it is left to the caller.
GCNILPScheduler::Candidate* GCNILPScheduler::pickCandidate() {
  if (AvailQueue.empty())
    return nullptr;

  auto It = AvailQueue.begin();
  Candidate *Winner = &*It;
  // Let every remaining candidate challenge the current winner in turn.
  for (++It; It != AvailQueue.end(); ++It) {
    const SUnit *Picked = pickBest(Winner->SU, It->SU);
    if (Picked == Winner->SU)
      continue;
    assert(Picked == It->SU);
    Winner = &*It;
  }
  return Winner;
}
|
||
|
|
||
|
void GCNILPScheduler::releasePending() {
|
||
|
// Check to see if any of the pending instructions are ready to issue. If
|
||
|
// so, add them to the available queue.
|
||
|
for(auto I = PendingQueue.begin(), E = PendingQueue.end(); I != E;) {
|
||
|
auto &C = *I++;
|
||
|
if (C.SU->getHeight() <= CurCycle) {
|
||
|
PendingQueue.remove(C);
|
||
|
AvailQueue.push_back(C);
|
||
|
C.SU->NodeQueueId = CurQueueId++;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/// Move the scheduler state forward by the specified number of Cycles.
|
||
|
void GCNILPScheduler::advanceToCycle(unsigned NextCycle) {
|
||
|
if (NextCycle <= CurCycle)
|
||
|
return;
|
||
|
CurCycle = NextCycle;
|
||
|
releasePending();
|
||
|
}
|
||
|
|
||
|
void GCNILPScheduler::releasePredecessors(const SUnit* SU) {
|
||
|
for (const auto &PredEdge : SU->Preds) {
|
||
|
auto PredSU = PredEdge.getSUnit();
|
||
|
if (PredEdge.isWeak())
|
||
|
continue;
|
||
|
assert(PredSU->isBoundaryNode() || PredSU->NumSuccsLeft > 0);
|
||
|
|
||
|
PredSU->setHeightToAtLeast(SU->getHeight() + PredEdge.getLatency());
|
||
|
|
||
|
if (!PredSU->isBoundaryNode() && --PredSU->NumSuccsLeft == 0)
|
||
|
PendingQueue.push_front(*new (Alloc.Allocate()) Candidate(PredSU));
|
||
|
}
|
||
|
}
|
||
|
|
||
|
std::vector<const SUnit*>
|
||
|
GCNILPScheduler::schedule(ArrayRef<const SUnit*> BotRoots,
|
||
|
const ScheduleDAG &DAG) {
|
||
|
auto &SUnits = const_cast<ScheduleDAG&>(DAG).SUnits;
|
||
|
|
||
|
std::vector<SUnit> SUSavedCopy;
|
||
|
SUSavedCopy.resize(SUnits.size());
|
||
|
|
||
|
// we cannot save only those fields we touch: some of them are private
|
||
|
// so save units verbatim: this assumes SUnit should have value semantics
|
||
|
for (const SUnit &SU : SUnits)
|
||
|
SUSavedCopy[SU.NodeNum] = SU;
|
||
|
|
||
|
SUNumbers.assign(SUnits.size(), 0);
|
||
|
for (const SUnit &SU : SUnits)
|
||
|
CalcNodeSethiUllmanNumber(&SU, SUNumbers);
|
||
|
|
||
|
for (auto SU : BotRoots) {
|
||
|
AvailQueue.push_back(
|
||
|
*new (Alloc.Allocate()) Candidate(const_cast<SUnit*>(SU)));
|
||
|
}
|
||
|
releasePredecessors(&DAG.ExitSU);
|
||
|
|
||
|
std::vector<const SUnit*> Schedule;
|
||
|
Schedule.reserve(SUnits.size());
|
||
|
while (true) {
|
||
|
if (AvailQueue.empty() && !PendingQueue.empty()) {
|
||
|
auto EarliestSU = std::min_element(
|
||
|
PendingQueue.begin(), PendingQueue.end(),
|
||
|
[=](const Candidate& C1, const Candidate& C2) {
|
||
|
return C1.SU->getHeight() < C2.SU->getHeight();
|
||
|
})->SU;
|
||
|
advanceToCycle(std::max(CurCycle + 1, EarliestSU->getHeight()));
|
||
|
}
|
||
|
if (AvailQueue.empty())
|
||
|
break;
|
||
|
|
||
|
LLVM_DEBUG(dbgs() << "\n=== Picking candidate\n"
|
||
|
"Ready queue:";
|
||
|
for (auto &C
|
||
|
: AvailQueue) dbgs()
|
||
|
<< ' ' << C.SU->NodeNum;
|
||
|
dbgs() << '\n';);
|
||
|
|
||
|
auto C = pickCandidate();
|
||
|
assert(C);
|
||
|
AvailQueue.remove(*C);
|
||
|
auto SU = C->SU;
|
||
|
LLVM_DEBUG(dbgs() << "Selected "; DAG.dumpNode(*SU));
|
||
|
|
||
|
advanceToCycle(SU->getHeight());
|
||
|
|
||
|
releasePredecessors(SU);
|
||
|
Schedule.push_back(SU);
|
||
|
SU->isScheduled = true;
|
||
|
}
|
||
|
assert(SUnits.size() == Schedule.size());
|
||
|
|
||
|
std::reverse(Schedule.begin(), Schedule.end());
|
||
|
|
||
|
// restore units
|
||
|
for (auto &SU : SUnits)
|
||
|
SU = SUSavedCopy[SU.NodeNum];
|
||
|
|
||
|
return Schedule;
|
||
|
}
|
||
|
|
||
|
namespace llvm {
|
||
|
std::vector<const SUnit*> makeGCNILPScheduler(ArrayRef<const SUnit*> BotRoots,
|
||
|
const ScheduleDAG &DAG) {
|
||
|
GCNILPScheduler S;
|
||
|
return S.schedule(BotRoots, DAG);
|
||
|
}
|
||
|
}
|