llvm-for-llvmta/lib/Target/AMDGPU/GCNILPSched.cpp

//===---------------------------- GCNILPSched.cpp - -----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
//
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/ScheduleDAG.h"

using namespace llvm;

#define DEBUG_TYPE "machine-scheduler"

namespace {

class GCNILPScheduler {
  struct Candidate : ilist_node<Candidate> {
    SUnit *SU;

    Candidate(SUnit *SU_)
      : SU(SU_) {}
  };

  SpecificBumpPtrAllocator<Candidate> Alloc;
  typedef simple_ilist<Candidate> Queue;
  Queue PendingQueue;
  Queue AvailQueue;
  unsigned CurQueueId = 0;

  std::vector<unsigned> SUNumbers;

  /// CurCycle - The current scheduler state corresponds to this cycle.
  unsigned CurCycle = 0;

  unsigned getNodePriority(const SUnit *SU) const;

  const SUnit *pickBest(const SUnit *left, const SUnit *right);
  Candidate* pickCandidate();

  void releasePending();
  void advanceToCycle(unsigned NextCycle);
  void releasePredecessors(const SUnit* SU);

public:
  std::vector<const SUnit*> schedule(ArrayRef<const SUnit*> TopRoots,
                                     const ScheduleDAG &DAG);
};
} // namespace

/// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number.
/// Smaller number is the higher priority.
static unsigned
CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) {
  unsigned &SethiUllmanNumber = SUNumbers[SU->NodeNum];
  if (SethiUllmanNumber != 0)
    return SethiUllmanNumber;

  unsigned Extra = 0;
  for (const SDep &Pred : SU->Preds) {
    if (Pred.isCtrl()) continue;  // ignore chain preds
    SUnit *PredSU = Pred.getSUnit();
    unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU, SUNumbers);
    if (PredSethiUllman > SethiUllmanNumber) {
      SethiUllmanNumber = PredSethiUllman;
      Extra = 0;
    }
    else if (PredSethiUllman == SethiUllmanNumber)
      ++Extra;
  }

  SethiUllmanNumber += Extra;

  if (SethiUllmanNumber == 0)
    SethiUllmanNumber = 1;

  return SethiUllmanNumber;
}

// Lower priority means schedule further down. For bottom-up scheduling, lower
// priority SUs are scheduled before higher priority SUs.
unsigned GCNILPScheduler::getNodePriority(const SUnit *SU) const {
  assert(SU->NodeNum < SUNumbers.size());
  if (SU->NumSuccs == 0 && SU->NumPreds != 0)
    // If SU does not have a register use, i.e. it doesn't produce a value
    // that would be consumed (e.g. store), then it terminates a chain of
    // computation.  Give it a large SethiUllman number so it will be
    // scheduled right before its predecessors that it doesn't lengthen
    // their live ranges.
    return 0xffff;

  if (SU->NumPreds == 0 && SU->NumSuccs != 0)
    // If SU does not have a register def, schedule it close to its uses
    // because it does not lengthen any live ranges.
    return 0;

  return SUNumbers[SU->NodeNum];
}

/// closestSucc - Returns the scheduled cycle of the successor which is
/// closest to the current cycle.
static unsigned closestSucc(const SUnit *SU) {
  unsigned MaxHeight = 0;
  for (const SDep &Succ : SU->Succs) {
    if (Succ.isCtrl()) continue;  // ignore chain succs
    unsigned Height = Succ.getSUnit()->getHeight();
    // If there are bunch of CopyToRegs stacked up, they should be considered
    // to be at the same position.
    if (Height > MaxHeight)
      MaxHeight = Height;
  }
  return MaxHeight;
}

/// calcMaxScratches - Returns an cost estimate of the worse case requirement
/// for scratch registers, i.e. number of data dependencies.
static unsigned calcMaxScratches(const SUnit *SU) {
  unsigned Scratches = 0;
  for (const SDep &Pred : SU->Preds) {
    if (Pred.isCtrl()) continue;  // ignore chain preds
    Scratches++;
  }
  return Scratches;
}

// Return -1 if left has higher priority, 1 if right has higher priority.
// Return 0 if latency-based priority is equivalent.
static int BUCompareLatency(const SUnit *left, const SUnit *right) {
  // Scheduling an instruction that uses a VReg whose postincrement has not yet
  // been scheduled will induce a copy. Model this as an extra cycle of latency.
  int LHeight = (int)left->getHeight();
  int RHeight = (int)right->getHeight();

  // If either node is scheduling for latency, sort them by height/depth
  // and latency.

  // If neither instruction stalls (!LStall && !RStall) and HazardRecognizer
  // is enabled, grouping instructions by cycle, then its height is already
  // covered so only its depth matters. We also reach this point if both stall
  // but have the same height.
  if (LHeight != RHeight)
    return LHeight > RHeight ? 1 : -1;

  int LDepth = left->getDepth();
  int RDepth = right->getDepth();
  if (LDepth != RDepth) {
    LLVM_DEBUG(dbgs() << "  Comparing latency of SU (" << left->NodeNum
                      << ") depth " << LDepth << " vs SU (" << right->NodeNum
                      << ") depth " << RDepth << "\n");
    return LDepth < RDepth ? 1 : -1;
  }
  if (left->Latency != right->Latency)
    return left->Latency > right->Latency ? 1 : -1;

  return 0;
}

const SUnit *GCNILPScheduler::pickBest(const SUnit *left, const SUnit *right)
{
  // TODO: add register pressure lowering checks

  bool const DisableSchedCriticalPath = false;
  int MaxReorderWindow = 6;
  if (!DisableSchedCriticalPath) {
    int spread = (int)left->getDepth() - (int)right->getDepth();
    if (std::abs(spread) > MaxReorderWindow) {
      LLVM_DEBUG(dbgs() << "Depth of SU(" << left->NodeNum << "): "
                        << left->getDepth() << " != SU(" << right->NodeNum
                        << "): " << right->getDepth() << "\n");
      return left->getDepth() < right->getDepth() ? right : left;
    }
  }

  bool const DisableSchedHeight = false;
  if (!DisableSchedHeight && left->getHeight() != right->getHeight()) {
    int spread = (int)left->getHeight() - (int)right->getHeight();
    if (std::abs(spread) > MaxReorderWindow)
      return left->getHeight() > right->getHeight() ? right : left;
  }

  // Prioritize by Sethi-Ulmann number and push CopyToReg nodes down.
  unsigned LPriority = getNodePriority(left);
  unsigned RPriority = getNodePriority(right);

  if (LPriority != RPriority)
    return LPriority > RPriority ? right : left;

  // Try schedule def + use closer when Sethi-Ullman numbers are the same.
  // e.g.
  // t1 = op t2, c1
  // t3 = op t4, c2
  //
  // and the following instructions are both ready.
  // t2 = op c3
  // t4 = op c4
  //
  // Then schedule t2 = op first.
  // i.e.
  // t4 = op c4
  // t2 = op c3
  // t1 = op t2, c1
  // t3 = op t4, c2
  //
  // This creates more short live intervals.
  unsigned LDist = closestSucc(left);
  unsigned RDist = closestSucc(right);
  if (LDist != RDist)
    return LDist < RDist ? right : left;

  // How many registers becomes live when the node is scheduled.
  unsigned LScratch = calcMaxScratches(left);
  unsigned RScratch = calcMaxScratches(right);
  if (LScratch != RScratch)
    return LScratch > RScratch ? right : left;

  bool const DisableSchedCycles = false;
  if (!DisableSchedCycles) {
    int result = BUCompareLatency(left, right);
    if (result != 0)
      return result > 0 ? right : left;
    return left;
  }
  else {
    if (left->getHeight() != right->getHeight())
      return (left->getHeight() > right->getHeight()) ? right : left;

    if (left->getDepth() != right->getDepth())
      return (left->getDepth() < right->getDepth()) ? right : left;
  }

  assert(left->NodeQueueId && right->NodeQueueId &&
        "NodeQueueId cannot be zero");
  return (left->NodeQueueId > right->NodeQueueId) ? right : left;
}

GCNILPScheduler::Candidate* GCNILPScheduler::pickCandidate() {
  if (AvailQueue.empty())
    return nullptr;
  auto Best = AvailQueue.begin();
  for (auto I = std::next(AvailQueue.begin()), E = AvailQueue.end(); I != E; ++I) {
    auto NewBestSU = pickBest(Best->SU, I->SU);
    if (NewBestSU != Best->SU) {
      assert(NewBestSU == I->SU);
      Best = I;
    }
  }
  return &*Best;
}

void GCNILPScheduler::releasePending() {
  // Check to see if any of the pending instructions are ready to issue.  If
  // so, add them to the available queue.
  for(auto I = PendingQueue.begin(), E = PendingQueue.end(); I != E;) {
    auto &C = *I++;
    if (C.SU->getHeight() <= CurCycle) {
      PendingQueue.remove(C);
      AvailQueue.push_back(C);
      C.SU->NodeQueueId = CurQueueId++;
    }
  }
}

/// Move the scheduler state forward by the specified number of Cycles.
void GCNILPScheduler::advanceToCycle(unsigned NextCycle) {
  if (NextCycle <= CurCycle)
    return;
  CurCycle = NextCycle;
  releasePending();
}

void GCNILPScheduler::releasePredecessors(const SUnit* SU) {
  for (const auto &PredEdge : SU->Preds) {
    auto PredSU = PredEdge.getSUnit();
    if (PredEdge.isWeak())
      continue;
    assert(PredSU->isBoundaryNode() || PredSU->NumSuccsLeft > 0);

    PredSU->setHeightToAtLeast(SU->getHeight() + PredEdge.getLatency());

    if (!PredSU->isBoundaryNode() && --PredSU->NumSuccsLeft == 0)
      PendingQueue.push_front(*new (Alloc.Allocate()) Candidate(PredSU));
  }
}

std::vector<const SUnit*>
GCNILPScheduler::schedule(ArrayRef<const SUnit*> BotRoots,
                          const ScheduleDAG &DAG) {
  auto &SUnits = const_cast<ScheduleDAG&>(DAG).SUnits;

  std::vector<SUnit> SUSavedCopy;
  SUSavedCopy.resize(SUnits.size());

  // we cannot save only those fields we touch: some of them are private
  // so save units verbatim: this assumes SUnit should have value semantics
  for (const SUnit &SU : SUnits)
    SUSavedCopy[SU.NodeNum] = SU;

  SUNumbers.assign(SUnits.size(), 0);
  for (const SUnit &SU : SUnits)
    CalcNodeSethiUllmanNumber(&SU, SUNumbers);

  for (auto SU : BotRoots) {
    AvailQueue.push_back(
      *new (Alloc.Allocate()) Candidate(const_cast<SUnit*>(SU)));
  }
  releasePredecessors(&DAG.ExitSU);

  std::vector<const SUnit*> Schedule;
  Schedule.reserve(SUnits.size());
  while (true) {
    if (AvailQueue.empty() && !PendingQueue.empty()) {
      auto EarliestSU = std::min_element(
        PendingQueue.begin(), PendingQueue.end(),
        [=](const Candidate& C1, const Candidate& C2) {
        return C1.SU->getHeight() < C2.SU->getHeight();
      })->SU;
      advanceToCycle(std::max(CurCycle + 1, EarliestSU->getHeight()));
    }
    if (AvailQueue.empty())
      break;

    LLVM_DEBUG(dbgs() << "\n=== Picking candidate\n"
                         "Ready queue:";
               for (auto &C
                    : AvailQueue) dbgs()
               << ' ' << C.SU->NodeNum;
               dbgs() << '\n';);

    auto C = pickCandidate();
    assert(C);
    AvailQueue.remove(*C);
    auto SU = C->SU;
    LLVM_DEBUG(dbgs() << "Selected "; DAG.dumpNode(*SU));

    advanceToCycle(SU->getHeight());

    releasePredecessors(SU);
    Schedule.push_back(SU);
    SU->isScheduled = true;
  }
  assert(SUnits.size() == Schedule.size());

  std::reverse(Schedule.begin(), Schedule.end());

  // restore units
  for (auto &SU : SUnits)
    SU = SUSavedCopy[SU.NodeNum];

  return Schedule;
}

namespace llvm {
std::vector<const SUnit*> makeGCNILPScheduler(ArrayRef<const SUnit*> BotRoots,
                                              const ScheduleDAG &DAG) {
  GCNILPScheduler S;
  return S.schedule(BotRoots, DAG);
}
}
first commit 2022-04-25 10:02:23 +02:00			`//===---------------------------- GCNILPSched.cpp - -----------------------===//`
			`//`
			`// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.`
			`// See https://llvm.org/LICENSE.txt for license information.`
			`// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception`
			`//`
			`//===----------------------------------------------------------------------===//`
			`//`
			`/// \file`
			`//`
			`//===----------------------------------------------------------------------===//`

			`#include "llvm/CodeGen/ScheduleDAG.h"`

			`using namespace llvm;`

			`#define DEBUG_TYPE "machine-scheduler"`

			`namespace {`

			`class GCNILPScheduler {`
			`struct Candidate : ilist_node<Candidate> {`
			`SUnit *SU;`

			`Candidate(SUnit *SU_)`
			`: SU(SU_) {}`
			`};`

			`SpecificBumpPtrAllocator<Candidate> Alloc;`
			`typedef simple_ilist<Candidate> Queue;`
			`Queue PendingQueue;`
			`Queue AvailQueue;`
			`unsigned CurQueueId = 0;`

			`std::vector<unsigned> SUNumbers;`

			`/// CurCycle - The current scheduler state corresponds to this cycle.`
			`unsigned CurCycle = 0;`

			`unsigned getNodePriority(const SUnit *SU) const;`

			`const SUnit pickBest(const SUnit left, const SUnit *right);`
			`Candidate* pickCandidate();`

			`void releasePending();`
			`void advanceToCycle(unsigned NextCycle);`
			`void releasePredecessors(const SUnit* SU);`

			`public:`
			`std::vector<const SUnit> schedule(ArrayRef<const SUnit> TopRoots,`
			`const ScheduleDAG &DAG);`
			`};`
			`} // namespace`

			`/// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number.`
			`/// Smaller number is the higher priority.`
			`static unsigned`
			`CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) {`
			`unsigned &SethiUllmanNumber = SUNumbers[SU->NodeNum];`
			`if (SethiUllmanNumber != 0)`
			`return SethiUllmanNumber;`

			`unsigned Extra = 0;`
			`for (const SDep &Pred : SU->Preds) {`
			`if (Pred.isCtrl()) continue; // ignore chain preds`
			`SUnit *PredSU = Pred.getSUnit();`
			`unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU, SUNumbers);`
			`if (PredSethiUllman > SethiUllmanNumber) {`
			`SethiUllmanNumber = PredSethiUllman;`
			`Extra = 0;`
			`}`
			`else if (PredSethiUllman == SethiUllmanNumber)`
			`++Extra;`
			`}`

			`SethiUllmanNumber += Extra;`

			`if (SethiUllmanNumber == 0)`
			`SethiUllmanNumber = 1;`

			`return SethiUllmanNumber;`
			`}`

			`// Lower priority means schedule further down. For bottom-up scheduling, lower`
			`// priority SUs are scheduled before higher priority SUs.`
			`unsigned GCNILPScheduler::getNodePriority(const SUnit *SU) const {`
			`assert(SU->NodeNum < SUNumbers.size());`
			`if (SU->NumSuccs == 0 && SU->NumPreds != 0)`
			`// If SU does not have a register use, i.e. it doesn't produce a value`
			`// that would be consumed (e.g. store), then it terminates a chain of`
			`// computation. Give it a large SethiUllman number so it will be`
			`// scheduled right before its predecessors that it doesn't lengthen`
			`// their live ranges.`
			`return 0xffff;`

			`if (SU->NumPreds == 0 && SU->NumSuccs != 0)`
			`// If SU does not have a register def, schedule it close to its uses`
			`// because it does not lengthen any live ranges.`
			`return 0;`

			`return SUNumbers[SU->NodeNum];`
			`}`

			`/// closestSucc - Returns the scheduled cycle of the successor which is`
			`/// closest to the current cycle.`
			`static unsigned closestSucc(const SUnit *SU) {`
			`unsigned MaxHeight = 0;`
			`for (const SDep &Succ : SU->Succs) {`
			`if (Succ.isCtrl()) continue; // ignore chain succs`
			`unsigned Height = Succ.getSUnit()->getHeight();`
			`// If there are bunch of CopyToRegs stacked up, they should be considered`
			`// to be at the same position.`
			`if (Height > MaxHeight)`
			`MaxHeight = Height;`
			`}`
			`return MaxHeight;`
			`}`

			`/// calcMaxScratches - Returns an cost estimate of the worse case requirement`
			`/// for scratch registers, i.e. number of data dependencies.`
			`static unsigned calcMaxScratches(const SUnit *SU) {`
			`unsigned Scratches = 0;`
			`for (const SDep &Pred : SU->Preds) {`
			`if (Pred.isCtrl()) continue; // ignore chain preds`
			`Scratches++;`
			`}`
			`return Scratches;`
			`}`

			`// Return -1 if left has higher priority, 1 if right has higher priority.`
			`// Return 0 if latency-based priority is equivalent.`
			`static int BUCompareLatency(const SUnit left, const SUnit right) {`
			`// Scheduling an instruction that uses a VReg whose postincrement has not yet`
			`// been scheduled will induce a copy. Model this as an extra cycle of latency.`
			`int LHeight = (int)left->getHeight();`
			`int RHeight = (int)right->getHeight();`

			`// If either node is scheduling for latency, sort them by height/depth`
			`// and latency.`

			`// If neither instruction stalls (!LStall && !RStall) and HazardRecognizer`
			`// is enabled, grouping instructions by cycle, then its height is already`
			`// covered so only its depth matters. We also reach this point if both stall`
			`// but have the same height.`
			`if (LHeight != RHeight)`
			`return LHeight > RHeight ? 1 : -1;`

			`int LDepth = left->getDepth();`
			`int RDepth = right->getDepth();`
			`if (LDepth != RDepth) {`
			`LLVM_DEBUG(dbgs() << " Comparing latency of SU (" << left->NodeNum`
			`<< ") depth " << LDepth << " vs SU (" << right->NodeNum`
			`<< ") depth " << RDepth << "\n");`
			`return LDepth < RDepth ? 1 : -1;`
			`}`
			`if (left->Latency != right->Latency)`
			`return left->Latency > right->Latency ? 1 : -1;`

			`return 0;`
			`}`

			`const SUnit GCNILPScheduler::pickBest(const SUnit left, const SUnit *right)`
			`{`
			`// TODO: add register pressure lowering checks`

			`bool const DisableSchedCriticalPath = false;`
			`int MaxReorderWindow = 6;`
			`if (!DisableSchedCriticalPath) {`
			`int spread = (int)left->getDepth() - (int)right->getDepth();`
			`if (std::abs(spread) > MaxReorderWindow) {`
			`LLVM_DEBUG(dbgs() << "Depth of SU(" << left->NodeNum << "): "`
			`<< left->getDepth() << " != SU(" << right->NodeNum`
			`<< "): " << right->getDepth() << "\n");`
			`return left->getDepth() < right->getDepth() ? right : left;`
			`}`
			`}`

			`bool const DisableSchedHeight = false;`
			`if (!DisableSchedHeight && left->getHeight() != right->getHeight()) {`
			`int spread = (int)left->getHeight() - (int)right->getHeight();`
			`if (std::abs(spread) > MaxReorderWindow)`
			`return left->getHeight() > right->getHeight() ? right : left;`
			`}`

			`// Prioritize by Sethi-Ulmann number and push CopyToReg nodes down.`
			`unsigned LPriority = getNodePriority(left);`
			`unsigned RPriority = getNodePriority(right);`

			`if (LPriority != RPriority)`
			`return LPriority > RPriority ? right : left;`

			`// Try schedule def + use closer when Sethi-Ullman numbers are the same.`
			`// e.g.`
			`// t1 = op t2, c1`
			`// t3 = op t4, c2`
			`//`
			`// and the following instructions are both ready.`
			`// t2 = op c3`
			`// t4 = op c4`
			`//`
			`// Then schedule t2 = op first.`
			`// i.e.`
			`// t4 = op c4`
			`// t2 = op c3`
			`// t1 = op t2, c1`
			`// t3 = op t4, c2`
			`//`
			`// This creates more short live intervals.`
			`unsigned LDist = closestSucc(left);`
			`unsigned RDist = closestSucc(right);`
			`if (LDist != RDist)`
			`return LDist < RDist ? right : left;`

			`// How many registers becomes live when the node is scheduled.`
			`unsigned LScratch = calcMaxScratches(left);`
			`unsigned RScratch = calcMaxScratches(right);`
			`if (LScratch != RScratch)`
			`return LScratch > RScratch ? right : left;`

			`bool const DisableSchedCycles = false;`
			`if (!DisableSchedCycles) {`
			`int result = BUCompareLatency(left, right);`
			`if (result != 0)`
			`return result > 0 ? right : left;`
			`return left;`
			`}`
			`else {`
			`if (left->getHeight() != right->getHeight())`
			`return (left->getHeight() > right->getHeight()) ? right : left;`

			`if (left->getDepth() != right->getDepth())`
			`return (left->getDepth() < right->getDepth()) ? right : left;`
			`}`

			`assert(left->NodeQueueId && right->NodeQueueId &&`
			`"NodeQueueId cannot be zero");`
			`return (left->NodeQueueId > right->NodeQueueId) ? right : left;`
			`}`

			`GCNILPScheduler::Candidate* GCNILPScheduler::pickCandidate() {`
			`if (AvailQueue.empty())`
			`return nullptr;`
			`auto Best = AvailQueue.begin();`
			`for (auto I = std::next(AvailQueue.begin()), E = AvailQueue.end(); I != E; ++I) {`
			`auto NewBestSU = pickBest(Best->SU, I->SU);`
			`if (NewBestSU != Best->SU) {`
			`assert(NewBestSU == I->SU);`
			`Best = I;`
			`}`
			`}`
			`return &*Best;`
			`}`

			`void GCNILPScheduler::releasePending() {`
			`// Check to see if any of the pending instructions are ready to issue. If`
			`// so, add them to the available queue.`
			`for(auto I = PendingQueue.begin(), E = PendingQueue.end(); I != E;) {`
			`auto &C = *I++;`
			`if (C.SU->getHeight() <= CurCycle) {`
			`PendingQueue.remove(C);`
			`AvailQueue.push_back(C);`
			`C.SU->NodeQueueId = CurQueueId++;`
			`}`
			`}`
			`}`

			`/// Move the scheduler state forward by the specified number of Cycles.`
			`void GCNILPScheduler::advanceToCycle(unsigned NextCycle) {`
			`if (NextCycle <= CurCycle)`
			`return;`
			`CurCycle = NextCycle;`
			`releasePending();`
			`}`

			`void GCNILPScheduler::releasePredecessors(const SUnit* SU) {`
			`for (const auto &PredEdge : SU->Preds) {`
			`auto PredSU = PredEdge.getSUnit();`
			`if (PredEdge.isWeak())`
			`continue;`
			`assert(PredSU->isBoundaryNode() \|\| PredSU->NumSuccsLeft > 0);`

			`PredSU->setHeightToAtLeast(SU->getHeight() + PredEdge.getLatency());`

			`if (!PredSU->isBoundaryNode() && --PredSU->NumSuccsLeft == 0)`
			`PendingQueue.push_front(*new (Alloc.Allocate()) Candidate(PredSU));`
			`}`
			`}`

			`std::vector<const SUnit*>`
			`GCNILPScheduler::schedule(ArrayRef<const SUnit*> BotRoots,`
			`const ScheduleDAG &DAG) {`
			`auto &SUnits = const_cast<ScheduleDAG&>(DAG).SUnits;`

			`std::vector<SUnit> SUSavedCopy;`
			`SUSavedCopy.resize(SUnits.size());`

			`// we cannot save only those fields we touch: some of them are private`
			`// so save units verbatim: this assumes SUnit should have value semantics`
			`for (const SUnit &SU : SUnits)`
			`SUSavedCopy[SU.NodeNum] = SU;`

			`SUNumbers.assign(SUnits.size(), 0);`
			`for (const SUnit &SU : SUnits)`
			`CalcNodeSethiUllmanNumber(&SU, SUNumbers);`

			`for (auto SU : BotRoots) {`
			`AvailQueue.push_back(`
			`new (Alloc.Allocate()) Candidate(const_cast<SUnit>(SU)));`
			`}`
			`releasePredecessors(&DAG.ExitSU);`

			`std::vector<const SUnit*> Schedule;`
			`Schedule.reserve(SUnits.size());`
			`while (true) {`
			`if (AvailQueue.empty() && !PendingQueue.empty()) {`
			`auto EarliestSU = std::min_element(`
			`PendingQueue.begin(), PendingQueue.end(),`
			`[=](const Candidate& C1, const Candidate& C2) {`
			`return C1.SU->getHeight() < C2.SU->getHeight();`
			`})->SU;`
			`advanceToCycle(std::max(CurCycle + 1, EarliestSU->getHeight()));`
			`}`
			`if (AvailQueue.empty())`
			`break;`

			`LLVM_DEBUG(dbgs() << "\n=== Picking candidate\n"`
			`"Ready queue:";`
			`for (auto &C`
			`: AvailQueue) dbgs()`
			`<< ' ' << C.SU->NodeNum;`
			`dbgs() << '\n';);`

			`auto C = pickCandidate();`
			`assert(C);`
			`AvailQueue.remove(*C);`
			`auto SU = C->SU;`
			`LLVM_DEBUG(dbgs() << "Selected "; DAG.dumpNode(*SU));`

			`advanceToCycle(SU->getHeight());`

			`releasePredecessors(SU);`
			`Schedule.push_back(SU);`
			`SU->isScheduled = true;`
			`}`
			`assert(SUnits.size() == Schedule.size());`

			`std::reverse(Schedule.begin(), Schedule.end());`

			`// restore units`
			`for (auto &SU : SUnits)`
			`SU = SUSavedCopy[SU.NodeNum];`

			`return Schedule;`
			`}`

			`namespace llvm {`
			`std::vector<const SUnit> makeGCNILPScheduler(ArrayRef<const SUnit> BotRoots,`
			`const ScheduleDAG &DAG) {`
			`GCNILPScheduler S;`
			`return S.schedule(BotRoots, DAG);`
			`}`
			`}`