llvm-for-llvmta/lib/Target/AMDGPU/GCNNSAReassign.cpp

//===-- GCNNSAReassign.cpp - Reassign registers in NSA unstructions -------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief Try to reassign registers on GFX10+ from non-sequential to sequential
/// in NSA image instructions. Later SIShrinkInstructions pass will relace NSA
/// with sequential versions where possible.
///
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LiveRegMatrix.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/InitializePasses.h"

using namespace llvm;

#define DEBUG_TYPE "amdgpu-nsa-reassign"

STATISTIC(NumNSAInstructions,
          "Number of NSA instructions with non-sequential address found");
STATISTIC(NumNSAConverted,
          "Number of NSA instructions changed to sequential");

namespace {

class GCNNSAReassign : public MachineFunctionPass {
public:
  static char ID;

  GCNNSAReassign() : MachineFunctionPass(ID) {
    initializeGCNNSAReassignPass(*PassRegistry::getPassRegistry());
  }

  bool runOnMachineFunction(MachineFunction &MF) override;

  StringRef getPassName() const override { return "GCN NSA Reassign"; }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<LiveIntervals>();
    AU.addRequired<VirtRegMap>();
    AU.addRequired<LiveRegMatrix>();
    AU.setPreservesAll();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

private:
  typedef enum {
    NOT_NSA,        // Not an NSA instruction
    FIXED,          // NSA which we cannot modify
    NON_CONTIGUOUS, // NSA with non-sequential address which we can try
                    // to optimize.
    CONTIGUOUS      // NSA with all sequential address registers
  } NSA_Status;

  const GCNSubtarget *ST;

  const MachineRegisterInfo *MRI;

  const SIRegisterInfo *TRI;

  VirtRegMap *VRM;

  LiveRegMatrix *LRM;

  LiveIntervals *LIS;

  unsigned MaxNumVGPRs;

  const MCPhysReg *CSRegs;

  NSA_Status CheckNSA(const MachineInstr &MI, bool Fast = false) const;

  bool tryAssignRegisters(SmallVectorImpl<LiveInterval *> &Intervals,
                          unsigned StartReg) const;

  bool canAssign(unsigned StartReg, unsigned NumRegs) const;

  bool scavengeRegs(SmallVectorImpl<LiveInterval *> &Intervals) const;
};

} // End anonymous namespace.

INITIALIZE_PASS_BEGIN(GCNNSAReassign, DEBUG_TYPE, "GCN NSA Reassign",
                      false, false)
INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
INITIALIZE_PASS_DEPENDENCY(LiveRegMatrix)
INITIALIZE_PASS_END(GCNNSAReassign, DEBUG_TYPE, "GCN NSA Reassign",
                    false, false)


char GCNNSAReassign::ID = 0;

char &llvm::GCNNSAReassignID = GCNNSAReassign::ID;

bool
GCNNSAReassign::tryAssignRegisters(SmallVectorImpl<LiveInterval *> &Intervals,
                                   unsigned StartReg) const {
  unsigned NumRegs = Intervals.size();

  for (unsigned N = 0; N < NumRegs; ++N)
    if (VRM->hasPhys(Intervals[N]->reg()))
      LRM->unassign(*Intervals[N]);

  for (unsigned N = 0; N < NumRegs; ++N)
    if (LRM->checkInterference(*Intervals[N], MCRegister::from(StartReg + N)))
      return false;

  for (unsigned N = 0; N < NumRegs; ++N)
    LRM->assign(*Intervals[N], MCRegister::from(StartReg + N));

  return true;
}

bool GCNNSAReassign::canAssign(unsigned StartReg, unsigned NumRegs) const {
  for (unsigned N = 0; N < NumRegs; ++N) {
    unsigned Reg = StartReg + N;
    if (!MRI->isAllocatable(Reg))
      return false;

    for (unsigned I = 0; CSRegs[I]; ++I)
      if (TRI->isSubRegisterEq(Reg, CSRegs[I]) &&
          !LRM->isPhysRegUsed(CSRegs[I]))
      return false;
  }

  return true;
}

bool
GCNNSAReassign::scavengeRegs(SmallVectorImpl<LiveInterval *> &Intervals) const {
  unsigned NumRegs = Intervals.size();

  if (NumRegs > MaxNumVGPRs)
    return false;
  unsigned MaxReg = MaxNumVGPRs - NumRegs + AMDGPU::VGPR0;

  for (unsigned Reg = AMDGPU::VGPR0; Reg <= MaxReg; ++Reg) {
    if (!canAssign(Reg, NumRegs))
      continue;

    if (tryAssignRegisters(Intervals, Reg))
      return true;
  }

  return false;
}

GCNNSAReassign::NSA_Status
GCNNSAReassign::CheckNSA(const MachineInstr &MI, bool Fast) const {
  const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode());
  if (!Info || Info->MIMGEncoding != AMDGPU::MIMGEncGfx10NSA)
    return NSA_Status::NOT_NSA;

  int VAddr0Idx =
    AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);

  unsigned VgprBase = 0;
  bool NSA = false;
  for (unsigned I = 0; I < Info->VAddrDwords; ++I) {
    const MachineOperand &Op = MI.getOperand(VAddr0Idx + I);
    Register Reg = Op.getReg();
    if (Reg.isPhysical() || !VRM->isAssignedReg(Reg))
      return NSA_Status::FIXED;

    Register PhysReg = VRM->getPhys(Reg);

    if (!Fast) {
      if (!PhysReg)
        return NSA_Status::FIXED;

      // Bail if address is not a VGPR32. That should be possible to extend the
      // optimization to work with subregs of a wider register tuples, but the
      // logic to find free registers will be much more complicated with much
      // less chances for success. That seems reasonable to assume that in most
      // cases a tuple is used because a vector variable contains different
      // parts of an address and it is either already consequitive or cannot
      // be reassigned if not. If needed it is better to rely on register
      // coalescer to process such address tuples.
      if (MRI->getRegClass(Reg) != &AMDGPU::VGPR_32RegClass || Op.getSubReg())
        return NSA_Status::FIXED;

      const MachineInstr *Def = MRI->getUniqueVRegDef(Reg);

      if (Def && Def->isCopy() && Def->getOperand(1).getReg() == PhysReg)
        return NSA_Status::FIXED;

      for (auto U : MRI->use_nodbg_operands(Reg)) {
        if (U.isImplicit())
          return NSA_Status::FIXED;
        const MachineInstr *UseInst = U.getParent();
        if (UseInst->isCopy() && UseInst->getOperand(0).getReg() == PhysReg)
          return NSA_Status::FIXED;
      }

      if (!LIS->hasInterval(Reg))
        return NSA_Status::FIXED;
    }

    if (I == 0)
      VgprBase = PhysReg;
    else if (VgprBase + I != PhysReg)
      NSA = true;
  }

  return NSA ? NSA_Status::NON_CONTIGUOUS : NSA_Status::CONTIGUOUS;
}

bool GCNNSAReassign::runOnMachineFunction(MachineFunction &MF) {
  ST = &MF.getSubtarget<GCNSubtarget>();
  if (ST->getGeneration() < GCNSubtarget::GFX10)
    return false;

  MRI = &MF.getRegInfo();
  TRI = ST->getRegisterInfo();
  VRM = &getAnalysis<VirtRegMap>();
  LRM = &getAnalysis<LiveRegMatrix>();
  LIS = &getAnalysis<LiveIntervals>();

  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  MaxNumVGPRs = ST->getMaxNumVGPRs(MF);
  MaxNumVGPRs = std::min(ST->getMaxNumVGPRs(MFI->getOccupancy()), MaxNumVGPRs);
  CSRegs = MRI->getCalleeSavedRegs();

  using Candidate = std::pair<const MachineInstr*, bool>;
  SmallVector<Candidate, 32> Candidates;
  for (const MachineBasicBlock &MBB : MF) {
    for (const MachineInstr &MI : MBB) {
      switch (CheckNSA(MI)) {
      default:
        continue;
      case NSA_Status::CONTIGUOUS:
        Candidates.push_back(std::make_pair(&MI, true));
        break;
      case NSA_Status::NON_CONTIGUOUS:
        Candidates.push_back(std::make_pair(&MI, false));
        ++NumNSAInstructions;
        break;
      }
    }
  }

  bool Changed = false;
  for (auto &C : Candidates) {
    if (C.second)
      continue;

    const MachineInstr *MI = C.first;
    if (CheckNSA(*MI, true) == NSA_Status::CONTIGUOUS) {
      // Already happen to be fixed.
      C.second = true;
      ++NumNSAConverted;
      continue;
    }

    const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI->getOpcode());
    int VAddr0Idx =
      AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::vaddr0);

    SmallVector<LiveInterval *, 16> Intervals;
    SmallVector<MCRegister, 16> OrigRegs;
    SlotIndex MinInd, MaxInd;
    for (unsigned I = 0; I < Info->VAddrDwords; ++I) {
      const MachineOperand &Op = MI->getOperand(VAddr0Idx + I);
      Register Reg = Op.getReg();
      LiveInterval *LI = &LIS->getInterval(Reg);
      if (llvm::is_contained(Intervals, LI)) {
        // Same register used, unable to make sequential
        Intervals.clear();
        break;
      }
      Intervals.push_back(LI);
      OrigRegs.push_back(VRM->getPhys(Reg));
      if (LI->empty()) {
        // The address input is undef, so it doesn't contribute to the relevant
        // range. Seed a reasonable index range if required.
        if (I == 0)
          MinInd = MaxInd = LIS->getInstructionIndex(*MI);
        continue;
      }
      MinInd = I != 0 ? std::min(MinInd, LI->beginIndex()) : LI->beginIndex();
      MaxInd = I != 0 ? std::max(MaxInd, LI->endIndex()) : LI->endIndex();
    }

    if (Intervals.empty())
      continue;

    LLVM_DEBUG(dbgs() << "Attempting to reassign NSA: " << *MI
                      << "\tOriginal allocation:\t";
               for (auto *LI
                    : Intervals) dbgs()
               << " " << llvm::printReg((VRM->getPhys(LI->reg())), TRI);
               dbgs() << '\n');

    bool Success = scavengeRegs(Intervals);
    if (!Success) {
      LLVM_DEBUG(dbgs() << "\tCannot reallocate.\n");
      if (VRM->hasPhys(Intervals.back()->reg())) // Did not change allocation.
        continue;
    } else {
      // Check we did not make it worse for other instructions.
      auto I = std::lower_bound(Candidates.begin(), &C, MinInd,
                                [this](const Candidate &C, SlotIndex I) {
                                  return LIS->getInstructionIndex(*C.first) < I;
                                });
      for (auto E = Candidates.end(); Success && I != E &&
              LIS->getInstructionIndex(*I->first) < MaxInd; ++I) {
        if (I->second && CheckNSA(*I->first, true) < NSA_Status::CONTIGUOUS) {
          Success = false;
          LLVM_DEBUG(dbgs() << "\tNSA conversion conflict with " << *I->first);
        }
      }
    }

    if (!Success) {
      for (unsigned I = 0; I < Info->VAddrDwords; ++I)
        if (VRM->hasPhys(Intervals[I]->reg()))
          LRM->unassign(*Intervals[I]);

      for (unsigned I = 0; I < Info->VAddrDwords; ++I)
        LRM->assign(*Intervals[I], OrigRegs[I]);

      continue;
    }

    C.second = true;
    ++NumNSAConverted;
    LLVM_DEBUG(
        dbgs() << "\tNew allocation:\t\t ["
               << llvm::printReg((VRM->getPhys(Intervals.front()->reg())), TRI)
               << " : "
               << llvm::printReg((VRM->getPhys(Intervals.back()->reg())), TRI)
               << "]\n");
    Changed = true;
  }

  return Changed;
}
first commit 2022-04-25 10:02:23 +02:00			`//===-- GCNNSAReassign.cpp - Reassign registers in NSA unstructions -------===//`
			`//`
			`// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.`
			`// See https://llvm.org/LICENSE.txt for license information.`
			`// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception`
			`//`
			`//===----------------------------------------------------------------------===//`
			`//`
			`/// \file`
			`/// \brief Try to reassign registers on GFX10+ from non-sequential to sequential`
			`/// in NSA image instructions. Later SIShrinkInstructions pass will relace NSA`
			`/// with sequential versions where possible.`
			`///`
			`//===----------------------------------------------------------------------===//`

			`#include "AMDGPU.h"`
			`#include "GCNSubtarget.h"`
			`#include "SIMachineFunctionInfo.h"`
			`#include "llvm/ADT/Statistic.h"`
			`#include "llvm/CodeGen/LiveIntervals.h"`
			`#include "llvm/CodeGen/LiveRegMatrix.h"`
			`#include "llvm/CodeGen/MachineFunctionPass.h"`
			`#include "llvm/InitializePasses.h"`

			`using namespace llvm;`

			`#define DEBUG_TYPE "amdgpu-nsa-reassign"`

			`STATISTIC(NumNSAInstructions,`
			`"Number of NSA instructions with non-sequential address found");`
			`STATISTIC(NumNSAConverted,`
			`"Number of NSA instructions changed to sequential");`

			`namespace {`

			`class GCNNSAReassign : public MachineFunctionPass {`
			`public:`
			`static char ID;`

			`GCNNSAReassign() : MachineFunctionPass(ID) {`
			`initializeGCNNSAReassignPass(*PassRegistry::getPassRegistry());`
			`}`

			`bool runOnMachineFunction(MachineFunction &MF) override;`

			`StringRef getPassName() const override { return "GCN NSA Reassign"; }`

			`void getAnalysisUsage(AnalysisUsage &AU) const override {`
			`AU.addRequired<LiveIntervals>();`
			`AU.addRequired<VirtRegMap>();`
			`AU.addRequired<LiveRegMatrix>();`
			`AU.setPreservesAll();`
			`MachineFunctionPass::getAnalysisUsage(AU);`
			`}`

			`private:`
			`typedef enum {`
			`NOT_NSA, // Not an NSA instruction`
			`FIXED, // NSA which we cannot modify`
			`NON_CONTIGUOUS, // NSA with non-sequential address which we can try`
			`// to optimize.`
			`CONTIGUOUS // NSA with all sequential address registers`
			`} NSA_Status;`

			`const GCNSubtarget *ST;`

			`const MachineRegisterInfo *MRI;`

			`const SIRegisterInfo *TRI;`

			`VirtRegMap *VRM;`

			`LiveRegMatrix *LRM;`

			`LiveIntervals *LIS;`

			`unsigned MaxNumVGPRs;`

			`const MCPhysReg *CSRegs;`

			`NSA_Status CheckNSA(const MachineInstr &MI, bool Fast = false) const;`

			`bool tryAssignRegisters(SmallVectorImpl<LiveInterval *> &Intervals,`
			`unsigned StartReg) const;`

			`bool canAssign(unsigned StartReg, unsigned NumRegs) const;`

			`bool scavengeRegs(SmallVectorImpl<LiveInterval *> &Intervals) const;`
			`};`

			`} // End anonymous namespace.`

			`INITIALIZE_PASS_BEGIN(GCNNSAReassign, DEBUG_TYPE, "GCN NSA Reassign",`
			`false, false)`
			`INITIALIZE_PASS_DEPENDENCY(LiveIntervals)`
			`INITIALIZE_PASS_DEPENDENCY(VirtRegMap)`
			`INITIALIZE_PASS_DEPENDENCY(LiveRegMatrix)`
			`INITIALIZE_PASS_END(GCNNSAReassign, DEBUG_TYPE, "GCN NSA Reassign",`
			`false, false)`


			`char GCNNSAReassign::ID = 0;`

			`char &llvm::GCNNSAReassignID = GCNNSAReassign::ID;`

			`bool`
			`GCNNSAReassign::tryAssignRegisters(SmallVectorImpl<LiveInterval *> &Intervals,`
			`unsigned StartReg) const {`
			`unsigned NumRegs = Intervals.size();`

			`for (unsigned N = 0; N < NumRegs; ++N)`
			`if (VRM->hasPhys(Intervals[N]->reg()))`
			`LRM->unassign(*Intervals[N]);`

			`for (unsigned N = 0; N < NumRegs; ++N)`
			`if (LRM->checkInterference(*Intervals[N], MCRegister::from(StartReg + N)))`
			`return false;`

			`for (unsigned N = 0; N < NumRegs; ++N)`
			`LRM->assign(*Intervals[N], MCRegister::from(StartReg + N));`

			`return true;`
			`}`

			`bool GCNNSAReassign::canAssign(unsigned StartReg, unsigned NumRegs) const {`
			`for (unsigned N = 0; N < NumRegs; ++N) {`
			`unsigned Reg = StartReg + N;`
			`if (!MRI->isAllocatable(Reg))`
			`return false;`

			`for (unsigned I = 0; CSRegs[I]; ++I)`
			`if (TRI->isSubRegisterEq(Reg, CSRegs[I]) &&`
			`!LRM->isPhysRegUsed(CSRegs[I]))`
			`return false;`
			`}`

			`return true;`
			`}`

			`bool`
			`GCNNSAReassign::scavengeRegs(SmallVectorImpl<LiveInterval *> &Intervals) const {`
			`unsigned NumRegs = Intervals.size();`

			`if (NumRegs > MaxNumVGPRs)`
			`return false;`
			`unsigned MaxReg = MaxNumVGPRs - NumRegs + AMDGPU::VGPR0;`

			`for (unsigned Reg = AMDGPU::VGPR0; Reg <= MaxReg; ++Reg) {`
			`if (!canAssign(Reg, NumRegs))`
			`continue;`

			`if (tryAssignRegisters(Intervals, Reg))`
			`return true;`
			`}`

			`return false;`
			`}`

			`GCNNSAReassign::NSA_Status`
			`GCNNSAReassign::CheckNSA(const MachineInstr &MI, bool Fast) const {`
			`const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode());`
			`if (!Info \|\| Info->MIMGEncoding != AMDGPU::MIMGEncGfx10NSA)`
			`return NSA_Status::NOT_NSA;`

			`int VAddr0Idx =`
			`AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);`

			`unsigned VgprBase = 0;`
			`bool NSA = false;`
			`for (unsigned I = 0; I < Info->VAddrDwords; ++I) {`
			`const MachineOperand &Op = MI.getOperand(VAddr0Idx + I);`
			`Register Reg = Op.getReg();`
			`if (Reg.isPhysical() \|\| !VRM->isAssignedReg(Reg))`
			`return NSA_Status::FIXED;`

			`Register PhysReg = VRM->getPhys(Reg);`

			`if (!Fast) {`
			`if (!PhysReg)`
			`return NSA_Status::FIXED;`

			`// Bail if address is not a VGPR32. That should be possible to extend the`
			`// optimization to work with subregs of a wider register tuples, but the`
			`// logic to find free registers will be much more complicated with much`
			`// less chances for success. That seems reasonable to assume that in most`
			`// cases a tuple is used because a vector variable contains different`
			`// parts of an address and it is either already consequitive or cannot`
			`// be reassigned if not. If needed it is better to rely on register`
			`// coalescer to process such address tuples.`
			`if (MRI->getRegClass(Reg) != &AMDGPU::VGPR_32RegClass \|\| Op.getSubReg())`
			`return NSA_Status::FIXED;`

			`const MachineInstr *Def = MRI->getUniqueVRegDef(Reg);`

			`if (Def && Def->isCopy() && Def->getOperand(1).getReg() == PhysReg)`
			`return NSA_Status::FIXED;`

			`for (auto U : MRI->use_nodbg_operands(Reg)) {`
			`if (U.isImplicit())`
			`return NSA_Status::FIXED;`
			`const MachineInstr *UseInst = U.getParent();`
			`if (UseInst->isCopy() && UseInst->getOperand(0).getReg() == PhysReg)`
			`return NSA_Status::FIXED;`
			`}`

			`if (!LIS->hasInterval(Reg))`
			`return NSA_Status::FIXED;`
			`}`

			`if (I == 0)`
			`VgprBase = PhysReg;`
			`else if (VgprBase + I != PhysReg)`
			`NSA = true;`
			`}`

			`return NSA ? NSA_Status::NON_CONTIGUOUS : NSA_Status::CONTIGUOUS;`
			`}`

			`bool GCNNSAReassign::runOnMachineFunction(MachineFunction &MF) {`
			`ST = &MF.getSubtarget<GCNSubtarget>();`
			`if (ST->getGeneration() < GCNSubtarget::GFX10)`
			`return false;`

			`MRI = &MF.getRegInfo();`
			`TRI = ST->getRegisterInfo();`
			`VRM = &getAnalysis<VirtRegMap>();`
			`LRM = &getAnalysis<LiveRegMatrix>();`
			`LIS = &getAnalysis<LiveIntervals>();`

			`const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();`
			`MaxNumVGPRs = ST->getMaxNumVGPRs(MF);`
			`MaxNumVGPRs = std::min(ST->getMaxNumVGPRs(MFI->getOccupancy()), MaxNumVGPRs);`
			`CSRegs = MRI->getCalleeSavedRegs();`

			`using Candidate = std::pair<const MachineInstr*, bool>;`
			`SmallVector<Candidate, 32> Candidates;`
			`for (const MachineBasicBlock &MBB : MF) {`
			`for (const MachineInstr &MI : MBB) {`
			`switch (CheckNSA(MI)) {`
			`default:`
			`continue;`
			`case NSA_Status::CONTIGUOUS:`
			`Candidates.push_back(std::make_pair(&MI, true));`
			`break;`
			`case NSA_Status::NON_CONTIGUOUS:`
			`Candidates.push_back(std::make_pair(&MI, false));`
			`++NumNSAInstructions;`
			`break;`
			`}`
			`}`
			`}`

			`bool Changed = false;`
			`for (auto &C : Candidates) {`
			`if (C.second)`
			`continue;`

			`const MachineInstr *MI = C.first;`
			`if (CheckNSA(*MI, true) == NSA_Status::CONTIGUOUS) {`
			`// Already happen to be fixed.`
			`C.second = true;`
			`++NumNSAConverted;`
			`continue;`
			`}`

			`const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI->getOpcode());`
			`int VAddr0Idx =`
			`AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::vaddr0);`

			`SmallVector<LiveInterval *, 16> Intervals;`
			`SmallVector<MCRegister, 16> OrigRegs;`
			`SlotIndex MinInd, MaxInd;`
			`for (unsigned I = 0; I < Info->VAddrDwords; ++I) {`
			`const MachineOperand &Op = MI->getOperand(VAddr0Idx + I);`
			`Register Reg = Op.getReg();`
			`LiveInterval *LI = &LIS->getInterval(Reg);`
			`if (llvm::is_contained(Intervals, LI)) {`
			`// Same register used, unable to make sequential`
			`Intervals.clear();`
			`break;`
			`}`
			`Intervals.push_back(LI);`
			`OrigRegs.push_back(VRM->getPhys(Reg));`
			`if (LI->empty()) {`
			`// The address input is undef, so it doesn't contribute to the relevant`
			`// range. Seed a reasonable index range if required.`
			`if (I == 0)`
			`MinInd = MaxInd = LIS->getInstructionIndex(*MI);`
			`continue;`
			`}`
			`MinInd = I != 0 ? std::min(MinInd, LI->beginIndex()) : LI->beginIndex();`
			`MaxInd = I != 0 ? std::max(MaxInd, LI->endIndex()) : LI->endIndex();`
			`}`

			`if (Intervals.empty())`
			`continue;`

			`LLVM_DEBUG(dbgs() << "Attempting to reassign NSA: " << *MI`
			`<< "\tOriginal allocation:\t";`
			`for (auto *LI`
			`: Intervals) dbgs()`
			`<< " " << llvm::printReg((VRM->getPhys(LI->reg())), TRI);`
			`dbgs() << '\n');`

			`bool Success = scavengeRegs(Intervals);`
			`if (!Success) {`
			`LLVM_DEBUG(dbgs() << "\tCannot reallocate.\n");`
			`if (VRM->hasPhys(Intervals.back()->reg())) // Did not change allocation.`
			`continue;`
			`} else {`
			`// Check we did not make it worse for other instructions.`
			`auto I = std::lower_bound(Candidates.begin(), &C, MinInd,`
			`[this](const Candidate &C, SlotIndex I) {`
			`return LIS->getInstructionIndex(*C.first) < I;`
			`});`
			`for (auto E = Candidates.end(); Success && I != E &&`
			`LIS->getInstructionIndex(*I->first) < MaxInd; ++I) {`
			`if (I->second && CheckNSA(*I->first, true) < NSA_Status::CONTIGUOUS) {`
			`Success = false;`
			`LLVM_DEBUG(dbgs() << "\tNSA conversion conflict with " << *I->first);`
			`}`
			`}`
			`}`

			`if (!Success) {`
			`for (unsigned I = 0; I < Info->VAddrDwords; ++I)`
			`if (VRM->hasPhys(Intervals[I]->reg()))`
			`LRM->unassign(*Intervals[I]);`

			`for (unsigned I = 0; I < Info->VAddrDwords; ++I)`
			`LRM->assign(*Intervals[I], OrigRegs[I]);`

			`continue;`
			`}`

			`C.second = true;`
			`++NumNSAConverted;`
			`LLVM_DEBUG(`
			`dbgs() << "\tNew allocation:\t\t ["`
			`<< llvm::printReg((VRM->getPhys(Intervals.front()->reg())), TRI)`
			`<< " : "`
			`<< llvm::printReg((VRM->getPhys(Intervals.back()->reg())), TRI)`
			`<< "]\n");`
			`Changed = true;`
			`}`

			`return Changed;`
			`}`