160 lines
4.9 KiB
C++
160 lines
4.9 KiB
C++
|
//===-- SIRemoveShortExecBranches.cpp ------------------------------------===//
|
||
|
//
|
||
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||
|
// See https://llvm.org/LICENSE.txt for license information.
|
||
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||
|
//
|
||
|
//===----------------------------------------------------------------------===//
|
||
|
//
|
||
|
/// \file
|
||
|
/// This pass optimizes the s_cbranch_execz instructions.
|
||
|
/// The pass removes this skip instruction for short branches,
|
||
|
/// if there is no unwanted side effect in the fallthrough code sequence.
|
||
|
///
|
||
|
//===----------------------------------------------------------------------===//
|
||
|
|
||
|
#include "AMDGPU.h"
|
||
|
#include "GCNSubtarget.h"
|
||
|
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
|
||
|
#include "llvm/CodeGen/MachineFunctionPass.h"
|
||
|
#include "llvm/Support/CommandLine.h"
|
||
|
|
||
|
using namespace llvm;
|
||
|
|
||
|
#define DEBUG_TYPE "si-remove-short-exec-branches"
|
||
|
|
||
|
static unsigned SkipThreshold;
|
||
|
|
||
|
static cl::opt<unsigned, true> SkipThresholdFlag(
|
||
|
"amdgpu-skip-threshold", cl::Hidden,
|
||
|
cl::desc(
|
||
|
"Number of instructions before jumping over divergent control flow"),
|
||
|
cl::location(SkipThreshold), cl::init(12));
|
||
|
|
||
|
namespace {
|
||
|
|
||
|
class SIRemoveShortExecBranches : public MachineFunctionPass {
|
||
|
private:
|
||
|
const SIInstrInfo *TII = nullptr;
|
||
|
bool getBlockDestinations(MachineBasicBlock &SrcMBB,
|
||
|
MachineBasicBlock *&TrueMBB,
|
||
|
MachineBasicBlock *&FalseMBB,
|
||
|
SmallVectorImpl<MachineOperand> &Cond);
|
||
|
bool mustRetainExeczBranch(const MachineBasicBlock &From,
|
||
|
const MachineBasicBlock &To) const;
|
||
|
bool removeExeczBranch(MachineInstr &MI, MachineBasicBlock &SrcMBB);
|
||
|
|
||
|
public:
|
||
|
static char ID;
|
||
|
|
||
|
SIRemoveShortExecBranches() : MachineFunctionPass(ID) {
|
||
|
initializeSIRemoveShortExecBranchesPass(*PassRegistry::getPassRegistry());
|
||
|
}
|
||
|
|
||
|
bool runOnMachineFunction(MachineFunction &MF) override;
|
||
|
};
|
||
|
|
||
|
} // End anonymous namespace.
|
||
|
|
||
|
INITIALIZE_PASS(SIRemoveShortExecBranches, DEBUG_TYPE,
|
||
|
"SI remove short exec branches", false, false)
|
||
|
|
||
|
char SIRemoveShortExecBranches::ID = 0;
|
||
|
|
||
|
char &llvm::SIRemoveShortExecBranchesID = SIRemoveShortExecBranches::ID;
|
||
|
|
||
|
/// Determine the branch destinations of \p SrcMBB.
///
/// Forwards to TII->analyzeBranch, which fills \p TrueMBB, \p FalseMBB and
/// \p Cond. If analyzeBranch returns true the function gives up and returns
/// false. Otherwise a null \p FalseMBB is replaced by the block that follows
/// \p SrcMBB in the function (which may itself be null for the last block),
/// and true is returned.
bool SIRemoveShortExecBranches::getBlockDestinations(
    MachineBasicBlock &SrcMBB, MachineBasicBlock *&TrueMBB,
    MachineBasicBlock *&FalseMBB, SmallVectorImpl<MachineOperand> &Cond) {
  const bool AnalysisFailed =
      TII->analyzeBranch(SrcMBB, TrueMBB, FalseMBB, Cond);
  if (AnalysisFailed)
    return false;

  // No explicit false destination: use the next block in layout.
  if (FalseMBB == nullptr)
    FalseMBB = SrcMBB.getNextNode();

  return true;
}
|
||
|
|
||
|
bool SIRemoveShortExecBranches::mustRetainExeczBranch(
|
||
|
const MachineBasicBlock &From, const MachineBasicBlock &To) const {
|
||
|
unsigned NumInstr = 0;
|
||
|
const MachineFunction *MF = From.getParent();
|
||
|
|
||
|
for (MachineFunction::const_iterator MBBI(&From), ToI(&To), End = MF->end();
|
||
|
MBBI != End && MBBI != ToI; ++MBBI) {
|
||
|
const MachineBasicBlock &MBB = *MBBI;
|
||
|
|
||
|
for (MachineBasicBlock::const_iterator I = MBB.begin(), E = MBB.end();
|
||
|
I != E; ++I) {
|
||
|
// When a uniform loop is inside non-uniform control flow, the branch
|
||
|
// leaving the loop might never be taken when EXEC = 0.
|
||
|
// Hence we should retain cbranch out of the loop lest it become infinite.
|
||
|
if (I->isConditionalBranch())
|
||
|
return true;
|
||
|
|
||
|
if (TII->hasUnwantedEffectsWhenEXECEmpty(*I))
|
||
|
return true;
|
||
|
|
||
|
if (TII->isKillTerminator(I->getOpcode()))
|
||
|
return true;
|
||
|
|
||
|
// These instructions are potentially expensive even if EXEC = 0.
|
||
|
if (TII->isSMRD(*I) || TII->isVMEM(*I) || TII->isFLAT(*I) ||
|
||
|
I->getOpcode() == AMDGPU::S_WAITCNT)
|
||
|
return true;
|
||
|
|
||
|
++NumInstr;
|
||
|
if (NumInstr >= SkipThreshold)
|
||
|
return true;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
return false;
|
||
|
}
|
||
|
|
||
|
// Returns true if the skip branch instruction is removed.
|
||
|
bool SIRemoveShortExecBranches::removeExeczBranch(MachineInstr &MI,
|
||
|
MachineBasicBlock &SrcMBB) {
|
||
|
MachineBasicBlock *TrueMBB = nullptr;
|
||
|
MachineBasicBlock *FalseMBB = nullptr;
|
||
|
SmallVector<MachineOperand, 1> Cond;
|
||
|
|
||
|
if (!getBlockDestinations(SrcMBB, TrueMBB, FalseMBB, Cond))
|
||
|
return false;
|
||
|
|
||
|
// Consider only the forward branches.
|
||
|
if ((SrcMBB.getNumber() >= TrueMBB->getNumber()) ||
|
||
|
mustRetainExeczBranch(*FalseMBB, *TrueMBB))
|
||
|
return false;
|
||
|
|
||
|
LLVM_DEBUG(dbgs() << "Removing the execz branch: " << MI);
|
||
|
MI.eraseFromParent();
|
||
|
SrcMBB.removeSuccessor(TrueMBB);
|
||
|
|
||
|
return true;
|
||
|
}
|
||
|
|
||
|
/// Pass entry point.
///
/// Caches the subtarget's instruction info in TII, renumbers the blocks, and
/// then inspects the first terminator of every block. When that terminator is
/// an S_CBRANCH_EXECZ, removeExeczBranch is asked to delete it.
///
/// \returns true if at least one branch was removed from \p MF.
bool SIRemoveShortExecBranches::runOnMachineFunction(MachineFunction &MF) {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  TII = ST.getInstrInfo();

  // removeExeczBranch compares block numbers to recognise forward branches,
  // so renumber the blocks before looking at any of them.
  MF.RenumberBlocks();

  bool Changed = false;

  for (MachineBasicBlock &MBB : MF) {
    MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
    if (MBBI == MBB.end())
      continue;

    MachineInstr &MI = *MBBI;
    switch (MI.getOpcode()) {
    case AMDGPU::S_CBRANCH_EXECZ:
      // Accumulate the result: a plain assignment would let a later block
      // whose branch is kept reset the flag and hide an earlier removal.
      Changed |= removeExeczBranch(MI, MBB);
      break;
    default:
      break;
    }
  }

  return Changed;
}
|