249 lines
8.4 KiB
C++
249 lines
8.4 KiB
C++
|
//===-- X86TileConfig.cpp - Tile Register Configure----------------------===//
|
||
|
//
|
||
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||
|
// See https://llvm.org/LICENSE.txt for license information.
|
||
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||
|
//
|
||
|
//===----------------------------------------------------------------------===//
|
||
|
//
|
||
|
/// \file Pass to config the shape of AMX physical registers
|
||
|
/// AMX registers need to be configured before use. In the X86PreTileConfig pass
|
||
|
/// the pldtilecfg instruction is inserted, however at that time we don't
|
||
|
/// know the shape of each physical tile register, because the register
|
||
|
/// allocation is not done yet. This pass runs after the register allocation
|
||
|
/// pass. It collects the shape information of each physical tile register
|
||
|
/// and stores the shape in the stack slot that is allocated for load config
|
||
|
/// to tile config register.
|
||
|
//
|
||
|
//===----------------------------------------------------------------------===//
|
||
|
|
||
|
#include "X86.h"
|
||
|
#include "X86InstrBuilder.h"
|
||
|
#include "X86MachineFunctionInfo.h"
|
||
|
#include "X86RegisterInfo.h"
|
||
|
#include "X86Subtarget.h"
|
||
|
#include "llvm/CodeGen/LiveIntervals.h"
|
||
|
#include "llvm/CodeGen/MachineDominators.h"
|
||
|
#include "llvm/CodeGen/MachineFrameInfo.h"
|
||
|
#include "llvm/CodeGen/MachineFunctionPass.h"
|
||
|
#include "llvm/CodeGen/MachineInstr.h"
|
||
|
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
||
|
#include "llvm/CodeGen/Passes.h"
|
||
|
#include "llvm/CodeGen/TargetInstrInfo.h"
|
||
|
#include "llvm/CodeGen/TargetRegisterInfo.h"
|
||
|
#include "llvm/CodeGen/TileShapeInfo.h"
|
||
|
#include "llvm/CodeGen/VirtRegMap.h"
|
||
|
#include "llvm/InitializePasses.h"
|
||
|
|
||
|
using namespace llvm;
|
||
|
|
||
|
#define DEBUG_TYPE "tile-config"
|
||
|
|
||
|
namespace {
|
||
|
|
||
|
class X86TileConfig : public MachineFunctionPass {
|
||
|
// context
|
||
|
MachineFunction *MF = nullptr;
|
||
|
const X86Subtarget *ST = nullptr;
|
||
|
const TargetRegisterInfo *TRI;
|
||
|
const TargetInstrInfo *TII;
|
||
|
MachineDominatorTree *DomTree = nullptr;
|
||
|
MachineRegisterInfo *MRI = nullptr;
|
||
|
VirtRegMap *VRM = nullptr;
|
||
|
LiveIntervals *LIS = nullptr;
|
||
|
|
||
|
MachineInstr *getTileConfigPoint();
|
||
|
void tileConfig();
|
||
|
|
||
|
public:
|
||
|
X86TileConfig() : MachineFunctionPass(ID) {}
|
||
|
|
||
|
/// Return the pass name.
|
||
|
StringRef getPassName() const override { return "Tile Register Configure"; }
|
||
|
|
||
|
/// X86TileConfig analysis usage.
|
||
|
void getAnalysisUsage(AnalysisUsage &AU) const override;
|
||
|
|
||
|
/// Perform register allocation.
|
||
|
bool runOnMachineFunction(MachineFunction &mf) override;
|
||
|
|
||
|
MachineFunctionProperties getRequiredProperties() const override {
|
||
|
return MachineFunctionProperties().set(
|
||
|
MachineFunctionProperties::Property::NoPHIs);
|
||
|
}
|
||
|
|
||
|
static char ID;
|
||
|
};
|
||
|
|
||
|
} // end anonymous namespace
|
||
|
|
||
|
char X86TileConfig::ID = 0;
|
||
|
|
||
|
INITIALIZE_PASS_BEGIN(X86TileConfig, "tileconfig", "Tile Register Configure",
|
||
|
false, false)
|
||
|
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
|
||
|
INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
|
||
|
INITIALIZE_PASS_END(X86TileConfig, "tileconfig", "Tile Register Configure",
|
||
|
false, false)
|
||
|
|
||
|
/// Declare the analyses this pass consumes and what it keeps valid.
void X86TileConfig::getAnalysisUsage(AnalysisUsage &AU) const {
  // Inputs: dominator tree, live intervals and the virtual register map.
  AU.addRequired<MachineDominatorTree>();
  AU.addRequired<LiveIntervals>();
  AU.addRequired<VirtRegMap>();

  // Preservation: slot indexes explicitly, and everything else as well.
  AU.addPreserved<SlotIndexes>();
  AU.setPreservesAll();

  // Let the base class append its own requirements.
  MachineFunctionPass::getAnalysisUsage(AU);
}
|
||
|
|
||
|
/// Convert a physical tile register into its zero-based tile number, i.e. the
/// distance of \p PhysReg from X86::TMM0.
///
/// \param PhysReg physical register; must lie in the range TMM0..TMM7.
/// \returns PhysReg - X86::TMM0.
static unsigned getTilePhysRegIndex(Register PhysReg) {
  // Check both ends of the range against PhysReg. The previous upper-bound
  // test compared two constants and therefore could never fail.
  assert((PhysReg >= X86::TMM0 && PhysReg <= X86::TMM7) &&
         "Tile register number is invalid");
  return (PhysReg - X86::TMM0);
}
|
||
|
|
||
|
/// Build a store of \p SrcReg into frame index \p FrameIdx at \p Offset and
/// insert it before \p MI in \p MBB.
///
/// A \p BitSize of 8 selects MOV8mr; any other value selects MOV16mr. When
/// \p BitSize equals the register size of \p RC the register is stored as a
/// whole, otherwise the matching 8-bit or 16-bit sub-register is stored.
///
/// \returns the newly created store instruction.
static MachineInstr *
storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
                    Register SrcReg, unsigned BitSize, int FrameIdx, int Offset,
                    const TargetInstrInfo *TII, const TargetRegisterClass *RC,
                    const TargetRegisterInfo *TRI) {
  const bool IsByteStore = (BitSize == 8);
  const unsigned StoreOpc = IsByteStore ? X86::MOV8mr : X86::MOV16mr;

  // Sub-register index 0 means "use the full register".
  unsigned SubRegIdx = 0;
  if (BitSize != TRI->getRegSizeInBits(*RC))
    SubRegIdx = IsByteStore ? X86::sub_8bit : X86::sub_16bit;

  auto Builder = BuildMI(MBB, MI, DebugLoc(), TII->get(StoreOpc));
  return addFrameReference(Builder, FrameIdx, Offset)
      .addReg(SrcReg, 0, SubRegIdx);
}
|
||
|
|
||
|
/// Build a store of the immediate \p Imm into frame index \p FrameIdx at
/// \p Offset and insert it before \p MI in \p MBB.
///
/// A \p BitSize of 8 selects MOV8mi; any other value selects MOV16mi.
///
/// \returns the newly created store instruction.
static MachineInstr *storeImmToStackSlot(MachineBasicBlock &MBB,
                                         MachineBasicBlock::iterator MI,
                                         int64_t Imm, unsigned BitSize,
                                         int FrameIdx, int Offset,
                                         const TargetInstrInfo *TII) {
  unsigned StoreOpc = X86::MOV16mi;
  if (BitSize == 8)
    StoreOpc = X86::MOV8mi;

  auto Builder = BuildMI(MBB, MI, DebugLoc(), TII->get(StoreOpc));
  return addFrameReference(Builder, FrameIdx, Offset).addImm(Imm);
}
|
||
|
|
||
|
/// Scan the function block by block and return the first PLDTILECFG
/// instruction encountered, or nullptr if the function contains none.
///
/// Only a single tile config per function is supported for now (see
/// X86PreTileConfig.cpp), so the search stops at the first match.
MachineInstr *X86TileConfig::getTileConfigPoint() {
  for (MachineBasicBlock &Block : *MF) {
    for (MachineInstr &Inst : Block) {
      if (Inst.getOpcode() != X86::PLDTILECFG)
        continue;
      return &Inst;
    }
  }

  return nullptr;
}
|
||
|
|
||
|
void X86TileConfig::tileConfig() {
|
||
|
MachineInstr *MI = getTileConfigPoint();
|
||
|
if (!MI)
|
||
|
return;
|
||
|
MachineBasicBlock *MBB = MI->getParent();
|
||
|
int SS = MI->getOperand(1).getIndex();
|
||
|
BitVector PhysRegs(TRI->getNumRegs());
|
||
|
|
||
|
// Fill in the palette first.
|
||
|
auto *NewMI = storeImmToStackSlot(*MBB, *MI, 1, 8, SS, 0, TII);
|
||
|
LIS->InsertMachineInstrInMaps(*NewMI);
|
||
|
// Fill in the shape of each tile physical register.
|
||
|
for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
|
||
|
Register VirtReg = Register::index2VirtReg(i);
|
||
|
if (MRI->reg_nodbg_empty(VirtReg))
|
||
|
continue;
|
||
|
const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg);
|
||
|
if (RC.getID() != X86::TILERegClassID)
|
||
|
continue;
|
||
|
Register PhysReg = VRM->getPhys(VirtReg);
|
||
|
if (PhysRegs.test(PhysReg))
|
||
|
continue;
|
||
|
PhysRegs.set(PhysReg);
|
||
|
ShapeT Shape = VRM->getShape(VirtReg);
|
||
|
Register RowReg = Shape.getRow()->getReg();
|
||
|
Register ColReg = Shape.getCol()->getReg();
|
||
|
|
||
|
// Here is the data format for the tile config.
|
||
|
// 0 palette
|
||
|
// 1 start_row
|
||
|
// 2-15 reserved, must be zero
|
||
|
// 16-17 tile0.colsb Tile 0 bytes per row.
|
||
|
// 18-19 tile1.colsb Tile 1 bytes per row.
|
||
|
// 20-21 tile2.colsb Tile 2 bytes per row.
|
||
|
// ... (sequence continues)
|
||
|
// 30-31 tile7.colsb Tile 7 bytes per row.
|
||
|
// 32-47 reserved, must be zero
|
||
|
// 48 tile0.rows Tile 0 rows.
|
||
|
// 49 tile1.rows Tile 1 rows.
|
||
|
// 50 tile2.rows Tile 2 rows.
|
||
|
// ... (sequence continues)
|
||
|
// 55 tile7.rows Tile 7 rows.
|
||
|
// 56-63 reserved, must be zero
|
||
|
unsigned Index = getTilePhysRegIndex(PhysReg);
|
||
|
int RowOffset = 48 + Index;
|
||
|
int ColOffset = 16 + Index * 2;
|
||
|
|
||
|
unsigned BitSize = 8;
|
||
|
for (const auto &Pair : {std::make_pair(RowReg, RowOffset),
|
||
|
std::make_pair(ColReg, ColOffset)}) {
|
||
|
int64_t Imm;
|
||
|
int ImmCount = 0;
|
||
|
// All def must be the same value, otherwise it is invalid MIs.
|
||
|
// Immediate is prefered.
|
||
|
for (const MachineOperand &MO : MRI->def_operands(Pair.first)) {
|
||
|
const auto *Inst = MO.getParent();
|
||
|
if (Inst->isMoveImmediate()) {
|
||
|
ImmCount++;
|
||
|
Imm = Inst->getOperand(1).getImm();
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
auto StoreConfig = [&](int Offset) {
|
||
|
MachineInstr *NewMI = nullptr;
|
||
|
if (ImmCount)
|
||
|
NewMI = storeImmToStackSlot(*MBB, *MI, Imm, BitSize, SS, Offset, TII);
|
||
|
else {
|
||
|
const TargetRegisterClass *RC = MRI->getRegClass(Pair.first);
|
||
|
NewMI = storeRegToStackSlot(*MBB, *MI, Pair.first, BitSize, SS,
|
||
|
Offset, TII, RC, TRI);
|
||
|
}
|
||
|
SlotIndex SIdx = LIS->InsertMachineInstrInMaps(*NewMI);
|
||
|
if (!ImmCount) {
|
||
|
// Extend the live interval.
|
||
|
SmallVector<SlotIndex, 8> EndPoints = {SIdx.getRegSlot()};
|
||
|
LiveInterval &Int = LIS->getInterval(Pair.first);
|
||
|
LIS->extendToIndices(Int, EndPoints);
|
||
|
}
|
||
|
};
|
||
|
StoreConfig(Pair.second);
|
||
|
BitSize += 8;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/// Pass entry point: cache the per-function context and analyses, then emit
/// the tile configuration.
///
/// \returns false when the VirtRegMap shape map is empty (nothing is done),
/// true otherwise.
bool X86TileConfig::runOnMachineFunction(MachineFunction &mf) {
  // Function-level state.
  MF = &mf;
  MRI = &mf.getRegInfo();

  // Target hooks.
  ST = &mf.getSubtarget<X86Subtarget>();
  TRI = ST->getRegisterInfo();
  TII = mf.getSubtarget().getInstrInfo();

  // Analyses requested in getAnalysisUsage.
  DomTree = &getAnalysis<MachineDominatorTree>();
  VRM = &getAnalysis<VirtRegMap>();
  LIS = &getAnalysis<LiveIntervals>();

  // Without any recorded tile shape there is nothing to configure.
  if (!VRM->isShapeMapEmpty()) {
    tileConfig();
    return true;
  }
  return false;
}
|
||
|
|
||
|
/// Factory for the tile configuration pass: returns a newly allocated
/// X86TileConfig instance.
FunctionPass *llvm::createX86TileConfigPass() {
  FunctionPass *NewPass = new X86TileConfig();
  return NewPass;
}
|