//===-- SIMCCodeEmitter.cpp - SI Code Emitter -----------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// The SI code emitter produces machine code that can be executed
/// directly on the GPU device.
//
//===----------------------------------------------------------------------===//

#include "MCTargetDesc/AMDGPUFixupKinds.h"
#include "MCTargetDesc/AMDGPUMCCodeEmitter.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIDefines.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"

using namespace llvm;

namespace {

class SIMCCodeEmitter : public AMDGPUMCCodeEmitter {
  const MCRegisterInfo &MRI;

  /// Encode an fp or int literal.
  uint32_t getLitEncoding(const MCOperand &MO, const MCOperandInfo &OpInfo,
                          const MCSubtargetInfo &STI) const;

public:
  SIMCCodeEmitter(const MCInstrInfo &mcii, const MCRegisterInfo &mri,
                  MCContext &ctx)
      : AMDGPUMCCodeEmitter(mcii), MRI(mri) {}
  SIMCCodeEmitter(const SIMCCodeEmitter &) = delete;
  SIMCCodeEmitter &operator=(const SIMCCodeEmitter &) = delete;

  /// Encode the instruction and write it to the OS.
  void encodeInstruction(const MCInst &MI, raw_ostream &OS,
                         SmallVectorImpl<MCFixup> &Fixups,
                         const MCSubtargetInfo &STI) const override;

  /// \returns the encoding for an MCOperand.
  uint64_t getMachineOpValue(const MCInst &MI, const MCOperand &MO,
                             SmallVectorImpl<MCFixup> &Fixups,
                             const MCSubtargetInfo &STI) const override;

  /// Use a fixup to encode the simm16 field for SOPP branch
  /// instructions.
  unsigned getSOPPBrEncoding(const MCInst &MI, unsigned OpNo,
                             SmallVectorImpl<MCFixup> &Fixups,
                             const MCSubtargetInfo &STI) const override;

  unsigned getSMEMOffsetEncoding(const MCInst &MI, unsigned OpNo,
                                 SmallVectorImpl<MCFixup> &Fixups,
                                 const MCSubtargetInfo &STI) const override;

  unsigned getSDWASrcEncoding(const MCInst &MI, unsigned OpNo,
                              SmallVectorImpl<MCFixup> &Fixups,
                              const MCSubtargetInfo &STI) const override;

  unsigned getSDWAVopcDstEncoding(const MCInst &MI, unsigned OpNo,
                                  SmallVectorImpl<MCFixup> &Fixups,
                                  const MCSubtargetInfo &STI) const override;

  unsigned getAVOperandEncoding(const MCInst &MI, unsigned OpNo,
                                SmallVectorImpl<MCFixup> &Fixups,
                                const MCSubtargetInfo &STI) const override;
};

} // end anonymous namespace

MCCodeEmitter *llvm::createSIMCCodeEmitter(const MCInstrInfo &MCII,
                                           const MCRegisterInfo &MRI,
                                           MCContext &Ctx) {
  return new SIMCCodeEmitter(MCII, MRI, Ctx);
}

// Returns the encoding value to use if the given integer is an integer inline
// immediate value, or 0 if it is not.
template <typename IntTy>
static uint32_t getIntInlineImmEncoding(IntTy Imm) {
  if (Imm >= 0 && Imm <= 64)
    return 128 + Imm;

  if (Imm >= -16 && Imm <= -1)
    return 192 + std::abs(Imm);

  return 0;
}
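
// A note on the values produced by the getLit*Encoding helpers below: the src
// operand field uses 128 + Imm for integer inline constants in [0, 64] and
// 192 + |Imm| for integers in [-16, -1], so getIntInlineImmEncoding(5) yields
// 133 and getIntInlineImmEncoding(-7) yields 199. Values 240-248 select the
// hardware's floating-point inline constants, and 255 means the operand must
// be emitted as a literal following the instruction word (see
// encodeInstruction below).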
static uint32_t getLit16IntEncoding(uint16_t Val, const MCSubtargetInfo &STI) {
  uint16_t IntImm = getIntInlineImmEncoding(static_cast<int16_t>(Val));
  return IntImm == 0 ? 255 : IntImm;
}

static uint32_t getLit16Encoding(uint16_t Val, const MCSubtargetInfo &STI) {
  uint16_t IntImm = getIntInlineImmEncoding(static_cast<int16_t>(Val));
  if (IntImm != 0)
    return IntImm;

  if (Val == 0x3800) // 0.5
    return 240;

  if (Val == 0xB800) // -0.5
    return 241;

  if (Val == 0x3C00) // 1.0
    return 242;

  if (Val == 0xBC00) // -1.0
    return 243;

  if (Val == 0x4000) // 2.0
    return 244;

  if (Val == 0xC000) // -2.0
    return 245;

  if (Val == 0x4400) // 4.0
    return 246;

  if (Val == 0xC400) // -4.0
    return 247;

  if (Val == 0x3118 && // 1.0 / (2.0 * pi)
      STI.getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm])
    return 248;

  return 255;
}

static uint32_t getLit32Encoding(uint32_t Val, const MCSubtargetInfo &STI) {
  uint32_t IntImm = getIntInlineImmEncoding(static_cast<int32_t>(Val));
  if (IntImm != 0)
    return IntImm;

  if (Val == FloatToBits(0.5f))
    return 240;

  if (Val == FloatToBits(-0.5f))
    return 241;

  if (Val == FloatToBits(1.0f))
    return 242;

  if (Val == FloatToBits(-1.0f))
    return 243;

  if (Val == FloatToBits(2.0f))
    return 244;

  if (Val == FloatToBits(-2.0f))
    return 245;

  if (Val == FloatToBits(4.0f))
    return 246;

  if (Val == FloatToBits(-4.0f))
    return 247;

  if (Val == 0x3e22f983 && // 1.0 / (2.0 * pi)
      STI.getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm])
    return 248;

  return 255;
}

static uint32_t getLit64Encoding(uint64_t Val, const MCSubtargetInfo &STI) {
  uint32_t IntImm = getIntInlineImmEncoding(static_cast<int64_t>(Val));
  if (IntImm != 0)
    return IntImm;

  if (Val == DoubleToBits(0.5))
    return 240;

  if (Val == DoubleToBits(-0.5))
    return 241;

  if (Val == DoubleToBits(1.0))
    return 242;

  if (Val == DoubleToBits(-1.0))
    return 243;

  if (Val == DoubleToBits(2.0))
    return 244;

  if (Val == DoubleToBits(-2.0))
    return 245;

  if (Val == DoubleToBits(4.0))
    return 246;

  if (Val == DoubleToBits(-4.0))
    return 247;

  if (Val == 0x3fc45f306dc9c882 && // 1.0 / (2.0 * pi)
      STI.getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm])
    return 248;

  return 255;
}
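
// getLitEncoding dispatches on the operand type recorded in the instruction
// description and folds the operand down to one of the encodings above. A
// non-constant expression encodes as 255 (it becomes a literal plus a fixup),
// and an operand that is neither an immediate nor an expression yields ~0 so
// callers can reject it. For example, the f16 bit pattern 0x3C00 (1.0)
// encodes as 242, while 0x3555 has no inline form and encodes as 255.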
uint32_t SIMCCodeEmitter::getLitEncoding(const MCOperand &MO,
                                         const MCOperandInfo &OpInfo,
                                         const MCSubtargetInfo &STI) const {
  int64_t Imm;
  if (MO.isExpr()) {
    const auto *C = dyn_cast<MCConstantExpr>(MO.getExpr());
    if (!C)
      return 255;

    Imm = C->getValue();
  } else {
    assert(!MO.isFPImm());

    if (!MO.isImm())
      return ~0;

    Imm = MO.getImm();
  }

  switch (OpInfo.OperandType) {
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
    return getLit32Encoding(static_cast<uint32_t>(Imm), STI);

  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
    return getLit64Encoding(static_cast<uint64_t>(Imm), STI);

  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
    return getLit16IntEncoding(static_cast<uint16_t>(Imm), STI);

  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
    // FIXME: Is this correct? What do inline immediates do on SI for f16 src
    // which does not have f16 support?
    return getLit16Encoding(static_cast<uint16_t>(Imm), STI);

  case AMDGPU::OPERAND_REG_IMM_V2INT16:
  case AMDGPU::OPERAND_REG_IMM_V2FP16: {
    if (!isUInt<16>(Imm) && STI.getFeatureBits()[AMDGPU::FeatureVOP3Literal])
      return getLit32Encoding(static_cast<uint32_t>(Imm), STI);
    if (OpInfo.OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16)
      return getLit16Encoding(static_cast<uint16_t>(Imm), STI);
    LLVM_FALLTHROUGH;
  }
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
    return getLit16IntEncoding(static_cast<uint16_t>(Imm), STI);

  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
    uint16_t Lo16 = static_cast<uint16_t>(Imm);
    uint32_t Encoding = getLit16Encoding(Lo16, STI);
    return Encoding;
  }
  default:
    llvm_unreachable("invalid operand size");
  }
}

void SIMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
                                        SmallVectorImpl<MCFixup> &Fixups,
                                        const MCSubtargetInfo &STI) const {
  verifyInstructionPredicates(MI,
                              computeAvailableFeatures(STI.getFeatureBits()));

  uint64_t Encoding = getBinaryCodeForInstr(MI, Fixups, STI);
  const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
  unsigned bytes = Desc.getSize();

  for (unsigned i = 0; i < bytes; i++) {
    OS.write((uint8_t)((Encoding >> (8 * i)) & 0xff));
  }

  // NSA encoding.
  if (AMDGPU::isGFX10Plus(STI) && Desc.TSFlags & SIInstrFlags::MIMG) {
    int vaddr0 = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                            AMDGPU::OpName::vaddr0);
    int srsrc = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                           AMDGPU::OpName::srsrc);
    assert(vaddr0 >= 0 && srsrc > vaddr0);
    unsigned NumExtraAddrs = srsrc - vaddr0 - 1;
    unsigned NumPadding = (-NumExtraAddrs) & 3;

    for (unsigned i = 0; i < NumExtraAddrs; ++i)
      OS.write((uint8_t)getMachineOpValue(MI, MI.getOperand(vaddr0 + 1 + i),
                                          Fixups, STI));
    for (unsigned i = 0; i < NumPadding; ++i)
      OS.write(0);
  }

  if ((bytes > 8 && STI.getFeatureBits()[AMDGPU::FeatureVOP3Literal]) ||
      (bytes > 4 && !STI.getFeatureBits()[AMDGPU::FeatureVOP3Literal]))
    return;

  // Check for additional literals in SRC0/1/2 (Op 1/2/3).
  for (unsigned i = 0, e = Desc.getNumOperands(); i < e; ++i) {

    // Check if this operand should be encoded as [SV]Src.
    if (!AMDGPU::isSISrcOperand(Desc, i))
      continue;

    // Is this operand a literal immediate?
    const MCOperand &Op = MI.getOperand(i);
    if (getLitEncoding(Op, Desc.OpInfo[i], STI) != 255)
      continue;

    // Yes! Encode it.
    int64_t Imm = 0;

    if (Op.isImm())
      Imm = Op.getImm();
    else if (Op.isExpr()) {
      // Non-constant exprs will be replaced with a fixup value.
      if (const auto *C = dyn_cast<MCConstantExpr>(Op.getExpr()))
        Imm = C->getValue();
    } else
      llvm_unreachable("Must be immediate or expr");

    for (unsigned j = 0; j < 4; j++) {
      OS.write((uint8_t)((Imm >> (8 * j)) & 0xff));
    }

    // Only one literal value is allowed.
    break;
  }
}
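
// A SOPP branch target is usually a label that is still unresolved at encoding
// time, so instead of an immediate the emitter records a target fixup against
// the expression and lets the assembler backend patch the simm16 field once
// layout is known. For example, in
//   s_cbranch_scc0 .LBB0_1
// the operand is a symbol reference, a fixup_si_sopp_br fixup is created at
// offset 0, and 0 is returned as a placeholder encoding.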
unsigned SIMCCodeEmitter::getSOPPBrEncoding(const MCInst &MI, unsigned OpNo,
                                            SmallVectorImpl<MCFixup> &Fixups,
                                            const MCSubtargetInfo &STI) const {
  const MCOperand &MO = MI.getOperand(OpNo);

  if (MO.isExpr()) {
    const MCExpr *Expr = MO.getExpr();
    MCFixupKind Kind = (MCFixupKind)AMDGPU::fixup_si_sopp_br;
    Fixups.push_back(MCFixup::create(0, Expr, Kind, MI.getLoc()));
    return 0;
  }

  return getMachineOpValue(MI, MO, Fixups, STI);
}

unsigned
SIMCCodeEmitter::getSMEMOffsetEncoding(const MCInst &MI, unsigned OpNo,
                                       SmallVectorImpl<MCFixup> &Fixups,
                                       const MCSubtargetInfo &STI) const {
  auto Offset = MI.getOperand(OpNo).getImm();
  // VI only supports 20-bit unsigned offsets.
  assert(!AMDGPU::isVI(STI) || isUInt<20>(Offset));
  return Offset;
}

unsigned
SIMCCodeEmitter::getSDWASrcEncoding(const MCInst &MI, unsigned OpNo,
                                    SmallVectorImpl<MCFixup> &Fixups,
                                    const MCSubtargetInfo &STI) const {
  using namespace AMDGPU::SDWA;

  uint64_t RegEnc = 0;

  const MCOperand &MO = MI.getOperand(OpNo);

  if (MO.isReg()) {
    unsigned Reg = MO.getReg();
    RegEnc |= MRI.getEncodingValue(Reg);
    RegEnc &= SDWA9EncValues::SRC_VGPR_MASK;
    if (AMDGPU::isSGPR(AMDGPU::mc2PseudoReg(Reg), &MRI)) {
      RegEnc |= SDWA9EncValues::SRC_SGPR_MASK;
    }
    return RegEnc;
  } else {
    const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
    uint32_t Enc = getLitEncoding(MO, Desc.OpInfo[OpNo], STI);
    if (Enc != ~0U && Enc != 255) {
      return Enc | SDWA9EncValues::SRC_SGPR_MASK;
    }
  }

  llvm_unreachable("Unsupported operand kind");
  return 0;
}

unsigned
SIMCCodeEmitter::getSDWAVopcDstEncoding(const MCInst &MI, unsigned OpNo,
                                        SmallVectorImpl<MCFixup> &Fixups,
                                        const MCSubtargetInfo &STI) const {
  using namespace AMDGPU::SDWA;

  uint64_t RegEnc = 0;

  const MCOperand &MO = MI.getOperand(OpNo);

  unsigned Reg = MO.getReg();
  if (Reg != AMDGPU::VCC && Reg != AMDGPU::VCC_LO) {
    RegEnc |= MRI.getEncodingValue(Reg);
    RegEnc &= SDWA9EncValues::VOPC_DST_SGPR_MASK;
    RegEnc |= SDWA9EncValues::VOPC_DST_VCC_MASK;
  }
  return RegEnc;
}

unsigned
SIMCCodeEmitter::getAVOperandEncoding(const MCInst &MI, unsigned OpNo,
                                      SmallVectorImpl<MCFixup> &Fixups,
                                      const MCSubtargetInfo &STI) const {
  unsigned Reg = MI.getOperand(OpNo).getReg();
  uint64_t Enc = MRI.getEncodingValue(Reg);

  // VGPR and AGPR have the same encoding, but SrcA and SrcB operands of mfma
  // instructions use acc[0:1] modifier bits to distinguish. These bits are
  // encoded as a virtual 9th bit of the register for these operands.
  if (MRI.getRegClass(AMDGPU::AGPR_32RegClassID).contains(Reg) ||
      MRI.getRegClass(AMDGPU::AReg_64RegClassID).contains(Reg) ||
      MRI.getRegClass(AMDGPU::AReg_96RegClassID).contains(Reg) ||
      MRI.getRegClass(AMDGPU::AReg_128RegClassID).contains(Reg) ||
      MRI.getRegClass(AMDGPU::AReg_160RegClassID).contains(Reg) ||
      MRI.getRegClass(AMDGPU::AReg_192RegClassID).contains(Reg) ||
      MRI.getRegClass(AMDGPU::AReg_256RegClassID).contains(Reg) ||
      MRI.getRegClass(AMDGPU::AGPR_LO16RegClassID).contains(Reg))
    Enc |= 512;

  return Enc;
}

static bool needsPCRel(const MCExpr *Expr) {
  switch (Expr->getKind()) {
  case MCExpr::SymbolRef: {
    auto *SE = cast<MCSymbolRefExpr>(Expr);
    MCSymbolRefExpr::VariantKind Kind = SE->getKind();
    return Kind != MCSymbolRefExpr::VK_AMDGPU_ABS32_LO &&
           Kind != MCSymbolRefExpr::VK_AMDGPU_ABS32_HI;
  }
  case MCExpr::Binary: {
    auto *BE = cast<MCBinaryExpr>(Expr);
    if (BE->getOpcode() == MCBinaryExpr::Sub)
      return false;
    return needsPCRel(BE->getLHS()) || needsPCRel(BE->getRHS());
  }
  case MCExpr::Unary:
    return needsPCRel(cast<MCUnaryExpr>(Expr)->getSubExpr());
  case MCExpr::Target:
  case MCExpr::Constant:
    return false;
  }
  llvm_unreachable("invalid kind");
}
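
// getMachineOpValue resolves an operand in three steps: registers map to
// their encoding value from the register info, non-constant expressions
// produce an FK_PCRel_4 or FK_Data_4 fixup placed at the offset of the
// trailing literal, and src operands are funneled through getLitEncoding so
// inline constants and literals are handled uniformly.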
uint64_t SIMCCodeEmitter::getMachineOpValue(const MCInst &MI,
                                            const MCOperand &MO,
                                            SmallVectorImpl<MCFixup> &Fixups,
                                            const MCSubtargetInfo &STI) const {
  if (MO.isReg())
    return MRI.getEncodingValue(MO.getReg());

  if (MO.isExpr() && MO.getExpr()->getKind() != MCExpr::Constant) {
    // FIXME: Whether this expression is PCRel or not should not depend on
    // what the expression looks like. Given that this is just a general
    // expression, it should probably be FK_Data_4 and whatever is producing
    //
    //    s_add_u32 s2, s2, (extern_const_addrspace+16)
    //
    // and expecting a PCRel should instead produce
    //
    // .Ltmp1:
    //   s_add_u32 s2, s2, (extern_const_addrspace+16)-.Ltmp1
    MCFixupKind Kind;
    if (needsPCRel(MO.getExpr()))
      Kind = FK_PCRel_4;
    else
      Kind = FK_Data_4;

    const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
    uint32_t Offset = Desc.getSize();
    assert(Offset == 4 || Offset == 8);

    Fixups.push_back(MCFixup::create(Offset, MO.getExpr(), Kind, MI.getLoc()));
  }

  // Figure out the operand number, needed for the isSISrcOperand check.
  unsigned OpNo = 0;
  for (unsigned e = MI.getNumOperands(); OpNo < e; ++OpNo) {
    if (&MO == &MI.getOperand(OpNo))
      break;
  }

  const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
  if (AMDGPU::isSISrcOperand(Desc, OpNo)) {
    uint32_t Enc = getLitEncoding(MO, Desc.OpInfo[OpNo], STI);
    if (Enc != ~0U &&
        (Enc != 255 || Desc.getSize() == 4 || Desc.getSize() == 8))
      return Enc;

  } else if (MO.isImm())
    return MO.getImm();

  llvm_unreachable("Encoding of this operand type is not supported yet.");
  return 0;
}

#define ENABLE_INSTR_PREDICATE_VERIFIER
#include "AMDGPUGenMCCodeEmitter.inc"