//===-- PPCFastISel.cpp - PowerPC FastISel implementation -----------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This file defines the PowerPC-specific support for the FastISel class. Some // of the target-specific code is generated by tablegen in the file // PPCGenFastISel.inc, which is #included here. // //===----------------------------------------------------------------------===// #include "MCTargetDesc/PPCPredicates.h" #include "PPC.h" #include "PPCCCState.h" #include "PPCCallingConv.h" #include "PPCISelLowering.h" #include "PPCMachineFunctionInfo.h" #include "PPCSubtarget.h" #include "PPCTargetMachine.h" #include "llvm/ADT/Optional.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/FastISel.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/GetElementPtrTypeIterator.h" #include "llvm/IR/GlobalAlias.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Operator.h" #include "llvm/Support/Debug.h" #include "llvm/Target/TargetMachine.h" //===----------------------------------------------------------------------===// // // TBD: // fastLowerArguments: Handle simple cases. // PPCMaterializeGV: Handle TLS. // SelectCall: Handle function pointers. // SelectCall: Handle multi-register return values. // SelectCall: Optimize away nops for local calls. // processCallArgs: Handle bit-converted arguments. // finishCall: Handle multi-register return values. // PPCComputeAddress: Handle parameter references as FrameIndex's. // PPCEmitCmp: Handle immediate as operand 1. // SelectCall: Handle small byval arguments. // SelectIntrinsicCall: Implement. // SelectSelect: Implement. // Consider factoring isTypeLegal into the base class. // Implement switches and jump tables. // //===----------------------------------------------------------------------===// using namespace llvm; #define DEBUG_TYPE "ppcfastisel" namespace { typedef struct Address { enum { RegBase, FrameIndexBase } BaseType; union { unsigned Reg; int FI; } Base; long Offset; // Innocuous defaults for our address. Address() : BaseType(RegBase), Offset(0) { Base.Reg = 0; } } Address; class PPCFastISel final : public FastISel { const TargetMachine &TM; const PPCSubtarget *Subtarget; PPCFunctionInfo *PPCFuncInfo; const TargetInstrInfo &TII; const TargetLowering &TLI; LLVMContext *Context; public: explicit PPCFastISel(FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo) : FastISel(FuncInfo, LibInfo), TM(FuncInfo.MF->getTarget()), Subtarget(&FuncInfo.MF->getSubtarget()), PPCFuncInfo(FuncInfo.MF->getInfo()), TII(*Subtarget->getInstrInfo()), TLI(*Subtarget->getTargetLowering()), Context(&FuncInfo.Fn->getContext()) {} // Backend specific FastISel code. private: bool fastSelectInstruction(const Instruction *I) override; unsigned fastMaterializeConstant(const Constant *C) override; unsigned fastMaterializeAlloca(const AllocaInst *AI) override; bool tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo, const LoadInst *LI) override; bool fastLowerArguments() override; unsigned fastEmit_i(MVT Ty, MVT RetTy, unsigned Opc, uint64_t Imm) override; unsigned fastEmitInst_ri(unsigned MachineInstOpcode, const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill, uint64_t Imm); unsigned fastEmitInst_r(unsigned MachineInstOpcode, const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill); unsigned fastEmitInst_rr(unsigned MachineInstOpcode, const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill, unsigned Op1, bool Op1IsKill); bool fastLowerCall(CallLoweringInfo &CLI) override; // Instruction selection routines. private: bool SelectLoad(const Instruction *I); bool SelectStore(const Instruction *I); bool SelectBranch(const Instruction *I); bool SelectIndirectBr(const Instruction *I); bool SelectFPExt(const Instruction *I); bool SelectFPTrunc(const Instruction *I); bool SelectIToFP(const Instruction *I, bool IsSigned); bool SelectFPToI(const Instruction *I, bool IsSigned); bool SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode); bool SelectRet(const Instruction *I); bool SelectTrunc(const Instruction *I); bool SelectIntExt(const Instruction *I); // Utility routines. private: bool isTypeLegal(Type *Ty, MVT &VT); bool isLoadTypeLegal(Type *Ty, MVT &VT); bool isValueAvailable(const Value *V) const; bool isVSFRCRegClass(const TargetRegisterClass *RC) const { return RC->getID() == PPC::VSFRCRegClassID; } bool isVSSRCRegClass(const TargetRegisterClass *RC) const { return RC->getID() == PPC::VSSRCRegClassID; } unsigned copyRegToRegClass(const TargetRegisterClass *ToRC, unsigned SrcReg, unsigned Flag = 0, unsigned SubReg = 0) { unsigned TmpReg = createResultReg(ToRC); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY), TmpReg).addReg(SrcReg, Flag, SubReg); return TmpReg; } bool PPCEmitCmp(const Value *Src1Value, const Value *Src2Value, bool isZExt, unsigned DestReg, const PPC::Predicate Pred); bool PPCEmitLoad(MVT VT, Register &ResultReg, Address &Addr, const TargetRegisterClass *RC, bool IsZExt = true, unsigned FP64LoadOpc = PPC::LFD); bool PPCEmitStore(MVT VT, unsigned SrcReg, Address &Addr); bool PPCComputeAddress(const Value *Obj, Address &Addr); void PPCSimplifyAddress(Address &Addr, bool &UseOffset, unsigned &IndexReg); bool PPCEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, unsigned DestReg, bool IsZExt); unsigned PPCMaterializeFP(const ConstantFP *CFP, MVT VT); unsigned PPCMaterializeGV(const GlobalValue *GV, MVT VT); unsigned PPCMaterializeInt(const ConstantInt *CI, MVT VT, bool UseSExt = true); unsigned PPCMaterialize32BitInt(int64_t Imm, const TargetRegisterClass *RC); unsigned PPCMaterialize64BitInt(int64_t Imm, const TargetRegisterClass *RC); unsigned PPCMoveToIntReg(const Instruction *I, MVT VT, unsigned SrcReg, bool IsSigned); unsigned PPCMoveToFPReg(MVT VT, unsigned SrcReg, bool IsSigned); // Call handling routines. private: bool processCallArgs(SmallVectorImpl &Args, SmallVectorImpl &ArgRegs, SmallVectorImpl &ArgVTs, SmallVectorImpl &ArgFlags, SmallVectorImpl &RegArgs, CallingConv::ID CC, unsigned &NumBytes, bool IsVarArg); bool finishCall(MVT RetVT, CallLoweringInfo &CLI, unsigned &NumBytes); private: #include "PPCGenFastISel.inc" }; } // end anonymous namespace static Optional getComparePred(CmpInst::Predicate Pred) { switch (Pred) { // These are not representable with any single compare. case CmpInst::FCMP_FALSE: case CmpInst::FCMP_TRUE: // Major concern about the following 6 cases is NaN result. The comparison // result consists of 4 bits, indicating lt, eq, gt and un (unordered), // only one of which will be set. The result is generated by fcmpu // instruction. However, bc instruction only inspects one of the first 3 // bits, so when un is set, bc instruction may jump to an undesired // place. // // More specifically, if we expect an unordered comparison and un is set, we // expect to always go to true branch; in such case UEQ, UGT and ULT still // give false, which are undesired; but UNE, UGE, ULE happen to give true, // since they are tested by inspecting !eq, !lt, !gt, respectively. // // Similarly, for ordered comparison, when un is set, we always expect the // result to be false. In such case OGT, OLT and OEQ is good, since they are // actually testing GT, LT, and EQ respectively, which are false. OGE, OLE // and ONE are tested through !lt, !gt and !eq, and these are true. case CmpInst::FCMP_UEQ: case CmpInst::FCMP_UGT: case CmpInst::FCMP_ULT: case CmpInst::FCMP_OGE: case CmpInst::FCMP_OLE: case CmpInst::FCMP_ONE: default: return Optional(); case CmpInst::FCMP_OEQ: case CmpInst::ICMP_EQ: return PPC::PRED_EQ; case CmpInst::FCMP_OGT: case CmpInst::ICMP_UGT: case CmpInst::ICMP_SGT: return PPC::PRED_GT; case CmpInst::FCMP_UGE: case CmpInst::ICMP_UGE: case CmpInst::ICMP_SGE: return PPC::PRED_GE; case CmpInst::FCMP_OLT: case CmpInst::ICMP_ULT: case CmpInst::ICMP_SLT: return PPC::PRED_LT; case CmpInst::FCMP_ULE: case CmpInst::ICMP_ULE: case CmpInst::ICMP_SLE: return PPC::PRED_LE; case CmpInst::FCMP_UNE: case CmpInst::ICMP_NE: return PPC::PRED_NE; case CmpInst::FCMP_ORD: return PPC::PRED_NU; case CmpInst::FCMP_UNO: return PPC::PRED_UN; } } // Determine whether the type Ty is simple enough to be handled by // fast-isel, and return its equivalent machine type in VT. // FIXME: Copied directly from ARM -- factor into base class? bool PPCFastISel::isTypeLegal(Type *Ty, MVT &VT) { EVT Evt = TLI.getValueType(DL, Ty, true); // Only handle simple types. if (Evt == MVT::Other || !Evt.isSimple()) return false; VT = Evt.getSimpleVT(); // Handle all legal types, i.e. a register that will directly hold this // value. return TLI.isTypeLegal(VT); } // Determine whether the type Ty is simple enough to be handled by // fast-isel as a load target, and return its equivalent machine type in VT. bool PPCFastISel::isLoadTypeLegal(Type *Ty, MVT &VT) { if (isTypeLegal(Ty, VT)) return true; // If this is a type than can be sign or zero-extended to a basic operation // go ahead and accept it now. if (VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) { return true; } return false; } bool PPCFastISel::isValueAvailable(const Value *V) const { if (!isa(V)) return true; const auto *I = cast(V); return FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB; } // Given a value Obj, create an Address object Addr that represents its // address. Return false if we can't handle it. bool PPCFastISel::PPCComputeAddress(const Value *Obj, Address &Addr) { const User *U = nullptr; unsigned Opcode = Instruction::UserOp1; if (const Instruction *I = dyn_cast(Obj)) { // Don't walk into other basic blocks unless the object is an alloca from // another block, otherwise it may not have a virtual register assigned. if (FuncInfo.StaticAllocaMap.count(static_cast(Obj)) || FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) { Opcode = I->getOpcode(); U = I; } } else if (const ConstantExpr *C = dyn_cast(Obj)) { Opcode = C->getOpcode(); U = C; } switch (Opcode) { default: break; case Instruction::BitCast: // Look through bitcasts. return PPCComputeAddress(U->getOperand(0), Addr); case Instruction::IntToPtr: // Look past no-op inttoptrs. if (TLI.getValueType(DL, U->getOperand(0)->getType()) == TLI.getPointerTy(DL)) return PPCComputeAddress(U->getOperand(0), Addr); break; case Instruction::PtrToInt: // Look past no-op ptrtoints. if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL)) return PPCComputeAddress(U->getOperand(0), Addr); break; case Instruction::GetElementPtr: { Address SavedAddr = Addr; long TmpOffset = Addr.Offset; // Iterate through the GEP folding the constants into offsets where // we can. gep_type_iterator GTI = gep_type_begin(U); for (User::const_op_iterator II = U->op_begin() + 1, IE = U->op_end(); II != IE; ++II, ++GTI) { const Value *Op = *II; if (StructType *STy = GTI.getStructTypeOrNull()) { const StructLayout *SL = DL.getStructLayout(STy); unsigned Idx = cast(Op)->getZExtValue(); TmpOffset += SL->getElementOffset(Idx); } else { uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType()); for (;;) { if (const ConstantInt *CI = dyn_cast(Op)) { // Constant-offset addressing. TmpOffset += CI->getSExtValue() * S; break; } if (canFoldAddIntoGEP(U, Op)) { // A compatible add with a constant operand. Fold the constant. ConstantInt *CI = cast(cast(Op)->getOperand(1)); TmpOffset += CI->getSExtValue() * S; // Iterate on the other operand. Op = cast(Op)->getOperand(0); continue; } // Unsupported goto unsupported_gep; } } } // Try to grab the base operand now. Addr.Offset = TmpOffset; if (PPCComputeAddress(U->getOperand(0), Addr)) return true; // We failed, restore everything and try the other options. Addr = SavedAddr; unsupported_gep: break; } case Instruction::Alloca: { const AllocaInst *AI = cast(Obj); DenseMap::iterator SI = FuncInfo.StaticAllocaMap.find(AI); if (SI != FuncInfo.StaticAllocaMap.end()) { Addr.BaseType = Address::FrameIndexBase; Addr.Base.FI = SI->second; return true; } break; } } // FIXME: References to parameters fall through to the behavior // below. They should be able to reference a frame index since // they are stored to the stack, so we can get "ld rx, offset(r1)" // instead of "addi ry, r1, offset / ld rx, 0(ry)". Obj will // just contain the parameter. Try to handle this with a FI. // Try to get this in a register if nothing else has worked. if (Addr.Base.Reg == 0) Addr.Base.Reg = getRegForValue(Obj); // Prevent assignment of base register to X0, which is inappropriate // for loads and stores alike. if (Addr.Base.Reg != 0) MRI.setRegClass(Addr.Base.Reg, &PPC::G8RC_and_G8RC_NOX0RegClass); return Addr.Base.Reg != 0; } // Fix up some addresses that can't be used directly. For example, if // an offset won't fit in an instruction field, we may need to move it // into an index register. void PPCFastISel::PPCSimplifyAddress(Address &Addr, bool &UseOffset, unsigned &IndexReg) { // Check whether the offset fits in the instruction field. if (!isInt<16>(Addr.Offset)) UseOffset = false; // If this is a stack pointer and the offset needs to be simplified then // put the alloca address into a register, set the base type back to // register and continue. This should almost never happen. if (!UseOffset && Addr.BaseType == Address::FrameIndexBase) { unsigned ResultReg = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDI8), ResultReg).addFrameIndex(Addr.Base.FI).addImm(0); Addr.Base.Reg = ResultReg; Addr.BaseType = Address::RegBase; } if (!UseOffset) { IntegerType *OffsetTy = Type::getInt64Ty(*Context); const ConstantInt *Offset = ConstantInt::getSigned(OffsetTy, (int64_t)(Addr.Offset)); IndexReg = PPCMaterializeInt(Offset, MVT::i64); assert(IndexReg && "Unexpected error in PPCMaterializeInt!"); } } // Emit a load instruction if possible, returning true if we succeeded, // otherwise false. See commentary below for how the register class of // the load is determined. bool PPCFastISel::PPCEmitLoad(MVT VT, Register &ResultReg, Address &Addr, const TargetRegisterClass *RC, bool IsZExt, unsigned FP64LoadOpc) { unsigned Opc; bool UseOffset = true; bool HasSPE = Subtarget->hasSPE(); // If ResultReg is given, it determines the register class of the load. // Otherwise, RC is the register class to use. If the result of the // load isn't anticipated in this block, both may be zero, in which // case we must make a conservative guess. In particular, don't assign // R0 or X0 to the result register, as the result may be used in a load, // store, add-immediate, or isel that won't permit this. (Though // perhaps the spill and reload of live-exit values would handle this?) const TargetRegisterClass *UseRC = (ResultReg ? MRI.getRegClass(ResultReg) : (RC ? RC : (VT == MVT::f64 ? (HasSPE ? &PPC::SPERCRegClass : &PPC::F8RCRegClass) : (VT == MVT::f32 ? (HasSPE ? &PPC::GPRCRegClass : &PPC::F4RCRegClass) : (VT == MVT::i64 ? &PPC::G8RC_and_G8RC_NOX0RegClass : &PPC::GPRC_and_GPRC_NOR0RegClass))))); bool Is32BitInt = UseRC->hasSuperClassEq(&PPC::GPRCRegClass); switch (VT.SimpleTy) { default: // e.g., vector types not handled return false; case MVT::i8: Opc = Is32BitInt ? PPC::LBZ : PPC::LBZ8; break; case MVT::i16: Opc = (IsZExt ? (Is32BitInt ? PPC::LHZ : PPC::LHZ8) : (Is32BitInt ? PPC::LHA : PPC::LHA8)); break; case MVT::i32: Opc = (IsZExt ? (Is32BitInt ? PPC::LWZ : PPC::LWZ8) : (Is32BitInt ? PPC::LWA_32 : PPC::LWA)); if ((Opc == PPC::LWA || Opc == PPC::LWA_32) && ((Addr.Offset & 3) != 0)) UseOffset = false; break; case MVT::i64: Opc = PPC::LD; assert(UseRC->hasSuperClassEq(&PPC::G8RCRegClass) && "64-bit load with 32-bit target??"); UseOffset = ((Addr.Offset & 3) == 0); break; case MVT::f32: Opc = Subtarget->hasSPE() ? PPC::SPELWZ : PPC::LFS; break; case MVT::f64: Opc = FP64LoadOpc; break; } // If necessary, materialize the offset into a register and use // the indexed form. Also handle stack pointers with special needs. unsigned IndexReg = 0; PPCSimplifyAddress(Addr, UseOffset, IndexReg); // If this is a potential VSX load with an offset of 0, a VSX indexed load can // be used. bool IsVSSRC = isVSSRCRegClass(UseRC); bool IsVSFRC = isVSFRCRegClass(UseRC); bool Is32VSXLoad = IsVSSRC && Opc == PPC::LFS; bool Is64VSXLoad = IsVSFRC && Opc == PPC::LFD; if ((Is32VSXLoad || Is64VSXLoad) && (Addr.BaseType != Address::FrameIndexBase) && UseOffset && (Addr.Offset == 0)) { UseOffset = false; } if (ResultReg == 0) ResultReg = createResultReg(UseRC); // Note: If we still have a frame index here, we know the offset is // in range, as otherwise PPCSimplifyAddress would have converted it // into a RegBase. if (Addr.BaseType == Address::FrameIndexBase) { // VSX only provides an indexed load. if (Is32VSXLoad || Is64VSXLoad) return false; MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand( MachinePointerInfo::getFixedStack(*FuncInfo.MF, Addr.Base.FI, Addr.Offset), MachineMemOperand::MOLoad, MFI.getObjectSize(Addr.Base.FI), MFI.getObjectAlign(Addr.Base.FI)); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg) .addImm(Addr.Offset).addFrameIndex(Addr.Base.FI).addMemOperand(MMO); // Base reg with offset in range. } else if (UseOffset) { // VSX only provides an indexed load. if (Is32VSXLoad || Is64VSXLoad) return false; BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg) .addImm(Addr.Offset).addReg(Addr.Base.Reg); // Indexed form. } else { // Get the RR opcode corresponding to the RI one. FIXME: It would be // preferable to use the ImmToIdxMap from PPCRegisterInfo.cpp, but it // is hard to get at. switch (Opc) { default: llvm_unreachable("Unexpected opcode!"); case PPC::LBZ: Opc = PPC::LBZX; break; case PPC::LBZ8: Opc = PPC::LBZX8; break; case PPC::LHZ: Opc = PPC::LHZX; break; case PPC::LHZ8: Opc = PPC::LHZX8; break; case PPC::LHA: Opc = PPC::LHAX; break; case PPC::LHA8: Opc = PPC::LHAX8; break; case PPC::LWZ: Opc = PPC::LWZX; break; case PPC::LWZ8: Opc = PPC::LWZX8; break; case PPC::LWA: Opc = PPC::LWAX; break; case PPC::LWA_32: Opc = PPC::LWAX_32; break; case PPC::LD: Opc = PPC::LDX; break; case PPC::LFS: Opc = IsVSSRC ? PPC::LXSSPX : PPC::LFSX; break; case PPC::LFD: Opc = IsVSFRC ? PPC::LXSDX : PPC::LFDX; break; case PPC::EVLDD: Opc = PPC::EVLDDX; break; case PPC::SPELWZ: Opc = PPC::SPELWZX; break; } auto MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg); // If we have an index register defined we use it in the store inst, // otherwise we use X0 as base as it makes the vector instructions to // use zero in the computation of the effective address regardless the // content of the register. if (IndexReg) MIB.addReg(Addr.Base.Reg).addReg(IndexReg); else MIB.addReg(PPC::ZERO8).addReg(Addr.Base.Reg); } return true; } // Attempt to fast-select a load instruction. bool PPCFastISel::SelectLoad(const Instruction *I) { // FIXME: No atomic loads are supported. if (cast(I)->isAtomic()) return false; // Verify we have a legal type before going any further. MVT VT; if (!isLoadTypeLegal(I->getType(), VT)) return false; // See if we can handle this address. Address Addr; if (!PPCComputeAddress(I->getOperand(0), Addr)) return false; // Look at the currently assigned register for this instruction // to determine the required register class. This is necessary // to constrain RA from using R0/X0 when this is not legal. unsigned AssignedReg = FuncInfo.ValueMap[I]; const TargetRegisterClass *RC = AssignedReg ? MRI.getRegClass(AssignedReg) : nullptr; Register ResultReg = 0; if (!PPCEmitLoad(VT, ResultReg, Addr, RC, true, Subtarget->hasSPE() ? PPC::EVLDD : PPC::LFD)) return false; updateValueMap(I, ResultReg); return true; } // Emit a store instruction to store SrcReg at Addr. bool PPCFastISel::PPCEmitStore(MVT VT, unsigned SrcReg, Address &Addr) { assert(SrcReg && "Nothing to store!"); unsigned Opc; bool UseOffset = true; const TargetRegisterClass *RC = MRI.getRegClass(SrcReg); bool Is32BitInt = RC->hasSuperClassEq(&PPC::GPRCRegClass); switch (VT.SimpleTy) { default: // e.g., vector types not handled return false; case MVT::i8: Opc = Is32BitInt ? PPC::STB : PPC::STB8; break; case MVT::i16: Opc = Is32BitInt ? PPC::STH : PPC::STH8; break; case MVT::i32: assert(Is32BitInt && "Not GPRC for i32??"); Opc = PPC::STW; break; case MVT::i64: Opc = PPC::STD; UseOffset = ((Addr.Offset & 3) == 0); break; case MVT::f32: Opc = Subtarget->hasSPE() ? PPC::SPESTW : PPC::STFS; break; case MVT::f64: Opc = Subtarget->hasSPE() ? PPC::EVSTDD : PPC::STFD; break; } // If necessary, materialize the offset into a register and use // the indexed form. Also handle stack pointers with special needs. unsigned IndexReg = 0; PPCSimplifyAddress(Addr, UseOffset, IndexReg); // If this is a potential VSX store with an offset of 0, a VSX indexed store // can be used. bool IsVSSRC = isVSSRCRegClass(RC); bool IsVSFRC = isVSFRCRegClass(RC); bool Is32VSXStore = IsVSSRC && Opc == PPC::STFS; bool Is64VSXStore = IsVSFRC && Opc == PPC::STFD; if ((Is32VSXStore || Is64VSXStore) && (Addr.BaseType != Address::FrameIndexBase) && UseOffset && (Addr.Offset == 0)) { UseOffset = false; } // Note: If we still have a frame index here, we know the offset is // in range, as otherwise PPCSimplifyAddress would have converted it // into a RegBase. if (Addr.BaseType == Address::FrameIndexBase) { // VSX only provides an indexed store. if (Is32VSXStore || Is64VSXStore) return false; MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand( MachinePointerInfo::getFixedStack(*FuncInfo.MF, Addr.Base.FI, Addr.Offset), MachineMemOperand::MOStore, MFI.getObjectSize(Addr.Base.FI), MFI.getObjectAlign(Addr.Base.FI)); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc)) .addReg(SrcReg) .addImm(Addr.Offset) .addFrameIndex(Addr.Base.FI) .addMemOperand(MMO); // Base reg with offset in range. } else if (UseOffset) { // VSX only provides an indexed store. if (Is32VSXStore || Is64VSXStore) return false; BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc)) .addReg(SrcReg).addImm(Addr.Offset).addReg(Addr.Base.Reg); // Indexed form. } else { // Get the RR opcode corresponding to the RI one. FIXME: It would be // preferable to use the ImmToIdxMap from PPCRegisterInfo.cpp, but it // is hard to get at. switch (Opc) { default: llvm_unreachable("Unexpected opcode!"); case PPC::STB: Opc = PPC::STBX; break; case PPC::STH : Opc = PPC::STHX; break; case PPC::STW : Opc = PPC::STWX; break; case PPC::STB8: Opc = PPC::STBX8; break; case PPC::STH8: Opc = PPC::STHX8; break; case PPC::STW8: Opc = PPC::STWX8; break; case PPC::STD: Opc = PPC::STDX; break; case PPC::STFS: Opc = IsVSSRC ? PPC::STXSSPX : PPC::STFSX; break; case PPC::STFD: Opc = IsVSFRC ? PPC::STXSDX : PPC::STFDX; break; case PPC::EVSTDD: Opc = PPC::EVSTDDX; break; case PPC::SPESTW: Opc = PPC::SPESTWX; break; } auto MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc)) .addReg(SrcReg); // If we have an index register defined we use it in the store inst, // otherwise we use X0 as base as it makes the vector instructions to // use zero in the computation of the effective address regardless the // content of the register. if (IndexReg) MIB.addReg(Addr.Base.Reg).addReg(IndexReg); else MIB.addReg(PPC::ZERO8).addReg(Addr.Base.Reg); } return true; } // Attempt to fast-select a store instruction. bool PPCFastISel::SelectStore(const Instruction *I) { Value *Op0 = I->getOperand(0); unsigned SrcReg = 0; // FIXME: No atomics loads are supported. if (cast(I)->isAtomic()) return false; // Verify we have a legal type before going any further. MVT VT; if (!isLoadTypeLegal(Op0->getType(), VT)) return false; // Get the value to be stored into a register. SrcReg = getRegForValue(Op0); if (SrcReg == 0) return false; // See if we can handle this address. Address Addr; if (!PPCComputeAddress(I->getOperand(1), Addr)) return false; if (!PPCEmitStore(VT, SrcReg, Addr)) return false; return true; } // Attempt to fast-select a branch instruction. bool PPCFastISel::SelectBranch(const Instruction *I) { const BranchInst *BI = cast(I); MachineBasicBlock *BrBB = FuncInfo.MBB; MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)]; MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)]; // For now, just try the simplest case where it's fed by a compare. if (const CmpInst *CI = dyn_cast(BI->getCondition())) { if (isValueAvailable(CI)) { Optional OptPPCPred = getComparePred(CI->getPredicate()); if (!OptPPCPred) return false; PPC::Predicate PPCPred = OptPPCPred.getValue(); // Take advantage of fall-through opportunities. if (FuncInfo.MBB->isLayoutSuccessor(TBB)) { std::swap(TBB, FBB); PPCPred = PPC::InvertPredicate(PPCPred); } unsigned CondReg = createResultReg(&PPC::CRRCRegClass); if (!PPCEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned(), CondReg, PPCPred)) return false; BuildMI(*BrBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::BCC)) .addImm(Subtarget->hasSPE() ? PPC::PRED_SPE : PPCPred) .addReg(CondReg) .addMBB(TBB); finishCondBranch(BI->getParent(), TBB, FBB); return true; } } else if (const ConstantInt *CI = dyn_cast(BI->getCondition())) { uint64_t Imm = CI->getZExtValue(); MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB; fastEmitBranch(Target, DbgLoc); return true; } // FIXME: ARM looks for a case where the block containing the compare // has been split from the block containing the branch. If this happens, // there is a vreg available containing the result of the compare. I'm // not sure we can do much, as we've lost the predicate information with // the compare instruction -- we have a 4-bit CR but don't know which bit // to test here. return false; } // Attempt to emit a compare of the two source values. Signed and unsigned // comparisons are supported. Return false if we can't handle it. bool PPCFastISel::PPCEmitCmp(const Value *SrcValue1, const Value *SrcValue2, bool IsZExt, unsigned DestReg, const PPC::Predicate Pred) { Type *Ty = SrcValue1->getType(); EVT SrcEVT = TLI.getValueType(DL, Ty, true); if (!SrcEVT.isSimple()) return false; MVT SrcVT = SrcEVT.getSimpleVT(); if (SrcVT == MVT::i1 && Subtarget->useCRBits()) return false; // See if operand 2 is an immediate encodeable in the compare. // FIXME: Operands are not in canonical order at -O0, so an immediate // operand in position 1 is a lost opportunity for now. We are // similar to ARM in this regard. long Imm = 0; bool UseImm = false; const bool HasSPE = Subtarget->hasSPE(); // Only 16-bit integer constants can be represented in compares for // PowerPC. Others will be materialized into a register. if (const ConstantInt *ConstInt = dyn_cast(SrcValue2)) { if (SrcVT == MVT::i64 || SrcVT == MVT::i32 || SrcVT == MVT::i16 || SrcVT == MVT::i8 || SrcVT == MVT::i1) { const APInt &CIVal = ConstInt->getValue(); Imm = (IsZExt) ? (long)CIVal.getZExtValue() : (long)CIVal.getSExtValue(); if ((IsZExt && isUInt<16>(Imm)) || (!IsZExt && isInt<16>(Imm))) UseImm = true; } } unsigned SrcReg1 = getRegForValue(SrcValue1); if (SrcReg1 == 0) return false; unsigned SrcReg2 = 0; if (!UseImm) { SrcReg2 = getRegForValue(SrcValue2); if (SrcReg2 == 0) return false; } unsigned CmpOpc; bool NeedsExt = false; auto RC1 = MRI.getRegClass(SrcReg1); auto RC2 = SrcReg2 != 0 ? MRI.getRegClass(SrcReg2) : nullptr; switch (SrcVT.SimpleTy) { default: return false; case MVT::f32: if (HasSPE) { switch (Pred) { default: return false; case PPC::PRED_EQ: CmpOpc = PPC::EFSCMPEQ; break; case PPC::PRED_LT: CmpOpc = PPC::EFSCMPLT; break; case PPC::PRED_GT: CmpOpc = PPC::EFSCMPGT; break; } } else { CmpOpc = PPC::FCMPUS; if (isVSSRCRegClass(RC1)) SrcReg1 = copyRegToRegClass(&PPC::F4RCRegClass, SrcReg1); if (RC2 && isVSSRCRegClass(RC2)) SrcReg2 = copyRegToRegClass(&PPC::F4RCRegClass, SrcReg2); } break; case MVT::f64: if (HasSPE) { switch (Pred) { default: return false; case PPC::PRED_EQ: CmpOpc = PPC::EFDCMPEQ; break; case PPC::PRED_LT: CmpOpc = PPC::EFDCMPLT; break; case PPC::PRED_GT: CmpOpc = PPC::EFDCMPGT; break; } } else if (isVSFRCRegClass(RC1) || (RC2 && isVSFRCRegClass(RC2))) { CmpOpc = PPC::XSCMPUDP; } else { CmpOpc = PPC::FCMPUD; } break; case MVT::i1: case MVT::i8: case MVT::i16: NeedsExt = true; LLVM_FALLTHROUGH; case MVT::i32: if (!UseImm) CmpOpc = IsZExt ? PPC::CMPLW : PPC::CMPW; else CmpOpc = IsZExt ? PPC::CMPLWI : PPC::CMPWI; break; case MVT::i64: if (!UseImm) CmpOpc = IsZExt ? PPC::CMPLD : PPC::CMPD; else CmpOpc = IsZExt ? PPC::CMPLDI : PPC::CMPDI; break; } if (NeedsExt) { unsigned ExtReg = createResultReg(&PPC::GPRCRegClass); if (!PPCEmitIntExt(SrcVT, SrcReg1, MVT::i32, ExtReg, IsZExt)) return false; SrcReg1 = ExtReg; if (!UseImm) { unsigned ExtReg = createResultReg(&PPC::GPRCRegClass); if (!PPCEmitIntExt(SrcVT, SrcReg2, MVT::i32, ExtReg, IsZExt)) return false; SrcReg2 = ExtReg; } } if (!UseImm) BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc), DestReg) .addReg(SrcReg1).addReg(SrcReg2); else BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc), DestReg) .addReg(SrcReg1).addImm(Imm); return true; } // Attempt to fast-select a floating-point extend instruction. bool PPCFastISel::SelectFPExt(const Instruction *I) { Value *Src = I->getOperand(0); EVT SrcVT = TLI.getValueType(DL, Src->getType(), true); EVT DestVT = TLI.getValueType(DL, I->getType(), true); if (SrcVT != MVT::f32 || DestVT != MVT::f64) return false; unsigned SrcReg = getRegForValue(Src); if (!SrcReg) return false; // No code is generated for a FP extend. updateValueMap(I, SrcReg); return true; } // Attempt to fast-select a floating-point truncate instruction. bool PPCFastISel::SelectFPTrunc(const Instruction *I) { Value *Src = I->getOperand(0); EVT SrcVT = TLI.getValueType(DL, Src->getType(), true); EVT DestVT = TLI.getValueType(DL, I->getType(), true); if (SrcVT != MVT::f64 || DestVT != MVT::f32) return false; unsigned SrcReg = getRegForValue(Src); if (!SrcReg) return false; // Round the result to single precision. unsigned DestReg; auto RC = MRI.getRegClass(SrcReg); if (Subtarget->hasSPE()) { DestReg = createResultReg(&PPC::GPRCRegClass); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::EFSCFD), DestReg) .addReg(SrcReg); } else if (isVSFRCRegClass(RC)) { DestReg = createResultReg(&PPC::VSSRCRegClass); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::XSRSP), DestReg) .addReg(SrcReg); } else { DestReg = createResultReg(&PPC::F4RCRegClass); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::FRSP), DestReg) .addReg(SrcReg); } updateValueMap(I, DestReg); return true; } // Move an i32 or i64 value in a GPR to an f64 value in an FPR. // FIXME: When direct register moves are implemented (see PowerISA 2.07), // those should be used instead of moving via a stack slot when the // subtarget permits. // FIXME: The code here is sloppy for the 4-byte case. Can use a 4-byte // stack slot and 4-byte store/load sequence. Or just sext the 4-byte // case to 8 bytes which produces tighter code but wastes stack space. unsigned PPCFastISel::PPCMoveToFPReg(MVT SrcVT, unsigned SrcReg, bool IsSigned) { // If necessary, extend 32-bit int to 64-bit. if (SrcVT == MVT::i32) { unsigned TmpReg = createResultReg(&PPC::G8RCRegClass); if (!PPCEmitIntExt(MVT::i32, SrcReg, MVT::i64, TmpReg, !IsSigned)) return 0; SrcReg = TmpReg; } // Get a stack slot 8 bytes wide, aligned on an 8-byte boundary. Address Addr; Addr.BaseType = Address::FrameIndexBase; Addr.Base.FI = MFI.CreateStackObject(8, Align(8), false); // Store the value from the GPR. if (!PPCEmitStore(MVT::i64, SrcReg, Addr)) return 0; // Load the integer value into an FPR. The kind of load used depends // on a number of conditions. unsigned LoadOpc = PPC::LFD; if (SrcVT == MVT::i32) { if (!IsSigned) { LoadOpc = PPC::LFIWZX; Addr.Offset = (Subtarget->isLittleEndian()) ? 0 : 4; } else if (Subtarget->hasLFIWAX()) { LoadOpc = PPC::LFIWAX; Addr.Offset = (Subtarget->isLittleEndian()) ? 0 : 4; } } const TargetRegisterClass *RC = &PPC::F8RCRegClass; Register ResultReg = 0; if (!PPCEmitLoad(MVT::f64, ResultReg, Addr, RC, !IsSigned, LoadOpc)) return 0; return ResultReg; } // Attempt to fast-select an integer-to-floating-point conversion. // FIXME: Once fast-isel has better support for VSX, conversions using // direct moves should be implemented. bool PPCFastISel::SelectIToFP(const Instruction *I, bool IsSigned) { MVT DstVT; Type *DstTy = I->getType(); if (!isTypeLegal(DstTy, DstVT)) return false; if (DstVT != MVT::f32 && DstVT != MVT::f64) return false; Value *Src = I->getOperand(0); EVT SrcEVT = TLI.getValueType(DL, Src->getType(), true); if (!SrcEVT.isSimple()) return false; MVT SrcVT = SrcEVT.getSimpleVT(); if (SrcVT != MVT::i8 && SrcVT != MVT::i16 && SrcVT != MVT::i32 && SrcVT != MVT::i64) return false; unsigned SrcReg = getRegForValue(Src); if (SrcReg == 0) return false; // Shortcut for SPE. Doesn't need to store/load, since it's all in the GPRs if (Subtarget->hasSPE()) { unsigned Opc; if (DstVT == MVT::f32) Opc = IsSigned ? PPC::EFSCFSI : PPC::EFSCFUI; else Opc = IsSigned ? PPC::EFDCFSI : PPC::EFDCFUI; unsigned DestReg = createResultReg(&PPC::SPERCRegClass); // Generate the convert. BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg) .addReg(SrcReg); updateValueMap(I, DestReg); return true; } // We can only lower an unsigned convert if we have the newer // floating-point conversion operations. if (!IsSigned && !Subtarget->hasFPCVT()) return false; // FIXME: For now we require the newer floating-point conversion operations // (which are present only on P7 and A2 server models) when converting // to single-precision float. Otherwise we have to generate a lot of // fiddly code to avoid double rounding. If necessary, the fiddly code // can be found in PPCTargetLowering::LowerINT_TO_FP(). if (DstVT == MVT::f32 && !Subtarget->hasFPCVT()) return false; // Extend the input if necessary. if (SrcVT == MVT::i8 || SrcVT == MVT::i16) { unsigned TmpReg = createResultReg(&PPC::G8RCRegClass); if (!PPCEmitIntExt(SrcVT, SrcReg, MVT::i64, TmpReg, !IsSigned)) return false; SrcVT = MVT::i64; SrcReg = TmpReg; } // Move the integer value to an FPR. unsigned FPReg = PPCMoveToFPReg(SrcVT, SrcReg, IsSigned); if (FPReg == 0) return false; // Determine the opcode for the conversion. const TargetRegisterClass *RC = &PPC::F8RCRegClass; unsigned DestReg = createResultReg(RC); unsigned Opc; if (DstVT == MVT::f32) Opc = IsSigned ? PPC::FCFIDS : PPC::FCFIDUS; else Opc = IsSigned ? PPC::FCFID : PPC::FCFIDU; // Generate the convert. BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg) .addReg(FPReg); updateValueMap(I, DestReg); return true; } // Move the floating-point value in SrcReg into an integer destination // register, and return the register (or zero if we can't handle it). // FIXME: When direct register moves are implemented (see PowerISA 2.07), // those should be used instead of moving via a stack slot when the // subtarget permits. unsigned PPCFastISel::PPCMoveToIntReg(const Instruction *I, MVT VT, unsigned SrcReg, bool IsSigned) { // Get a stack slot 8 bytes wide, aligned on an 8-byte boundary. // Note that if have STFIWX available, we could use a 4-byte stack // slot for i32, but this being fast-isel we'll just go with the // easiest code gen possible. Address Addr; Addr.BaseType = Address::FrameIndexBase; Addr.Base.FI = MFI.CreateStackObject(8, Align(8), false); // Store the value from the FPR. if (!PPCEmitStore(MVT::f64, SrcReg, Addr)) return 0; // Reload it into a GPR. If we want an i32 on big endian, modify the // address to have a 4-byte offset so we load from the right place. if (VT == MVT::i32) Addr.Offset = (Subtarget->isLittleEndian()) ? 0 : 4; // Look at the currently assigned register for this instruction // to determine the required register class. unsigned AssignedReg = FuncInfo.ValueMap[I]; const TargetRegisterClass *RC = AssignedReg ? MRI.getRegClass(AssignedReg) : nullptr; Register ResultReg = 0; if (!PPCEmitLoad(VT, ResultReg, Addr, RC, !IsSigned)) return 0; return ResultReg; } // Attempt to fast-select a floating-point-to-integer conversion. // FIXME: Once fast-isel has better support for VSX, conversions using // direct moves should be implemented. bool PPCFastISel::SelectFPToI(const Instruction *I, bool IsSigned) { MVT DstVT, SrcVT; Type *DstTy = I->getType(); if (!isTypeLegal(DstTy, DstVT)) return false; if (DstVT != MVT::i32 && DstVT != MVT::i64) return false; // If we don't have FCTIDUZ, or SPE, and we need it, punt to SelectionDAG. if (DstVT == MVT::i64 && !IsSigned && !Subtarget->hasFPCVT() && !Subtarget->hasSPE()) return false; Value *Src = I->getOperand(0); Type *SrcTy = Src->getType(); if (!isTypeLegal(SrcTy, SrcVT)) return false; if (SrcVT != MVT::f32 && SrcVT != MVT::f64) return false; unsigned SrcReg = getRegForValue(Src); if (SrcReg == 0) return false; // Convert f32 to f64 or convert VSSRC to VSFRC if necessary. This is just a // meaningless copy to get the register class right. const TargetRegisterClass *InRC = MRI.getRegClass(SrcReg); if (InRC == &PPC::F4RCRegClass) SrcReg = copyRegToRegClass(&PPC::F8RCRegClass, SrcReg); else if (InRC == &PPC::VSSRCRegClass) SrcReg = copyRegToRegClass(&PPC::VSFRCRegClass, SrcReg); // Determine the opcode for the conversion, which takes place // entirely within FPRs or VSRs. unsigned DestReg; unsigned Opc; auto RC = MRI.getRegClass(SrcReg); if (Subtarget->hasSPE()) { DestReg = createResultReg(&PPC::GPRCRegClass); if (IsSigned) Opc = InRC == &PPC::GPRCRegClass ? PPC::EFSCTSIZ : PPC::EFDCTSIZ; else Opc = InRC == &PPC::GPRCRegClass ? PPC::EFSCTUIZ : PPC::EFDCTUIZ; } else if (isVSFRCRegClass(RC)) { DestReg = createResultReg(&PPC::VSFRCRegClass); if (DstVT == MVT::i32) Opc = IsSigned ? PPC::XSCVDPSXWS : PPC::XSCVDPUXWS; else Opc = IsSigned ? PPC::XSCVDPSXDS : PPC::XSCVDPUXDS; } else { DestReg = createResultReg(&PPC::F8RCRegClass); if (DstVT == MVT::i32) if (IsSigned) Opc = PPC::FCTIWZ; else Opc = Subtarget->hasFPCVT() ? PPC::FCTIWUZ : PPC::FCTIDZ; else Opc = IsSigned ? PPC::FCTIDZ : PPC::FCTIDUZ; } // Generate the convert. BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg) .addReg(SrcReg); // Now move the integer value from a float register to an integer register. unsigned IntReg = Subtarget->hasSPE() ? DestReg : PPCMoveToIntReg(I, DstVT, DestReg, IsSigned); if (IntReg == 0) return false; updateValueMap(I, IntReg); return true; } // Attempt to fast-select a binary integer operation that isn't already // handled automatically. bool PPCFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) { EVT DestVT = TLI.getValueType(DL, I->getType(), true); // We can get here in the case when we have a binary operation on a non-legal // type and the target independent selector doesn't know how to handle it. if (DestVT != MVT::i16 && DestVT != MVT::i8) return false; // Look at the currently assigned register for this instruction // to determine the required register class. If there is no register, // make a conservative choice (don't assign R0). unsigned AssignedReg = FuncInfo.ValueMap[I]; const TargetRegisterClass *RC = (AssignedReg ? MRI.getRegClass(AssignedReg) : &PPC::GPRC_and_GPRC_NOR0RegClass); bool IsGPRC = RC->hasSuperClassEq(&PPC::GPRCRegClass); unsigned Opc; switch (ISDOpcode) { default: return false; case ISD::ADD: Opc = IsGPRC ? PPC::ADD4 : PPC::ADD8; break; case ISD::OR: Opc = IsGPRC ? PPC::OR : PPC::OR8; break; case ISD::SUB: Opc = IsGPRC ? PPC::SUBF : PPC::SUBF8; break; } unsigned ResultReg = createResultReg(RC ? RC : &PPC::G8RCRegClass); unsigned SrcReg1 = getRegForValue(I->getOperand(0)); if (SrcReg1 == 0) return false; // Handle case of small immediate operand. if (const ConstantInt *ConstInt = dyn_cast(I->getOperand(1))) { const APInt &CIVal = ConstInt->getValue(); int Imm = (int)CIVal.getSExtValue(); bool UseImm = true; if (isInt<16>(Imm)) { switch (Opc) { default: llvm_unreachable("Missing case!"); case PPC::ADD4: Opc = PPC::ADDI; MRI.setRegClass(SrcReg1, &PPC::GPRC_and_GPRC_NOR0RegClass); break; case PPC::ADD8: Opc = PPC::ADDI8; MRI.setRegClass(SrcReg1, &PPC::G8RC_and_G8RC_NOX0RegClass); break; case PPC::OR: Opc = PPC::ORI; break; case PPC::OR8: Opc = PPC::ORI8; break; case PPC::SUBF: if (Imm == -32768) UseImm = false; else { Opc = PPC::ADDI; MRI.setRegClass(SrcReg1, &PPC::GPRC_and_GPRC_NOR0RegClass); Imm = -Imm; } break; case PPC::SUBF8: if (Imm == -32768) UseImm = false; else { Opc = PPC::ADDI8; MRI.setRegClass(SrcReg1, &PPC::G8RC_and_G8RC_NOX0RegClass); Imm = -Imm; } break; } if (UseImm) { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg) .addReg(SrcReg1) .addImm(Imm); updateValueMap(I, ResultReg); return true; } } } // Reg-reg case. unsigned SrcReg2 = getRegForValue(I->getOperand(1)); if (SrcReg2 == 0) return false; // Reverse operands for subtract-from. if (ISDOpcode == ISD::SUB) std::swap(SrcReg1, SrcReg2); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg) .addReg(SrcReg1).addReg(SrcReg2); updateValueMap(I, ResultReg); return true; } // Handle arguments to a call that we're attempting to fast-select. // Return false if the arguments are too complex for us at the moment. bool PPCFastISel::processCallArgs(SmallVectorImpl &Args, SmallVectorImpl &ArgRegs, SmallVectorImpl &ArgVTs, SmallVectorImpl &ArgFlags, SmallVectorImpl &RegArgs, CallingConv::ID CC, unsigned &NumBytes, bool IsVarArg) { SmallVector ArgLocs; CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, ArgLocs, *Context); // Reserve space for the linkage area on the stack. unsigned LinkageSize = Subtarget->getFrameLowering()->getLinkageSize(); CCInfo.AllocateStack(LinkageSize, Align(8)); CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CC_PPC64_ELF_FIS); // Bail out if we can't handle any of the arguments. for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) { CCValAssign &VA = ArgLocs[I]; MVT ArgVT = ArgVTs[VA.getValNo()]; // Skip vector arguments for now, as well as long double and // uint128_t, and anything that isn't passed in a register. if (ArgVT.isVector() || ArgVT.getSizeInBits() > 64 || ArgVT == MVT::i1 || !VA.isRegLoc() || VA.needsCustom()) return false; // Skip bit-converted arguments for now. if (VA.getLocInfo() == CCValAssign::BCvt) return false; } // Get a count of how many bytes are to be pushed onto the stack. NumBytes = CCInfo.getNextStackOffset(); // The prolog code of the callee may store up to 8 GPR argument registers to // the stack, allowing va_start to index over them in memory if its varargs. // Because we cannot tell if this is needed on the caller side, we have to // conservatively assume that it is needed. As such, make sure we have at // least enough stack space for the caller to store the 8 GPRs. // FIXME: On ELFv2, it may be unnecessary to allocate the parameter area. NumBytes = std::max(NumBytes, LinkageSize + 64); // Issue CALLSEQ_START. BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TII.getCallFrameSetupOpcode())) .addImm(NumBytes).addImm(0); // Prepare to assign register arguments. Every argument uses up a // GPR protocol register even if it's passed in a floating-point // register (unless we're using the fast calling convention). unsigned NextGPR = PPC::X3; unsigned NextFPR = PPC::F1; // Process arguments. for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) { CCValAssign &VA = ArgLocs[I]; unsigned Arg = ArgRegs[VA.getValNo()]; MVT ArgVT = ArgVTs[VA.getValNo()]; // Handle argument promotion and bitcasts. switch (VA.getLocInfo()) { default: llvm_unreachable("Unknown loc info!"); case CCValAssign::Full: break; case CCValAssign::SExt: { MVT DestVT = VA.getLocVT(); const TargetRegisterClass *RC = (DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass; unsigned TmpReg = createResultReg(RC); if (!PPCEmitIntExt(ArgVT, Arg, DestVT, TmpReg, /*IsZExt*/false)) llvm_unreachable("Failed to emit a sext!"); ArgVT = DestVT; Arg = TmpReg; break; } case CCValAssign::AExt: case CCValAssign::ZExt: { MVT DestVT = VA.getLocVT(); const TargetRegisterClass *RC = (DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass; unsigned TmpReg = createResultReg(RC); if (!PPCEmitIntExt(ArgVT, Arg, DestVT, TmpReg, /*IsZExt*/true)) llvm_unreachable("Failed to emit a zext!"); ArgVT = DestVT; Arg = TmpReg; break; } case CCValAssign::BCvt: { // FIXME: Not yet handled. llvm_unreachable("Should have bailed before getting here!"); break; } } // Copy this argument to the appropriate register. unsigned ArgReg; if (ArgVT == MVT::f32 || ArgVT == MVT::f64) { ArgReg = NextFPR++; if (CC != CallingConv::Fast) ++NextGPR; } else ArgReg = NextGPR++; BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY), ArgReg).addReg(Arg); RegArgs.push_back(ArgReg); } return true; } // For a call that we've determined we can fast-select, finish the // call sequence and generate a copy to obtain the return value (if any). bool PPCFastISel::finishCall(MVT RetVT, CallLoweringInfo &CLI, unsigned &NumBytes) { CallingConv::ID CC = CLI.CallConv; // Issue CallSEQ_END. BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TII.getCallFrameDestroyOpcode())) .addImm(NumBytes).addImm(0); // Next, generate a copy to obtain the return value. // FIXME: No multi-register return values yet, though I don't foresee // any real difficulties there. if (RetVT != MVT::isVoid) { SmallVector RVLocs; CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context); CCInfo.AnalyzeCallResult(RetVT, RetCC_PPC64_ELF_FIS); CCValAssign &VA = RVLocs[0]; assert(RVLocs.size() == 1 && "No support for multi-reg return values!"); assert(VA.isRegLoc() && "Can only return in registers!"); MVT DestVT = VA.getValVT(); MVT CopyVT = DestVT; // Ints smaller than a register still arrive in a full 64-bit // register, so make sure we recognize this. if (RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32) CopyVT = MVT::i64; unsigned SourcePhysReg = VA.getLocReg(); unsigned ResultReg = 0; if (RetVT == CopyVT) { const TargetRegisterClass *CpyRC = TLI.getRegClassFor(CopyVT); ResultReg = copyRegToRegClass(CpyRC, SourcePhysReg); // If necessary, round the floating result to single precision. } else if (CopyVT == MVT::f64) { ResultReg = createResultReg(TLI.getRegClassFor(RetVT)); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::FRSP), ResultReg).addReg(SourcePhysReg); // If only the low half of a general register is needed, generate // a GPRC copy instead of a G8RC copy. (EXTRACT_SUBREG can't be // used along the fast-isel path (not lowered), and downstream logic // also doesn't like a direct subreg copy on a physical reg.) } else if (RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32) { // Convert physical register from G8RC to GPRC. SourcePhysReg -= PPC::X0 - PPC::R0; ResultReg = copyRegToRegClass(&PPC::GPRCRegClass, SourcePhysReg); } assert(ResultReg && "ResultReg unset!"); CLI.InRegs.push_back(SourcePhysReg); CLI.ResultReg = ResultReg; CLI.NumResultRegs = 1; } return true; } bool PPCFastISel::fastLowerCall(CallLoweringInfo &CLI) { CallingConv::ID CC = CLI.CallConv; bool IsTailCall = CLI.IsTailCall; bool IsVarArg = CLI.IsVarArg; const Value *Callee = CLI.Callee; const MCSymbol *Symbol = CLI.Symbol; if (!Callee && !Symbol) return false; // Allow SelectionDAG isel to handle tail calls. if (IsTailCall) return false; // Let SDISel handle vararg functions. if (IsVarArg) return false; // If this is a PC-Rel function, let SDISel handle the call. if (Subtarget->isUsingPCRelativeCalls()) return false; // Handle simple calls for now, with legal return types and // those that can be extended. Type *RetTy = CLI.RetTy; MVT RetVT; if (RetTy->isVoidTy()) RetVT = MVT::isVoid; else if (!isTypeLegal(RetTy, RetVT) && RetVT != MVT::i16 && RetVT != MVT::i8) return false; else if (RetVT == MVT::i1 && Subtarget->useCRBits()) // We can't handle boolean returns when CR bits are in use. return false; // FIXME: No multi-register return values yet. if (RetVT != MVT::isVoid && RetVT != MVT::i8 && RetVT != MVT::i16 && RetVT != MVT::i32 && RetVT != MVT::i64 && RetVT != MVT::f32 && RetVT != MVT::f64) { SmallVector RVLocs; CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, RVLocs, *Context); CCInfo.AnalyzeCallResult(RetVT, RetCC_PPC64_ELF_FIS); if (RVLocs.size() > 1) return false; } // Bail early if more than 8 arguments, as we only currently // handle arguments passed in registers. unsigned NumArgs = CLI.OutVals.size(); if (NumArgs > 8) return false; // Set up the argument vectors. SmallVector Args; SmallVector ArgRegs; SmallVector ArgVTs; SmallVector ArgFlags; Args.reserve(NumArgs); ArgRegs.reserve(NumArgs); ArgVTs.reserve(NumArgs); ArgFlags.reserve(NumArgs); for (unsigned i = 0, ie = NumArgs; i != ie; ++i) { // Only handle easy calls for now. It would be reasonably easy // to handle <= 8-byte structures passed ByVal in registers, but we // have to ensure they are right-justified in the register. ISD::ArgFlagsTy Flags = CLI.OutFlags[i]; if (Flags.isInReg() || Flags.isSRet() || Flags.isNest() || Flags.isByVal()) return false; Value *ArgValue = CLI.OutVals[i]; Type *ArgTy = ArgValue->getType(); MVT ArgVT; if (!isTypeLegal(ArgTy, ArgVT) && ArgVT != MVT::i16 && ArgVT != MVT::i8) return false; // FIXME: FastISel cannot handle non-simple types yet, including 128-bit FP // types, which is passed through vector register. Skip these types and // fallback to default SelectionDAG based selection. if (ArgVT.isVector() || ArgVT == MVT::f128) return false; unsigned Arg = getRegForValue(ArgValue); if (Arg == 0) return false; Args.push_back(ArgValue); ArgRegs.push_back(Arg); ArgVTs.push_back(ArgVT); ArgFlags.push_back(Flags); } // Process the arguments. SmallVector RegArgs; unsigned NumBytes; if (!processCallArgs(Args, ArgRegs, ArgVTs, ArgFlags, RegArgs, CC, NumBytes, IsVarArg)) return false; MachineInstrBuilder MIB; // FIXME: No handling for function pointers yet. This requires // implementing the function descriptor (OPD) setup. const GlobalValue *GV = dyn_cast(Callee); if (!GV) { // patchpoints are a special case; they always dispatch to a pointer value. // However, we don't actually want to generate the indirect call sequence // here (that will be generated, as necessary, during asm printing), and // the call we generate here will be erased by FastISel::selectPatchpoint, // so don't try very hard... if (CLI.IsPatchPoint) MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::NOP)); else return false; } else { // Build direct call with NOP for TOC restore. // FIXME: We can and should optimize away the NOP for local calls. MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::BL8_NOP)); // Add callee. MIB.addGlobalAddress(GV); } // Add implicit physical register uses to the call. for (unsigned II = 0, IE = RegArgs.size(); II != IE; ++II) MIB.addReg(RegArgs[II], RegState::Implicit); // Direct calls, in both the ELF V1 and V2 ABIs, need the TOC register live // into the call. PPCFuncInfo->setUsesTOCBasePtr(); MIB.addReg(PPC::X2, RegState::Implicit); // Add a register mask with the call-preserved registers. Proper // defs for return values will be added by setPhysRegsDeadExcept(). MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC)); CLI.Call = MIB; // Finish off the call including any return values. return finishCall(RetVT, CLI, NumBytes); } // Attempt to fast-select a return instruction. bool PPCFastISel::SelectRet(const Instruction *I) { if (!FuncInfo.CanLowerReturn) return false; const ReturnInst *Ret = cast(I); const Function &F = *I->getParent()->getParent(); // Build a list of return value registers. SmallVector RetRegs; CallingConv::ID CC = F.getCallingConv(); if (Ret->getNumOperands() > 0) { SmallVector Outs; GetReturnInfo(CC, F.getReturnType(), F.getAttributes(), Outs, TLI, DL); // Analyze operands of the call, assigning locations to each operand. SmallVector ValLocs; CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, *Context); CCInfo.AnalyzeReturn(Outs, RetCC_PPC64_ELF_FIS); const Value *RV = Ret->getOperand(0); // FIXME: Only one output register for now. if (ValLocs.size() > 1) return false; // Special case for returning a constant integer of any size - materialize // the constant as an i64 and copy it to the return register. if (const ConstantInt *CI = dyn_cast(RV)) { CCValAssign &VA = ValLocs[0]; Register RetReg = VA.getLocReg(); // We still need to worry about properly extending the sign. For example, // we could have only a single bit or a constant that needs zero // extension rather than sign extension. Make sure we pass the return // value extension property to integer materialization. unsigned SrcReg = PPCMaterializeInt(CI, MVT::i64, VA.getLocInfo() != CCValAssign::ZExt); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY), RetReg).addReg(SrcReg); RetRegs.push_back(RetReg); } else { unsigned Reg = getRegForValue(RV); if (Reg == 0) return false; // Copy the result values into the output registers. for (unsigned i = 0; i < ValLocs.size(); ++i) { CCValAssign &VA = ValLocs[i]; assert(VA.isRegLoc() && "Can only return in registers!"); RetRegs.push_back(VA.getLocReg()); unsigned SrcReg = Reg + VA.getValNo(); EVT RVEVT = TLI.getValueType(DL, RV->getType()); if (!RVEVT.isSimple()) return false; MVT RVVT = RVEVT.getSimpleVT(); MVT DestVT = VA.getLocVT(); if (RVVT != DestVT && RVVT != MVT::i8 && RVVT != MVT::i16 && RVVT != MVT::i32) return false; if (RVVT != DestVT) { switch (VA.getLocInfo()) { default: llvm_unreachable("Unknown loc info!"); case CCValAssign::Full: llvm_unreachable("Full value assign but types don't match?"); case CCValAssign::AExt: case CCValAssign::ZExt: { const TargetRegisterClass *RC = (DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass; unsigned TmpReg = createResultReg(RC); if (!PPCEmitIntExt(RVVT, SrcReg, DestVT, TmpReg, true)) return false; SrcReg = TmpReg; break; } case CCValAssign::SExt: { const TargetRegisterClass *RC = (DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass; unsigned TmpReg = createResultReg(RC); if (!PPCEmitIntExt(RVVT, SrcReg, DestVT, TmpReg, false)) return false; SrcReg = TmpReg; break; } } } BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY), RetRegs[i]) .addReg(SrcReg); } } } MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::BLR8)); for (unsigned i = 0, e = RetRegs.size(); i != e; ++i) MIB.addReg(RetRegs[i], RegState::Implicit); return true; } // Attempt to emit an integer extend of SrcReg into DestReg. Both // signed and zero extensions are supported. Return false if we // can't handle it. bool PPCFastISel::PPCEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, unsigned DestReg, bool IsZExt) { if (DestVT != MVT::i32 && DestVT != MVT::i64) return false; if (SrcVT != MVT::i8 && SrcVT != MVT::i16 && SrcVT != MVT::i32) return false; // Signed extensions use EXTSB, EXTSH, EXTSW. if (!IsZExt) { unsigned Opc; if (SrcVT == MVT::i8) Opc = (DestVT == MVT::i32) ? PPC::EXTSB : PPC::EXTSB8_32_64; else if (SrcVT == MVT::i16) Opc = (DestVT == MVT::i32) ? PPC::EXTSH : PPC::EXTSH8_32_64; else { assert(DestVT == MVT::i64 && "Signed extend from i32 to i32??"); Opc = PPC::EXTSW_32_64; } BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg) .addReg(SrcReg); // Unsigned 32-bit extensions use RLWINM. } else if (DestVT == MVT::i32) { unsigned MB; if (SrcVT == MVT::i8) MB = 24; else { assert(SrcVT == MVT::i16 && "Unsigned extend from i32 to i32??"); MB = 16; } BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::RLWINM), DestReg) .addReg(SrcReg).addImm(/*SH=*/0).addImm(MB).addImm(/*ME=*/31); // Unsigned 64-bit extensions use RLDICL (with a 32-bit source). } else { unsigned MB; if (SrcVT == MVT::i8) MB = 56; else if (SrcVT == MVT::i16) MB = 48; else MB = 32; BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::RLDICL_32_64), DestReg) .addReg(SrcReg).addImm(/*SH=*/0).addImm(MB); } return true; } // Attempt to fast-select an indirect branch instruction. bool PPCFastISel::SelectIndirectBr(const Instruction *I) { unsigned AddrReg = getRegForValue(I->getOperand(0)); if (AddrReg == 0) return false; BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::MTCTR8)) .addReg(AddrReg); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::BCTR8)); const IndirectBrInst *IB = cast(I); for (const BasicBlock *SuccBB : IB->successors()) FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[SuccBB]); return true; } // Attempt to fast-select an integer truncate instruction. bool PPCFastISel::SelectTrunc(const Instruction *I) { Value *Src = I->getOperand(0); EVT SrcVT = TLI.getValueType(DL, Src->getType(), true); EVT DestVT = TLI.getValueType(DL, I->getType(), true); if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16) return false; if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8) return false; unsigned SrcReg = getRegForValue(Src); if (!SrcReg) return false; // The only interesting case is when we need to switch register classes. if (SrcVT == MVT::i64) SrcReg = copyRegToRegClass(&PPC::GPRCRegClass, SrcReg, 0, PPC::sub_32); updateValueMap(I, SrcReg); return true; } // Attempt to fast-select an integer extend instruction. bool PPCFastISel::SelectIntExt(const Instruction *I) { Type *DestTy = I->getType(); Value *Src = I->getOperand(0); Type *SrcTy = Src->getType(); bool IsZExt = isa(I); unsigned SrcReg = getRegForValue(Src); if (!SrcReg) return false; EVT SrcEVT, DestEVT; SrcEVT = TLI.getValueType(DL, SrcTy, true); DestEVT = TLI.getValueType(DL, DestTy, true); if (!SrcEVT.isSimple()) return false; if (!DestEVT.isSimple()) return false; MVT SrcVT = SrcEVT.getSimpleVT(); MVT DestVT = DestEVT.getSimpleVT(); // If we know the register class needed for the result of this // instruction, use it. Otherwise pick the register class of the // correct size that does not contain X0/R0, since we don't know // whether downstream uses permit that assignment. unsigned AssignedReg = FuncInfo.ValueMap[I]; const TargetRegisterClass *RC = (AssignedReg ? MRI.getRegClass(AssignedReg) : (DestVT == MVT::i64 ? &PPC::G8RC_and_G8RC_NOX0RegClass : &PPC::GPRC_and_GPRC_NOR0RegClass)); unsigned ResultReg = createResultReg(RC); if (!PPCEmitIntExt(SrcVT, SrcReg, DestVT, ResultReg, IsZExt)) return false; updateValueMap(I, ResultReg); return true; } // Attempt to fast-select an instruction that wasn't handled by // the table-generated machinery. bool PPCFastISel::fastSelectInstruction(const Instruction *I) { switch (I->getOpcode()) { case Instruction::Load: return SelectLoad(I); case Instruction::Store: return SelectStore(I); case Instruction::Br: return SelectBranch(I); case Instruction::IndirectBr: return SelectIndirectBr(I); case Instruction::FPExt: return SelectFPExt(I); case Instruction::FPTrunc: return SelectFPTrunc(I); case Instruction::SIToFP: return SelectIToFP(I, /*IsSigned*/ true); case Instruction::UIToFP: return SelectIToFP(I, /*IsSigned*/ false); case Instruction::FPToSI: return SelectFPToI(I, /*IsSigned*/ true); case Instruction::FPToUI: return SelectFPToI(I, /*IsSigned*/ false); case Instruction::Add: return SelectBinaryIntOp(I, ISD::ADD); case Instruction::Or: return SelectBinaryIntOp(I, ISD::OR); case Instruction::Sub: return SelectBinaryIntOp(I, ISD::SUB); case Instruction::Call: // On AIX, call lowering uses the DAG-ISEL path currently so that the // callee of the direct function call instruction will be mapped to the // symbol for the function's entry point, which is distinct from the // function descriptor symbol. The latter is the symbol whose XCOFF symbol // name is the C-linkage name of the source level function. if (TM.getTargetTriple().isOSAIX()) break; return selectCall(I); case Instruction::Ret: return SelectRet(I); case Instruction::Trunc: return SelectTrunc(I); case Instruction::ZExt: case Instruction::SExt: return SelectIntExt(I); // Here add other flavors of Instruction::XXX that automated // cases don't catch. For example, switches are terminators // that aren't yet handled. default: break; } return false; } // Materialize a floating-point constant into a register, and return // the register number (or zero if we failed to handle it). unsigned PPCFastISel::PPCMaterializeFP(const ConstantFP *CFP, MVT VT) { // If this is a PC-Rel function, let SDISel handle constant pool. if (Subtarget->isUsingPCRelativeCalls()) return false; // No plans to handle long double here. if (VT != MVT::f32 && VT != MVT::f64) return 0; // All FP constants are loaded from the constant pool. Align Alignment = DL.getPrefTypeAlign(CFP->getType()); unsigned Idx = MCP.getConstantPoolIndex(cast(CFP), Alignment); const bool HasSPE = Subtarget->hasSPE(); const TargetRegisterClass *RC; if (HasSPE) RC = ((VT == MVT::f32) ? &PPC::GPRCRegClass : &PPC::SPERCRegClass); else RC = ((VT == MVT::f32) ? &PPC::F4RCRegClass : &PPC::F8RCRegClass); unsigned DestReg = createResultReg(RC); CodeModel::Model CModel = TM.getCodeModel(); MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand( MachinePointerInfo::getConstantPool(*FuncInfo.MF), MachineMemOperand::MOLoad, (VT == MVT::f32) ? 4 : 8, Alignment); unsigned Opc; if (HasSPE) Opc = ((VT == MVT::f32) ? PPC::SPELWZ : PPC::EVLDD); else Opc = ((VT == MVT::f32) ? PPC::LFS : PPC::LFD); unsigned TmpReg = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass); PPCFuncInfo->setUsesTOCBasePtr(); // For small code model, generate a LF[SD](0, LDtocCPT(Idx, X2)). if (CModel == CodeModel::Small) { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::LDtocCPT), TmpReg) .addConstantPoolIndex(Idx).addReg(PPC::X2); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg) .addImm(0).addReg(TmpReg).addMemOperand(MMO); } else { // Otherwise we generate LF[SD](Idx[lo], ADDIStocHA8(X2, Idx)). BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDIStocHA8), TmpReg).addReg(PPC::X2).addConstantPoolIndex(Idx); // But for large code model, we must generate a LDtocL followed // by the LF[SD]. if (CModel == CodeModel::Large) { unsigned TmpReg2 = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::LDtocL), TmpReg2).addConstantPoolIndex(Idx).addReg(TmpReg); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg) .addImm(0) .addReg(TmpReg2); } else BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg) .addConstantPoolIndex(Idx, 0, PPCII::MO_TOC_LO) .addReg(TmpReg) .addMemOperand(MMO); } return DestReg; } // Materialize the address of a global value into a register, and return // the register number (or zero if we failed to handle it). unsigned PPCFastISel::PPCMaterializeGV(const GlobalValue *GV, MVT VT) { // If this is a PC-Rel function, let SDISel handle GV materialization. if (Subtarget->isUsingPCRelativeCalls()) return false; assert(VT == MVT::i64 && "Non-address!"); const TargetRegisterClass *RC = &PPC::G8RC_and_G8RC_NOX0RegClass; unsigned DestReg = createResultReg(RC); // Global values may be plain old object addresses, TLS object // addresses, constant pool entries, or jump tables. How we generate // code for these may depend on small, medium, or large code model. CodeModel::Model CModel = TM.getCodeModel(); // FIXME: Jump tables are not yet required because fast-isel doesn't // handle switches; if that changes, we need them as well. For now, // what follows assumes everything's a generic (or TLS) global address. // FIXME: We don't yet handle the complexity of TLS. if (GV->isThreadLocal()) return 0; PPCFuncInfo->setUsesTOCBasePtr(); // For small code model, generate a simple TOC load. if (CModel == CodeModel::Small) BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::LDtoc), DestReg) .addGlobalAddress(GV) .addReg(PPC::X2); else { // If the address is an externally defined symbol, a symbol with common // or externally available linkage, a non-local function address, or a // jump table address (not yet needed), or if we are generating code // for large code model, we generate: // LDtocL(GV, ADDIStocHA8(%x2, GV)) // Otherwise we generate: // ADDItocL(ADDIStocHA8(%x2, GV), GV) // Either way, start with the ADDIStocHA8: unsigned HighPartReg = createResultReg(RC); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDIStocHA8), HighPartReg).addReg(PPC::X2).addGlobalAddress(GV); if (Subtarget->isGVIndirectSymbol(GV)) { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::LDtocL), DestReg).addGlobalAddress(GV).addReg(HighPartReg); } else { // Otherwise generate the ADDItocL. BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDItocL), DestReg).addReg(HighPartReg).addGlobalAddress(GV); } } return DestReg; } // Materialize a 32-bit integer constant into a register, and return // the register number (or zero if we failed to handle it). unsigned PPCFastISel::PPCMaterialize32BitInt(int64_t Imm, const TargetRegisterClass *RC) { unsigned Lo = Imm & 0xFFFF; unsigned Hi = (Imm >> 16) & 0xFFFF; unsigned ResultReg = createResultReg(RC); bool IsGPRC = RC->hasSuperClassEq(&PPC::GPRCRegClass); if (isInt<16>(Imm)) BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(IsGPRC ? PPC::LI : PPC::LI8), ResultReg) .addImm(Imm); else if (Lo) { // Both Lo and Hi have nonzero bits. unsigned TmpReg = createResultReg(RC); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(IsGPRC ? PPC::LIS : PPC::LIS8), TmpReg) .addImm(Hi); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(IsGPRC ? PPC::ORI : PPC::ORI8), ResultReg) .addReg(TmpReg).addImm(Lo); } else // Just Hi bits. BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(IsGPRC ? PPC::LIS : PPC::LIS8), ResultReg) .addImm(Hi); return ResultReg; } // Materialize a 64-bit integer constant into a register, and return // the register number (or zero if we failed to handle it). unsigned PPCFastISel::PPCMaterialize64BitInt(int64_t Imm, const TargetRegisterClass *RC) { unsigned Remainder = 0; unsigned Shift = 0; // If the value doesn't fit in 32 bits, see if we can shift it // so that it fits in 32 bits. if (!isInt<32>(Imm)) { Shift = countTrailingZeros(Imm); int64_t ImmSh = static_cast(Imm) >> Shift; if (isInt<32>(ImmSh)) Imm = ImmSh; else { Remainder = Imm; Shift = 32; Imm >>= 32; } } // Handle the high-order 32 bits (if shifted) or the whole 32 bits // (if not shifted). unsigned TmpReg1 = PPCMaterialize32BitInt(Imm, RC); if (!Shift) return TmpReg1; // If upper 32 bits were not zero, we've built them and need to shift // them into place. unsigned TmpReg2; if (Imm) { TmpReg2 = createResultReg(RC); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::RLDICR), TmpReg2).addReg(TmpReg1).addImm(Shift).addImm(63 - Shift); } else TmpReg2 = TmpReg1; unsigned TmpReg3, Hi, Lo; if ((Hi = (Remainder >> 16) & 0xFFFF)) { TmpReg3 = createResultReg(RC); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ORIS8), TmpReg3).addReg(TmpReg2).addImm(Hi); } else TmpReg3 = TmpReg2; if ((Lo = Remainder & 0xFFFF)) { unsigned ResultReg = createResultReg(RC); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ORI8), ResultReg).addReg(TmpReg3).addImm(Lo); return ResultReg; } return TmpReg3; } // Materialize an integer constant into a register, and return // the register number (or zero if we failed to handle it). unsigned PPCFastISel::PPCMaterializeInt(const ConstantInt *CI, MVT VT, bool UseSExt) { // If we're using CR bit registers for i1 values, handle that as a special // case first. if (VT == MVT::i1 && Subtarget->useCRBits()) { unsigned ImmReg = createResultReg(&PPC::CRBITRCRegClass); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CI->isZero() ? PPC::CRUNSET : PPC::CRSET), ImmReg); return ImmReg; } if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8 && VT != MVT::i1) return 0; const TargetRegisterClass *RC = ((VT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass); int64_t Imm = UseSExt ? CI->getSExtValue() : CI->getZExtValue(); // If the constant is in range, use a load-immediate. // Since LI will sign extend the constant we need to make sure that for // our zeroext constants that the sign extended constant fits into 16-bits - // a range of 0..0x7fff. if (isInt<16>(Imm)) { unsigned Opc = (VT == MVT::i64) ? PPC::LI8 : PPC::LI; unsigned ImmReg = createResultReg(RC); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ImmReg) .addImm(Imm); return ImmReg; } // Construct the constant piecewise. if (VT == MVT::i64) return PPCMaterialize64BitInt(Imm, RC); else if (VT == MVT::i32) return PPCMaterialize32BitInt(Imm, RC); return 0; } // Materialize a constant into a register, and return the register // number (or zero if we failed to handle it). unsigned PPCFastISel::fastMaterializeConstant(const Constant *C) { EVT CEVT = TLI.getValueType(DL, C->getType(), true); // Only handle simple types. if (!CEVT.isSimple()) return 0; MVT VT = CEVT.getSimpleVT(); if (const ConstantFP *CFP = dyn_cast(C)) return PPCMaterializeFP(CFP, VT); else if (const GlobalValue *GV = dyn_cast(C)) return PPCMaterializeGV(GV, VT); else if (const ConstantInt *CI = dyn_cast(C)) // Note that the code in FunctionLoweringInfo::ComputePHILiveOutRegInfo // assumes that constant PHI operands will be zero extended, and failure to // match that assumption will cause problems if we sign extend here but // some user of a PHI is in a block for which we fall back to full SDAG // instruction selection. return PPCMaterializeInt(CI, VT, false); return 0; } // Materialize the address created by an alloca into a register, and // return the register number (or zero if we failed to handle it). unsigned PPCFastISel::fastMaterializeAlloca(const AllocaInst *AI) { // Don't handle dynamic allocas. if (!FuncInfo.StaticAllocaMap.count(AI)) return 0; MVT VT; if (!isLoadTypeLegal(AI->getType(), VT)) return 0; DenseMap::iterator SI = FuncInfo.StaticAllocaMap.find(AI); if (SI != FuncInfo.StaticAllocaMap.end()) { unsigned ResultReg = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDI8), ResultReg).addFrameIndex(SI->second).addImm(0); return ResultReg; } return 0; } // Fold loads into extends when possible. // FIXME: We can have multiple redundant extend/trunc instructions // following a load. The folding only picks up one. Extend this // to check subsequent instructions for the same pattern and remove // them. Thus ResultReg should be the def reg for the last redundant // instruction in a chain, and all intervening instructions can be // removed from parent. Change test/CodeGen/PowerPC/fast-isel-fold.ll // to add ELF64-NOT: rldicl to the appropriate tests when this works. bool PPCFastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo, const LoadInst *LI) { // Verify we have a legal type before going any further. MVT VT; if (!isLoadTypeLegal(LI->getType(), VT)) return false; // Combine load followed by zero- or sign-extend. bool IsZExt = false; switch(MI->getOpcode()) { default: return false; case PPC::RLDICL: case PPC::RLDICL_32_64: { IsZExt = true; unsigned MB = MI->getOperand(3).getImm(); if ((VT == MVT::i8 && MB <= 56) || (VT == MVT::i16 && MB <= 48) || (VT == MVT::i32 && MB <= 32)) break; return false; } case PPC::RLWINM: case PPC::RLWINM8: { IsZExt = true; unsigned MB = MI->getOperand(3).getImm(); if ((VT == MVT::i8 && MB <= 24) || (VT == MVT::i16 && MB <= 16)) break; return false; } case PPC::EXTSB: case PPC::EXTSB8: case PPC::EXTSB8_32_64: /* There is no sign-extending load-byte instruction. */ return false; case PPC::EXTSH: case PPC::EXTSH8: case PPC::EXTSH8_32_64: { if (VT != MVT::i16 && VT != MVT::i8) return false; break; } case PPC::EXTSW: case PPC::EXTSW_32: case PPC::EXTSW_32_64: { if (VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8) return false; break; } } // See if we can handle this address. Address Addr; if (!PPCComputeAddress(LI->getOperand(0), Addr)) return false; Register ResultReg = MI->getOperand(0).getReg(); if (!PPCEmitLoad(VT, ResultReg, Addr, nullptr, IsZExt, Subtarget->hasSPE() ? PPC::EVLDD : PPC::LFD)) return false; MachineBasicBlock::iterator I(MI); removeDeadCode(I, std::next(I)); return true; } // Attempt to lower call arguments in a faster way than done by // the selection DAG code. bool PPCFastISel::fastLowerArguments() { // Defer to normal argument lowering for now. It's reasonably // efficient. Consider doing something like ARM to handle the // case where all args fit in registers, no varargs, no float // or vector args. return false; } // Handle materializing integer constants into a register. This is not // automatically generated for PowerPC, so must be explicitly created here. unsigned PPCFastISel::fastEmit_i(MVT Ty, MVT VT, unsigned Opc, uint64_t Imm) { if (Opc != ISD::Constant) return 0; // If we're using CR bit registers for i1 values, handle that as a special // case first. if (VT == MVT::i1 && Subtarget->useCRBits()) { unsigned ImmReg = createResultReg(&PPC::CRBITRCRegClass); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Imm == 0 ? PPC::CRUNSET : PPC::CRSET), ImmReg); return ImmReg; } if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8 && VT != MVT::i1) return 0; const TargetRegisterClass *RC = ((VT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass); if (VT == MVT::i64) return PPCMaterialize64BitInt(Imm, RC); else return PPCMaterialize32BitInt(Imm, RC); } // Override for ADDI and ADDI8 to set the correct register class // on RHS operand 0. The automatic infrastructure naively assumes // GPRC for i32 and G8RC for i64; the concept of "no R0" is lost // for these cases. At the moment, none of the other automatically // generated RI instructions require special treatment. However, once // SelectSelect is implemented, "isel" requires similar handling. // // Also be conservative about the output register class. Avoid // assigning R0 or X0 to the output register for GPRC and G8RC // register classes, as any such result could be used in ADDI, etc., // where those regs have another meaning. unsigned PPCFastISel::fastEmitInst_ri(unsigned MachineInstOpcode, const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill, uint64_t Imm) { if (MachineInstOpcode == PPC::ADDI) MRI.setRegClass(Op0, &PPC::GPRC_and_GPRC_NOR0RegClass); else if (MachineInstOpcode == PPC::ADDI8) MRI.setRegClass(Op0, &PPC::G8RC_and_G8RC_NOX0RegClass); const TargetRegisterClass *UseRC = (RC == &PPC::GPRCRegClass ? &PPC::GPRC_and_GPRC_NOR0RegClass : (RC == &PPC::G8RCRegClass ? &PPC::G8RC_and_G8RC_NOX0RegClass : RC)); return FastISel::fastEmitInst_ri(MachineInstOpcode, UseRC, Op0, Op0IsKill, Imm); } // Override for instructions with one register operand to avoid use of // R0/X0. The automatic infrastructure isn't aware of the context so // we must be conservative. unsigned PPCFastISel::fastEmitInst_r(unsigned MachineInstOpcode, const TargetRegisterClass* RC, unsigned Op0, bool Op0IsKill) { const TargetRegisterClass *UseRC = (RC == &PPC::GPRCRegClass ? &PPC::GPRC_and_GPRC_NOR0RegClass : (RC == &PPC::G8RCRegClass ? &PPC::G8RC_and_G8RC_NOX0RegClass : RC)); return FastISel::fastEmitInst_r(MachineInstOpcode, UseRC, Op0, Op0IsKill); } // Override for instructions with two register operands to avoid use // of R0/X0. The automatic infrastructure isn't aware of the context // so we must be conservative. unsigned PPCFastISel::fastEmitInst_rr(unsigned MachineInstOpcode, const TargetRegisterClass* RC, unsigned Op0, bool Op0IsKill, unsigned Op1, bool Op1IsKill) { const TargetRegisterClass *UseRC = (RC == &PPC::GPRCRegClass ? &PPC::GPRC_and_GPRC_NOR0RegClass : (RC == &PPC::G8RCRegClass ? &PPC::G8RC_and_G8RC_NOX0RegClass : RC)); return FastISel::fastEmitInst_rr(MachineInstOpcode, UseRC, Op0, Op0IsKill, Op1, Op1IsKill); } namespace llvm { // Create the fast instruction selector for PowerPC64 ELF. FastISel *PPC::createFastISel(FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo) { // Only available on 64-bit ELF for now. const PPCSubtarget &Subtarget = FuncInfo.MF->getSubtarget(); if (Subtarget.is64BitELFABI()) return new PPCFastISel(FuncInfo, LibInfo); return nullptr; } }