//===-- NVPTXISelLowering.h - NVPTX DAG Lowering Interface ------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that NVPTX uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//
|
||
|
|
||
|
#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXISELLOWERING_H
|
||
|
#define LLVM_LIB_TARGET_NVPTX_NVPTXISELLOWERING_H
|
||
|
|
||
|
#include "NVPTX.h"
|
||
|
#include "llvm/CodeGen/SelectionDAG.h"
|
||
|
#include "llvm/CodeGen/TargetLowering.h"
|
||
|
|
||
|
namespace llvm {
|
||
|
namespace NVPTXISD {
|
||
|
enum NodeType : unsigned {
|
||
|
// Start the numbering from where ISD NodeType finishes.
|
||
|
FIRST_NUMBER = ISD::BUILTIN_OP_END,
|
||
|
Wrapper,
|
||
|
CALL,
|
||
|
RET_FLAG,
|
||
|
LOAD_PARAM,
|
||
|
DeclareParam,
|
||
|
DeclareScalarParam,
|
||
|
DeclareRetParam,
|
||
|
DeclareRet,
|
||
|
DeclareScalarRet,
|
||
|
PrintCall,
|
||
|
PrintConvergentCall,
|
||
|
PrintCallUni,
|
||
|
PrintConvergentCallUni,
|
||
|
CallArgBegin,
|
||
|
CallArg,
|
||
|
LastCallArg,
|
||
|
CallArgEnd,
|
||
|
CallVoid,
|
||
|
CallVal,
|
||
|
CallSymbol,
|
||
|
Prototype,
|
||
|
MoveParam,
|
||
|
PseudoUseParam,
|
||
|
RETURN,
|
||
|
CallSeqBegin,
|
||
|
CallSeqEnd,
|
||
|
CallPrototype,
|
||
|
ProxyReg,
|
||
|
FUN_SHFL_CLAMP,
|
||
|
FUN_SHFR_CLAMP,
|
||
|
MUL_WIDE_SIGNED,
|
||
|
MUL_WIDE_UNSIGNED,
|
||
|
IMAD,
|
||
|
SETP_F16X2,
|
||
|
Dummy,
|
||
|
|
||
|
LoadV2 = ISD::FIRST_TARGET_MEMORY_OPCODE,
|
||
|
LoadV4,
|
||
|
LDGV2, // LDG.v2
|
||
|
LDGV4, // LDG.v4
|
||
|
LDUV2, // LDU.v2
|
||
|
LDUV4, // LDU.v4
|
||
|
StoreV2,
|
||
|
StoreV4,
|
||
|
LoadParam,
|
||
|
LoadParamV2,
|
||
|
LoadParamV4,
|
||
|
StoreParam,
|
||
|
StoreParamV2,
|
||
|
StoreParamV4,
|
||
|
StoreParamS32, // to sext and store a <32bit value, not used currently
|
||
|
StoreParamU32, // to zext and store a <32bit value, not used currently
|
||
|
StoreRetval,
|
||
|
StoreRetvalV2,
|
||
|
StoreRetvalV4,
|
||
|
|
||
|
// Texture intrinsics
|
||
|
Tex1DFloatS32,
|
||
|
Tex1DFloatFloat,
|
||
|
Tex1DFloatFloatLevel,
|
||
|
Tex1DFloatFloatGrad,
|
||
|
Tex1DS32S32,
|
||
|
Tex1DS32Float,
|
||
|
Tex1DS32FloatLevel,
|
||
|
Tex1DS32FloatGrad,
|
||
|
Tex1DU32S32,
|
||
|
Tex1DU32Float,
|
||
|
Tex1DU32FloatLevel,
|
||
|
Tex1DU32FloatGrad,
|
||
|
Tex1DArrayFloatS32,
|
||
|
Tex1DArrayFloatFloat,
|
||
|
Tex1DArrayFloatFloatLevel,
|
||
|
Tex1DArrayFloatFloatGrad,
|
||
|
Tex1DArrayS32S32,
|
||
|
Tex1DArrayS32Float,
|
||
|
Tex1DArrayS32FloatLevel,
|
||
|
Tex1DArrayS32FloatGrad,
|
||
|
Tex1DArrayU32S32,
|
||
|
Tex1DArrayU32Float,
|
||
|
Tex1DArrayU32FloatLevel,
|
||
|
Tex1DArrayU32FloatGrad,
|
||
|
Tex2DFloatS32,
|
||
|
Tex2DFloatFloat,
|
||
|
Tex2DFloatFloatLevel,
|
||
|
Tex2DFloatFloatGrad,
|
||
|
Tex2DS32S32,
|
||
|
Tex2DS32Float,
|
||
|
Tex2DS32FloatLevel,
|
||
|
Tex2DS32FloatGrad,
|
||
|
Tex2DU32S32,
|
||
|
Tex2DU32Float,
|
||
|
Tex2DU32FloatLevel,
|
||
|
Tex2DU32FloatGrad,
|
||
|
Tex2DArrayFloatS32,
|
||
|
Tex2DArrayFloatFloat,
|
||
|
Tex2DArrayFloatFloatLevel,
|
||
|
Tex2DArrayFloatFloatGrad,
|
||
|
Tex2DArrayS32S32,
|
||
|
Tex2DArrayS32Float,
|
||
|
Tex2DArrayS32FloatLevel,
|
||
|
Tex2DArrayS32FloatGrad,
|
||
|
Tex2DArrayU32S32,
|
||
|
Tex2DArrayU32Float,
|
||
|
Tex2DArrayU32FloatLevel,
|
||
|
Tex2DArrayU32FloatGrad,
|
||
|
Tex3DFloatS32,
|
||
|
Tex3DFloatFloat,
|
||
|
Tex3DFloatFloatLevel,
|
||
|
Tex3DFloatFloatGrad,
|
||
|
Tex3DS32S32,
|
||
|
Tex3DS32Float,
|
||
|
Tex3DS32FloatLevel,
|
||
|
Tex3DS32FloatGrad,
|
||
|
Tex3DU32S32,
|
||
|
Tex3DU32Float,
|
||
|
Tex3DU32FloatLevel,
|
||
|
Tex3DU32FloatGrad,
|
||
|
TexCubeFloatFloat,
|
||
|
TexCubeFloatFloatLevel,
|
||
|
TexCubeS32Float,
|
||
|
TexCubeS32FloatLevel,
|
||
|
TexCubeU32Float,
|
||
|
TexCubeU32FloatLevel,
|
||
|
TexCubeArrayFloatFloat,
|
||
|
TexCubeArrayFloatFloatLevel,
|
||
|
TexCubeArrayS32Float,
|
||
|
TexCubeArrayS32FloatLevel,
|
||
|
TexCubeArrayU32Float,
|
||
|
TexCubeArrayU32FloatLevel,
|
||
|
Tld4R2DFloatFloat,
|
||
|
Tld4G2DFloatFloat,
|
||
|
Tld4B2DFloatFloat,
|
||
|
Tld4A2DFloatFloat,
|
||
|
Tld4R2DS64Float,
|
||
|
Tld4G2DS64Float,
|
||
|
Tld4B2DS64Float,
|
||
|
Tld4A2DS64Float,
|
||
|
Tld4R2DU64Float,
|
||
|
Tld4G2DU64Float,
|
||
|
Tld4B2DU64Float,
|
||
|
Tld4A2DU64Float,
|
||
|
TexUnified1DFloatS32,
|
||
|
TexUnified1DFloatFloat,
|
||
|
TexUnified1DFloatFloatLevel,
|
||
|
TexUnified1DFloatFloatGrad,
|
||
|
TexUnified1DS32S32,
|
||
|
TexUnified1DS32Float,
|
||
|
TexUnified1DS32FloatLevel,
|
||
|
TexUnified1DS32FloatGrad,
|
||
|
TexUnified1DU32S32,
|
||
|
TexUnified1DU32Float,
|
||
|
TexUnified1DU32FloatLevel,
|
||
|
TexUnified1DU32FloatGrad,
|
||
|
TexUnified1DArrayFloatS32,
|
||
|
TexUnified1DArrayFloatFloat,
|
||
|
TexUnified1DArrayFloatFloatLevel,
|
||
|
TexUnified1DArrayFloatFloatGrad,
|
||
|
TexUnified1DArrayS32S32,
|
||
|
TexUnified1DArrayS32Float,
|
||
|
TexUnified1DArrayS32FloatLevel,
|
||
|
TexUnified1DArrayS32FloatGrad,
|
||
|
TexUnified1DArrayU32S32,
|
||
|
TexUnified1DArrayU32Float,
|
||
|
TexUnified1DArrayU32FloatLevel,
|
||
|
TexUnified1DArrayU32FloatGrad,
|
||
|
TexUnified2DFloatS32,
|
||
|
TexUnified2DFloatFloat,
|
||
|
TexUnified2DFloatFloatLevel,
|
||
|
TexUnified2DFloatFloatGrad,
|
||
|
TexUnified2DS32S32,
|
||
|
TexUnified2DS32Float,
|
||
|
TexUnified2DS32FloatLevel,
|
||
|
TexUnified2DS32FloatGrad,
|
||
|
TexUnified2DU32S32,
|
||
|
TexUnified2DU32Float,
|
||
|
TexUnified2DU32FloatLevel,
|
||
|
TexUnified2DU32FloatGrad,
|
||
|
TexUnified2DArrayFloatS32,
|
||
|
TexUnified2DArrayFloatFloat,
|
||
|
TexUnified2DArrayFloatFloatLevel,
|
||
|
TexUnified2DArrayFloatFloatGrad,
|
||
|
TexUnified2DArrayS32S32,
|
||
|
TexUnified2DArrayS32Float,
|
||
|
TexUnified2DArrayS32FloatLevel,
|
||
|
TexUnified2DArrayS32FloatGrad,
|
||
|
TexUnified2DArrayU32S32,
|
||
|
TexUnified2DArrayU32Float,
|
||
|
TexUnified2DArrayU32FloatLevel,
|
||
|
TexUnified2DArrayU32FloatGrad,
|
||
|
TexUnified3DFloatS32,
|
||
|
TexUnified3DFloatFloat,
|
||
|
TexUnified3DFloatFloatLevel,
|
||
|
TexUnified3DFloatFloatGrad,
|
||
|
TexUnified3DS32S32,
|
||
|
TexUnified3DS32Float,
|
||
|
TexUnified3DS32FloatLevel,
|
||
|
TexUnified3DS32FloatGrad,
|
||
|
TexUnified3DU32S32,
|
||
|
TexUnified3DU32Float,
|
||
|
TexUnified3DU32FloatLevel,
|
||
|
TexUnified3DU32FloatGrad,
|
||
|
TexUnifiedCubeFloatFloat,
|
||
|
TexUnifiedCubeFloatFloatLevel,
|
||
|
TexUnifiedCubeS32Float,
|
||
|
TexUnifiedCubeS32FloatLevel,
|
||
|
TexUnifiedCubeU32Float,
|
||
|
TexUnifiedCubeU32FloatLevel,
|
||
|
TexUnifiedCubeArrayFloatFloat,
|
||
|
TexUnifiedCubeArrayFloatFloatLevel,
|
||
|
TexUnifiedCubeArrayS32Float,
|
||
|
TexUnifiedCubeArrayS32FloatLevel,
|
||
|
TexUnifiedCubeArrayU32Float,
|
||
|
TexUnifiedCubeArrayU32FloatLevel,
|
||
|
Tld4UnifiedR2DFloatFloat,
|
||
|
Tld4UnifiedG2DFloatFloat,
|
||
|
Tld4UnifiedB2DFloatFloat,
|
||
|
Tld4UnifiedA2DFloatFloat,
|
||
|
Tld4UnifiedR2DS64Float,
|
||
|
Tld4UnifiedG2DS64Float,
|
||
|
Tld4UnifiedB2DS64Float,
|
||
|
Tld4UnifiedA2DS64Float,
|
||
|
Tld4UnifiedR2DU64Float,
|
||
|
Tld4UnifiedG2DU64Float,
|
||
|
Tld4UnifiedB2DU64Float,
|
||
|
Tld4UnifiedA2DU64Float,
|
||
|
|
||
|
// Surface intrinsics
|
||
|
Suld1DI8Clamp,
|
||
|
Suld1DI16Clamp,
|
||
|
Suld1DI32Clamp,
|
||
|
Suld1DI64Clamp,
|
||
|
Suld1DV2I8Clamp,
|
||
|
Suld1DV2I16Clamp,
|
||
|
Suld1DV2I32Clamp,
|
||
|
Suld1DV2I64Clamp,
|
||
|
Suld1DV4I8Clamp,
|
||
|
Suld1DV4I16Clamp,
|
||
|
Suld1DV4I32Clamp,
|
||
|
|
||
|
Suld1DArrayI8Clamp,
|
||
|
Suld1DArrayI16Clamp,
|
||
|
Suld1DArrayI32Clamp,
|
||
|
Suld1DArrayI64Clamp,
|
||
|
Suld1DArrayV2I8Clamp,
|
||
|
Suld1DArrayV2I16Clamp,
|
||
|
Suld1DArrayV2I32Clamp,
|
||
|
Suld1DArrayV2I64Clamp,
|
||
|
Suld1DArrayV4I8Clamp,
|
||
|
Suld1DArrayV4I16Clamp,
|
||
|
Suld1DArrayV4I32Clamp,
|
||
|
|
||
|
Suld2DI8Clamp,
|
||
|
Suld2DI16Clamp,
|
||
|
Suld2DI32Clamp,
|
||
|
Suld2DI64Clamp,
|
||
|
Suld2DV2I8Clamp,
|
||
|
Suld2DV2I16Clamp,
|
||
|
Suld2DV2I32Clamp,
|
||
|
Suld2DV2I64Clamp,
|
||
|
Suld2DV4I8Clamp,
|
||
|
Suld2DV4I16Clamp,
|
||
|
Suld2DV4I32Clamp,
|
||
|
|
||
|
Suld2DArrayI8Clamp,
|
||
|
Suld2DArrayI16Clamp,
|
||
|
Suld2DArrayI32Clamp,
|
||
|
Suld2DArrayI64Clamp,
|
||
|
Suld2DArrayV2I8Clamp,
|
||
|
Suld2DArrayV2I16Clamp,
|
||
|
Suld2DArrayV2I32Clamp,
|
||
|
Suld2DArrayV2I64Clamp,
|
||
|
Suld2DArrayV4I8Clamp,
|
||
|
Suld2DArrayV4I16Clamp,
|
||
|
Suld2DArrayV4I32Clamp,
|
||
|
|
||
|
Suld3DI8Clamp,
|
||
|
Suld3DI16Clamp,
|
||
|
Suld3DI32Clamp,
|
||
|
Suld3DI64Clamp,
|
||
|
Suld3DV2I8Clamp,
|
||
|
Suld3DV2I16Clamp,
|
||
|
Suld3DV2I32Clamp,
|
||
|
Suld3DV2I64Clamp,
|
||
|
Suld3DV4I8Clamp,
|
||
|
Suld3DV4I16Clamp,
|
||
|
Suld3DV4I32Clamp,
|
||
|
|
||
|
Suld1DI8Trap,
|
||
|
Suld1DI16Trap,
|
||
|
Suld1DI32Trap,
|
||
|
Suld1DI64Trap,
|
||
|
Suld1DV2I8Trap,
|
||
|
Suld1DV2I16Trap,
|
||
|
Suld1DV2I32Trap,
|
||
|
Suld1DV2I64Trap,
|
||
|
Suld1DV4I8Trap,
|
||
|
Suld1DV4I16Trap,
|
||
|
Suld1DV4I32Trap,
|
||
|
|
||
|
Suld1DArrayI8Trap,
|
||
|
Suld1DArrayI16Trap,
|
||
|
Suld1DArrayI32Trap,
|
||
|
Suld1DArrayI64Trap,
|
||
|
Suld1DArrayV2I8Trap,
|
||
|
Suld1DArrayV2I16Trap,
|
||
|
Suld1DArrayV2I32Trap,
|
||
|
Suld1DArrayV2I64Trap,
|
||
|
Suld1DArrayV4I8Trap,
|
||
|
Suld1DArrayV4I16Trap,
|
||
|
Suld1DArrayV4I32Trap,
|
||
|
|
||
|
Suld2DI8Trap,
|
||
|
Suld2DI16Trap,
|
||
|
Suld2DI32Trap,
|
||
|
Suld2DI64Trap,
|
||
|
Suld2DV2I8Trap,
|
||
|
Suld2DV2I16Trap,
|
||
|
Suld2DV2I32Trap,
|
||
|
Suld2DV2I64Trap,
|
||
|
Suld2DV4I8Trap,
|
||
|
Suld2DV4I16Trap,
|
||
|
Suld2DV4I32Trap,
|
||
|
|
||
|
Suld2DArrayI8Trap,
|
||
|
Suld2DArrayI16Trap,
|
||
|
Suld2DArrayI32Trap,
|
||
|
Suld2DArrayI64Trap,
|
||
|
Suld2DArrayV2I8Trap,
|
||
|
Suld2DArrayV2I16Trap,
|
||
|
Suld2DArrayV2I32Trap,
|
||
|
Suld2DArrayV2I64Trap,
|
||
|
Suld2DArrayV4I8Trap,
|
||
|
Suld2DArrayV4I16Trap,
|
||
|
Suld2DArrayV4I32Trap,
|
||
|
|
||
|
Suld3DI8Trap,
|
||
|
Suld3DI16Trap,
|
||
|
Suld3DI32Trap,
|
||
|
Suld3DI64Trap,
|
||
|
Suld3DV2I8Trap,
|
||
|
Suld3DV2I16Trap,
|
||
|
Suld3DV2I32Trap,
|
||
|
Suld3DV2I64Trap,
|
||
|
Suld3DV4I8Trap,
|
||
|
Suld3DV4I16Trap,
|
||
|
Suld3DV4I32Trap,
|
||
|
|
||
|
Suld1DI8Zero,
|
||
|
Suld1DI16Zero,
|
||
|
Suld1DI32Zero,
|
||
|
Suld1DI64Zero,
|
||
|
Suld1DV2I8Zero,
|
||
|
Suld1DV2I16Zero,
|
||
|
Suld1DV2I32Zero,
|
||
|
Suld1DV2I64Zero,
|
||
|
Suld1DV4I8Zero,
|
||
|
Suld1DV4I16Zero,
|
||
|
Suld1DV4I32Zero,
|
||
|
|
||
|
Suld1DArrayI8Zero,
|
||
|
Suld1DArrayI16Zero,
|
||
|
Suld1DArrayI32Zero,
|
||
|
Suld1DArrayI64Zero,
|
||
|
Suld1DArrayV2I8Zero,
|
||
|
Suld1DArrayV2I16Zero,
|
||
|
Suld1DArrayV2I32Zero,
|
||
|
Suld1DArrayV2I64Zero,
|
||
|
Suld1DArrayV4I8Zero,
|
||
|
Suld1DArrayV4I16Zero,
|
||
|
Suld1DArrayV4I32Zero,
|
||
|
|
||
|
Suld2DI8Zero,
|
||
|
Suld2DI16Zero,
|
||
|
Suld2DI32Zero,
|
||
|
Suld2DI64Zero,
|
||
|
Suld2DV2I8Zero,
|
||
|
Suld2DV2I16Zero,
|
||
|
Suld2DV2I32Zero,
|
||
|
Suld2DV2I64Zero,
|
||
|
Suld2DV4I8Zero,
|
||
|
Suld2DV4I16Zero,
|
||
|
Suld2DV4I32Zero,
|
||
|
|
||
|
Suld2DArrayI8Zero,
|
||
|
Suld2DArrayI16Zero,
|
||
|
Suld2DArrayI32Zero,
|
||
|
Suld2DArrayI64Zero,
|
||
|
Suld2DArrayV2I8Zero,
|
||
|
Suld2DArrayV2I16Zero,
|
||
|
Suld2DArrayV2I32Zero,
|
||
|
Suld2DArrayV2I64Zero,
|
||
|
Suld2DArrayV4I8Zero,
|
||
|
Suld2DArrayV4I16Zero,
|
||
|
Suld2DArrayV4I32Zero,
|
||
|
|
||
|
Suld3DI8Zero,
|
||
|
Suld3DI16Zero,
|
||
|
Suld3DI32Zero,
|
||
|
Suld3DI64Zero,
|
||
|
Suld3DV2I8Zero,
|
||
|
Suld3DV2I16Zero,
|
||
|
Suld3DV2I32Zero,
|
||
|
Suld3DV2I64Zero,
|
||
|
Suld3DV4I8Zero,
|
||
|
Suld3DV4I16Zero,
|
||
|
Suld3DV4I32Zero
|
||
|
};
|
||
|
}
|
||
|
|
||
|
class NVPTXSubtarget;
|
||
|
|
||
|
//===--------------------------------------------------------------------===//
|
||
|
// TargetLowering Implementation
|
||
|
//===--------------------------------------------------------------------===//
|
||
|
class NVPTXTargetLowering : public TargetLowering {
|
||
|
public:
|
||
|
explicit NVPTXTargetLowering(const NVPTXTargetMachine &TM,
|
||
|
const NVPTXSubtarget &STI);
|
||
|
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
|
||
|
|
||
|
SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
|
||
|
|
||
|
const char *getTargetNodeName(unsigned Opcode) const override;
|
||
|
|
||
|
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
|
||
|
MachineFunction &MF,
|
||
|
unsigned Intrinsic) const override;
|
||
|
|
||
|
/// isLegalAddressingMode - Return true if the addressing mode represented
|
||
|
/// by AM is legal for this target, for a load/store of the specified type
|
||
|
/// Used to guide target specific optimizations, like loop strength
|
||
|
/// reduction (LoopStrengthReduce.cpp) and memory optimization for
|
||
|
/// address mode (CodeGenPrepare.cpp)
|
||
|
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
|
||
|
unsigned AS,
|
||
|
Instruction *I = nullptr) const override;
|
||
|
|
||
|
bool isTruncateFree(Type *SrcTy, Type *DstTy) const override {
|
||
|
// Truncating 64-bit to 32-bit is free in SASS.
|
||
|
if (!SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
|
||
|
return false;
|
||
|
return SrcTy->getPrimitiveSizeInBits() == 64 &&
|
||
|
DstTy->getPrimitiveSizeInBits() == 32;
|
||
|
}
|
||
|
|
||
|
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Ctx,
|
||
|
EVT VT) const override {
|
||
|
if (VT.isVector())
|
||
|
return EVT::getVectorVT(Ctx, MVT::i1, VT.getVectorNumElements());
|
||
|
return MVT::i1;
|
||
|
}
|
||
|
|
||
|
ConstraintType getConstraintType(StringRef Constraint) const override;
|
||
|
std::pair<unsigned, const TargetRegisterClass *>
|
||
|
getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
|
||
|
StringRef Constraint, MVT VT) const override;
|
||
|
|
||
|
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
|
||
|
bool isVarArg,
|
||
|
const SmallVectorImpl<ISD::InputArg> &Ins,
|
||
|
const SDLoc &dl, SelectionDAG &DAG,
|
||
|
SmallVectorImpl<SDValue> &InVals) const override;
|
||
|
|
||
|
SDValue LowerCall(CallLoweringInfo &CLI,
|
||
|
SmallVectorImpl<SDValue> &InVals) const override;
|
||
|
|
||
|
std::string getPrototype(const DataLayout &DL, Type *, const ArgListTy &,
|
||
|
const SmallVectorImpl<ISD::OutputArg> &,
|
||
|
MaybeAlign retAlignment, const CallBase &CB,
|
||
|
unsigned UniqueCallSite) const;
|
||
|
|
||
|
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
|
||
|
const SmallVectorImpl<ISD::OutputArg> &Outs,
|
||
|
const SmallVectorImpl<SDValue> &OutVals, const SDLoc &dl,
|
||
|
SelectionDAG &DAG) const override;
|
||
|
|
||
|
void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
|
||
|
std::vector<SDValue> &Ops,
|
||
|
SelectionDAG &DAG) const override;
|
||
|
|
||
|
const NVPTXTargetMachine *nvTM;
|
||
|
|
||
|
// PTX always uses 32-bit shift amounts
|
||
|
MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override {
|
||
|
return MVT::i32;
|
||
|
}
|
||
|
|
||
|
TargetLoweringBase::LegalizeTypeAction
|
||
|
getPreferredVectorAction(MVT VT) const override;
|
||
|
|
||
|
// Get the degree of precision we want from 32-bit floating point division
|
||
|
// operations.
|
||
|
//
|
||
|
// 0 - Use ptx div.approx
|
||
|
// 1 - Use ptx.div.full (approximate, but less so than div.approx)
|
||
|
// 2 - Use IEEE-compliant div instructions, if available.
|
||
|
int getDivF32Level() const;
|
||
|
|
||
|
// Get whether we should use a precise or approximate 32-bit floating point
|
||
|
// sqrt instruction.
|
||
|
bool usePrecSqrtF32() const;
|
||
|
|
||
|
// Get whether we should use instructions that flush floating-point denormals
|
||
|
// to sign-preserving zero.
|
||
|
bool useF32FTZ(const MachineFunction &MF) const;
|
||
|
|
||
|
SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
|
||
|
int &ExtraSteps, bool &UseOneConst,
|
||
|
bool Reciprocal) const override;
|
||
|
|
||
|
unsigned combineRepeatedFPDivisors() const override { return 2; }
|
||
|
|
||
|
bool allowFMA(MachineFunction &MF, CodeGenOpt::Level OptLevel) const;
|
||
|
bool allowUnsafeFPMath(MachineFunction &MF) const;
|
||
|
|
||
|
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
|
||
|
EVT) const override {
|
||
|
return true;
|
||
|
}
|
||
|
|
||
|
bool enableAggressiveFMAFusion(EVT VT) const override { return true; }
|
||
|
|
||
|
// The default is to transform llvm.ctlz(x, false) (where false indicates that
|
||
|
// x == 0 is not undefined behavior) into a branch that checks whether x is 0
|
||
|
// and avoids calling ctlz in that case. We have a dedicated ctlz
|
||
|
// instruction, so we say that ctlz is cheap to speculate.
|
||
|
bool isCheapToSpeculateCtlz() const override { return true; }
|
||
|
|
||
|
private:
|
||
|
const NVPTXSubtarget &STI; // cache the subtarget here
|
||
|
SDValue getParamSymbol(SelectionDAG &DAG, int idx, EVT) const;
|
||
|
|
||
|
SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
|
||
|
SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
|
||
|
SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
|
||
|
|
||
|
SDValue LowerFROUND(SDValue Op, SelectionDAG &DAG) const;
|
||
|
SDValue LowerFROUND32(SDValue Op, SelectionDAG &DAG) const;
|
||
|
SDValue LowerFROUND64(SDValue Op, SelectionDAG &DAG) const;
|
||
|
|
||
|
SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
|
||
|
SDValue LowerLOADi1(SDValue Op, SelectionDAG &DAG) const;
|
||
|
|
||
|
SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
|
||
|
SDValue LowerSTOREi1(SDValue Op, SelectionDAG &DAG) const;
|
||
|
SDValue LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const;
|
||
|
|
||
|
SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const;
|
||
|
SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const;
|
||
|
|
||
|
SDValue LowerSelect(SDValue Op, SelectionDAG &DAG) const;
|
||
|
|
||
|
void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
|
||
|
SelectionDAG &DAG) const override;
|
||
|
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
|
||
|
|
||
|
Align getArgumentAlignment(SDValue Callee, const CallBase *CB, Type *Ty,
|
||
|
unsigned Idx, const DataLayout &DL) const;
|
||
|
};
|
||
|
} // namespace llvm
|
||
|
|
||
|
#endif
|