153 lines
4.8 KiB
C++
153 lines
4.8 KiB
C++
|
//===- NVPTXLowerAggrCopies.cpp - ------------------------------*- C++ -*--===//
|
||
|
//
|
||
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||
|
// See https://llvm.org/LICENSE.txt for license information.
|
||
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||
|
//
|
||
|
//===----------------------------------------------------------------------===//
|
||
|
//
|
||
|
// \file
|
||
|
// Lower aggregate copies, memset, memcpy, memmove intrinsics into loops when
|
||
|
// the size is large or is not a compile-time constant.
|
||
|
//
|
||
|
//===----------------------------------------------------------------------===//
|
||
|
|
||
|
#include "NVPTXLowerAggrCopies.h"
|
||
|
#include "llvm/Analysis/TargetTransformInfo.h"
|
||
|
#include "llvm/CodeGen/StackProtector.h"
|
||
|
#include "llvm/IR/Constants.h"
|
||
|
#include "llvm/IR/DataLayout.h"
|
||
|
#include "llvm/IR/Function.h"
|
||
|
#include "llvm/IR/IRBuilder.h"
|
||
|
#include "llvm/IR/Instructions.h"
|
||
|
#include "llvm/IR/IntrinsicInst.h"
|
||
|
#include "llvm/IR/Intrinsics.h"
|
||
|
#include "llvm/IR/LLVMContext.h"
|
||
|
#include "llvm/IR/Module.h"
|
||
|
#include "llvm/Support/Debug.h"
|
||
|
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
|
||
|
#include "llvm/Transforms/Utils/LowerMemIntrinsics.h"
|
||
|
|
||
|
#define DEBUG_TYPE "nvptx"
|
||
|
|
||
|
using namespace llvm;
|
||
|
|
||
|
namespace {
|
||
|
|
||
|
// actual analysis class, which is a functionpass
|
||
|
struct NVPTXLowerAggrCopies : public FunctionPass {
|
||
|
static char ID;
|
||
|
|
||
|
NVPTXLowerAggrCopies() : FunctionPass(ID) {}
|
||
|
|
||
|
void getAnalysisUsage(AnalysisUsage &AU) const override {
|
||
|
AU.addPreserved<StackProtector>();
|
||
|
AU.addRequired<TargetTransformInfoWrapperPass>();
|
||
|
}
|
||
|
|
||
|
bool runOnFunction(Function &F) override;
|
||
|
|
||
|
static const unsigned MaxAggrCopySize = 128;
|
||
|
|
||
|
StringRef getPassName() const override {
|
||
|
return "Lower aggregate copies/intrinsics into loops";
|
||
|
}
|
||
|
};
|
||
|
|
||
|
// Storage for the pass identifier that the constructor hands to FunctionPass.
// NOTE(review): presumably only the address of ID matters to the pass
// framework, not its value — confirm against the LLVM pass documentation.
char NVPTXLowerAggrCopies::ID = 0;
|
||
|
|
||
|
/// Lowers large aggregate copies and llvm.mem* intrinsic calls in \p F into
/// explicit loops.
///
/// Candidates are collected first and rewritten afterwards, so the function
/// is not modified while it is being walked. Two kinds of instruction
/// qualify:
///   * a load with exactly one use, where that user is a store of the loaded
///     value and the store size of the loaded type is at least
///     MaxAggrCopySize;
///   * a mem* intrinsic call whose length is not a ConstantInt, or is a
///     ConstantInt of at least MaxAggrCopySize.
///
/// \returns true if at least one instruction was rewritten; false if nothing
/// qualified, in which case \p F is left unchanged.
bool NVPTXLowerAggrCopies::runOnFunction(Function &F) {
  SmallVector<LoadInst *, 4> AggrLoads;
  SmallVector<MemIntrinsic *, 4> MemCalls;

  const DataLayout &DL = F.getParent()->getDataLayout();
  LLVMContext &Context = F.getParent()->getContext();
  const TargetTransformInfo &TTI =
      getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);

  // Collect all aggregate loads and mem* calls.
  for (BasicBlock &BB : F) {
    for (Instruction &I : BB) {
      if (auto *LI = dyn_cast<LoadInst>(&I)) {
        // Only a load with a single use can be a plain load->store copy.
        if (!LI->hasOneUse())
          continue;

        // Small copies are left as they are.
        if (DL.getTypeStoreSize(LI->getType()) < MaxAggrCopySize)
          continue;

        // The single user must be a store that stores the loaded value
        // (operand 0), not one that merely uses it as the address.
        auto *SI = dyn_cast<StoreInst>(LI->user_back());
        if (SI && SI->getOperand(0) == LI)
          AggrLoads.push_back(LI);
      } else if (auto *IntrCall = dyn_cast<MemIntrinsic>(&I)) {
        // Convert intrinsic calls with variable size or with constant size
        // larger than the MaxAggrCopySize threshold.
        auto *LenCI = dyn_cast<ConstantInt>(IntrCall->getLength());
        if (!LenCI || LenCI->getZExtValue() >= MaxAggrCopySize)
          MemCalls.push_back(IntrCall);
      }
    }
  }

  if (AggrLoads.empty() && MemCalls.empty())
    return false;

  //
  // Do the transformation of an aggr load/copy/set to a loop
  //
  for (LoadInst *LI : AggrLoads) {
    // The collection phase established that the only user is a store of LI.
    auto *SI = cast<StoreInst>(*LI->user_begin());
    Value *SrcAddr = LI->getOperand(0);
    Value *DstAddr = SI->getOperand(1);
    // NOTE(review): the store size is narrowed to `unsigned` and emitted as
    // an i32 constant; a type larger than that range would be truncated.
    unsigned NumLoads = DL.getTypeStoreSize(LI->getType());
    ConstantInt *CopyLen =
        ConstantInt::get(Type::getInt32Ty(Context), NumLoads);

    // NOTE(review): nothing here inspects the instructions between the load
    // and its store. Presumably the expansion reads the source at the
    // store's position — confirm no intervening write to the source can
    // occur.
    createMemCpyLoopKnownSize(/* ConvertedInst */ SI,
                              /* SrcAddr */ SrcAddr, /* DstAddr */ DstAddr,
                              /* CopyLen */ CopyLen,
                              /* SrcAlign */ LI->getAlign(),
                              /* DestAlign */ SI->getAlign(),
                              /* SrcIsVolatile */ LI->isVolatile(),
                              /* DstIsVolatile */ SI->isVolatile(), TTI);

    // Erase the store first: it is the only user of the load.
    SI->eraseFromParent();
    LI->eraseFromParent();
  }

  // Transform mem* intrinsic calls.
  for (MemIntrinsic *MemCall : MemCalls) {
    if (MemCpyInst *Memcpy = dyn_cast<MemCpyInst>(MemCall)) {
      expandMemCpyAsLoop(Memcpy, TTI);
    } else if (MemMoveInst *Memmove = dyn_cast<MemMoveInst>(MemCall)) {
      expandMemMoveAsLoop(Memmove);
    } else if (MemSetInst *Memset = dyn_cast<MemSetInst>(MemCall)) {
      expandMemSetAsLoop(Memset);
    }
    // The original call is removed unconditionally once the helper has run.
    MemCall->eraseFromParent();
  }

  return true;
}
|
||
|
|
||
|
} // namespace
|
||
|
|
||
|
namespace llvm {
|
||
|
void initializeNVPTXLowerAggrCopiesPass(PassRegistry &);
|
||
|
}
|
||
|
|
||
|
// Register the pass with the argument string "nvptx-lower-aggr-copies".
INITIALIZE_PASS(NVPTXLowerAggrCopies, "nvptx-lower-aggr-copies",
                "Lower aggregate copies, and llvm.mem* intrinsics into loops",
                /*cfg=*/false, /*analysis=*/false)
|
||
|
|
||
|
/// Factory for the aggregate-copy lowering pass.
///
/// \returns a newly heap-allocated NVPTXLowerAggrCopies instance; this
/// function keeps no reference to it.
FunctionPass *llvm::createLowerAggrCopies() {
  return new NVPTXLowerAggrCopies();
}
|