//===- IRSimilarityIdentifier.h - Find similarity in a module --------------==// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // \file // Interface file for the IRSimilarityIdentifier for identifying similarities in // IR including the IRInstructionMapper, which maps an Instruction to unsigned // integers. // // Two sequences of instructions are called "similar" if they perform the same // series of operations for all inputs. // // \code // %1 = add i32 %a, 10 // %2 = add i32 %a, %1 // %3 = icmp slt icmp %1, %2 // \endcode // // and // // \code // %1 = add i32 11, %a // %2 = sub i32 %a, %1 // %3 = icmp sgt icmp %2, %1 // \endcode // // ultimately have the same result, even if the inputs, and structure are // slightly different. // // For instructions, we do not worry about operands that do not have fixed // semantic meaning to the program. We consider the opcode that the instruction // has, the types, parameters, and extra information such as the function name, // or comparison predicate. These are used to create a hash to map instructions // to integers to be used in similarity matching in sequences of instructions // // Terminology: // An IRSimilarityCandidate is a region of IRInstructionData (wrapped // Instructions), usually used to denote a region of similarity has been found. // // A SimilarityGroup is a set of IRSimilarityCandidates that are structurally // similar to one another. // //===----------------------------------------------------------------------===// #ifndef LLVM_ANALYSIS_IRSIMILARITYIDENTIFIER_H #define LLVM_ANALYSIS_IRSIMILARITYIDENTIFIER_H #include "llvm/IR/InstVisitor.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" #include "llvm/IR/PassManager.h" #include "llvm/Pass.h" #include "llvm/Support/Allocator.h" namespace llvm { namespace IRSimilarity { struct IRInstructionDataList; /// This represents what is and is not supported when finding similarity in /// Instructions. /// /// Legal Instructions are considered when looking at similarity between /// Instructions. /// /// Illegal Instructions cannot be considered when looking for similarity /// between Instructions. They act as boundaries between similarity regions. /// /// Invisible Instructions are skipped over during analysis. // TODO: Shared with MachineOutliner enum InstrType { Legal, Illegal, Invisible }; /// This provides the utilities for hashing an Instruction to an unsigned /// integer. Two IRInstructionDatas produce the same hash value when their /// underlying Instructions perform the same operation (even if they don't have /// the same input operands.) /// As a more concrete example, consider the following: /// /// \code /// %add1 = add i32 %a, %b /// %add2 = add i32 %c, %d /// %add3 = add i64 %e, %f /// \endcode /// // Then the IRInstructionData wrappers for these Instructions may be hashed like /// so: /// /// \code /// ; These two adds have the same types and operand types, so they hash to the /// ; same number. /// %add1 = add i32 %a, %b ; Hash: 1 /// %add2 = add i32 %c, %d ; Hash: 1 /// ; This add produces an i64. This differentiates it from %add1 and %add2. So, /// ; it hashes to a different number. /// %add3 = add i64 %e, %f; Hash: 2 /// \endcode /// /// /// This hashing scheme will be used to represent the program as a very long /// string. This string can then be placed in a data structure which can be used /// for similarity queries. /// /// TODO: Handle types of Instructions which can be equal even with different /// operands. (E.g. comparisons with swapped predicates.) /// TODO: Handle CallInsts, which are only checked for function type /// by \ref isSameOperationAs. /// TODO: Handle GetElementPtrInsts, as some of the operands have to be the /// exact same, and some do not. struct IRInstructionData : ilist_node { /// The source Instruction that is being wrapped. Instruction *Inst = nullptr; /// The values of the operands in the Instruction. SmallVector OperVals; /// The legality of the wrapped instruction. This is informed by InstrType, /// and is used when checking when two instructions are considered similar. /// If either instruction is not legal, the instructions are automatically not /// considered similar. bool Legal; /// This is only relevant if we are wrapping a CmpInst where we needed to /// change the predicate of a compare instruction from a greater than form /// to a less than form. It is None otherwise. Optional RevisedPredicate; /// Gather the information that is difficult to gather for an Instruction, or /// is changed. i.e. the operands of an Instruction and the Types of those /// operands. This extra information allows for similarity matching to make /// assertions that allow for more flexibility when checking for whether an /// Instruction performs the same operation. IRInstructionData(Instruction &I, bool Legality, IRInstructionDataList &IDL); /// Get the predicate that the compare instruction is using for hashing the /// instruction. the IRInstructionData must be wrapping a CmpInst. CmpInst::Predicate getPredicate() const; /// A function that swaps the predicates to their less than form if they are /// in a greater than form. Otherwise, the predicate is unchanged. /// /// \param CI - The comparison operation to find a consistent preidcate for. /// \return the consistent comparison predicate. static CmpInst::Predicate predicateForConsistency(CmpInst *CI); /// Hashes \p Value based on its opcode, types, and operand types. /// Two IRInstructionData instances produce the same hash when they perform /// the same operation. /// /// As a simple example, consider the following instructions. /// /// \code /// %add1 = add i32 %x1, %y1 /// %add2 = add i32 %x2, %y2 /// /// %sub = sub i32 %x1, %y1 /// /// %add_i64 = add i64 %x2, %y2 /// \endcode /// /// Because the first two adds operate the same types, and are performing the /// same action, they will be hashed to the same value. /// /// However, the subtraction instruction is not the same as an addition, and /// will be hashed to a different value. /// /// Finally, the last add has a different type compared to the first two add /// instructions, so it will also be hashed to a different value that any of /// the previous instructions. /// /// \param [in] ID - The IRInstructionData instance to be hashed. /// \returns A hash_value of the IRInstructionData. friend hash_code hash_value(const IRInstructionData &ID) { SmallVector OperTypes; for (Value *V : ID.OperVals) OperTypes.push_back(V->getType()); if (isa(ID.Inst)) return llvm::hash_combine( llvm::hash_value(ID.Inst->getOpcode()), llvm::hash_value(ID.Inst->getType()), llvm::hash_value(ID.getPredicate()), llvm::hash_combine_range(OperTypes.begin(), OperTypes.end())); else if (CallInst *CI = dyn_cast(ID.Inst)) return llvm::hash_combine( llvm::hash_value(ID.Inst->getOpcode()), llvm::hash_value(ID.Inst->getType()), llvm::hash_value(CI->getCalledFunction()->getName().str()), llvm::hash_combine_range(OperTypes.begin(), OperTypes.end())); return llvm::hash_combine( llvm::hash_value(ID.Inst->getOpcode()), llvm::hash_value(ID.Inst->getType()), llvm::hash_combine_range(OperTypes.begin(), OperTypes.end())); } IRInstructionDataList *IDL = nullptr; }; struct IRInstructionDataList : simple_ilist {}; /// Compare one IRInstructionData class to another IRInstructionData class for /// whether they are performing a the same operation, and can mapped to the /// same value. For regular instructions if the hash value is the same, then /// they will also be close. /// /// \param A - The first IRInstructionData class to compare /// \param B - The second IRInstructionData class to compare /// \returns true if \p A and \p B are similar enough to be mapped to the same /// value. bool isClose(const IRInstructionData &A, const IRInstructionData &B); struct IRInstructionDataTraits : DenseMapInfo { static inline IRInstructionData *getEmptyKey() { return nullptr; } static inline IRInstructionData *getTombstoneKey() { return reinterpret_cast(-1); } static unsigned getHashValue(const IRInstructionData *E) { using llvm::hash_value; assert(E && "IRInstructionData is a nullptr?"); return hash_value(*E); } static bool isEqual(const IRInstructionData *LHS, const IRInstructionData *RHS) { if (RHS == getEmptyKey() || RHS == getTombstoneKey() || LHS == getEmptyKey() || LHS == getTombstoneKey()) return LHS == RHS; assert(LHS && RHS && "nullptr should have been caught by getEmptyKey?"); return isClose(*LHS, *RHS); } }; /// Helper struct for converting the Instructions in a Module into a vector of /// unsigned integers. This vector of unsigned integers can be thought of as a /// "numeric string". This numeric string can then be queried by, for example, /// data structures that find repeated substrings. /// /// This hashing is done per BasicBlock in the module. To hash Instructions /// based off of their operations, each Instruction is wrapped in an /// IRInstructionData struct. The unsigned integer for an IRInstructionData /// depends on: /// - The hash provided by the IRInstructionData. /// - Which member of InstrType the IRInstructionData is classified as. // See InstrType for more details on the possible classifications, and how they // manifest in the numeric string. /// /// The numeric string for an individual BasicBlock is terminated by an unique /// unsigned integer. This prevents data structures which rely on repetition /// from matching across BasicBlocks. (For example, the SuffixTree.) /// As a concrete example, if we have the following two BasicBlocks: /// \code /// bb0: /// %add1 = add i32 %a, %b /// %add2 = add i32 %c, %d /// %add3 = add i64 %e, %f /// bb1: /// %sub = sub i32 %c, %d /// \endcode /// We may hash the Instructions like this (via IRInstructionData): /// \code /// bb0: /// %add1 = add i32 %a, %b ; Hash: 1 /// %add2 = add i32 %c, %d; Hash: 1 /// %add3 = add i64 %e, %f; Hash: 2 /// bb1: /// %sub = sub i32 %c, %d; Hash: 3 /// %add4 = add i32 %c, %d ; Hash: 1 /// \endcode /// And produce a "numeric string representation" like so: /// 1, 1, 2, unique_integer_1, 3, 1, unique_integer_2 /// /// TODO: This is very similar to the MachineOutliner, and should be /// consolidated into the same interface. struct IRInstructionMapper { /// The starting illegal instruction number to map to. /// /// Set to -3 for compatibility with DenseMapInfo. unsigned IllegalInstrNumber = static_cast(-3); /// The next available integer to assign to a legal Instruction to. unsigned LegalInstrNumber = 0; /// Correspondence from IRInstructionData to unsigned integers. DenseMap InstructionIntegerMap; /// Set if we added an illegal number in the previous step. /// Since each illegal number is unique, we only need one of them between /// each range of legal numbers. This lets us make sure we don't add more /// than one illegal number per range. bool AddedIllegalLastTime = false; /// Marks whether we found a illegal instruction in the previous step. bool CanCombineWithPrevInstr = false; /// Marks whether we have found a set of instructions that is long enough /// to be considered for similarity. bool HaveLegalRange = false; /// This allocator pointer is in charge of holding on to the IRInstructionData /// so it is not deallocated until whatever external tool is using it is done /// with the information. SpecificBumpPtrAllocator *InstDataAllocator = nullptr; /// This allocator pointer is in charge of creating the IRInstructionDataList /// so it is not deallocated until whatever external tool is using it is done /// with the information. SpecificBumpPtrAllocator *IDLAllocator = nullptr; /// Get an allocated IRInstructionData struct using the InstDataAllocator. /// /// \param I - The Instruction to wrap with IRInstructionData. /// \param Legality - A boolean value that is true if the instruction is to /// be considered for similarity, and false if not. /// \param IDL - The InstructionDataList that the IRInstructionData is /// inserted into. /// \returns An allocated IRInstructionData struct. IRInstructionData *allocateIRInstructionData(Instruction &I, bool Legality, IRInstructionDataList &IDL); /// Get an allocated IRInstructionDataList object using the IDLAllocator. /// /// \returns An allocated IRInstructionDataList object. IRInstructionDataList *allocateIRInstructionDataList(); IRInstructionDataList *IDL = nullptr; /// Maps the Instructions in a BasicBlock \p BB to legal or illegal integers /// determined by \p InstrType. Two Instructions are mapped to the same value /// if they are close as defined by the InstructionData class above. /// /// \param [in] BB - The BasicBlock to be mapped to integers. /// \param [in,out] InstrList - Vector of IRInstructionData to append to. /// \param [in,out] IntegerMapping - Vector of unsigned integers to append to. void convertToUnsignedVec(BasicBlock &BB, std::vector &InstrList, std::vector &IntegerMapping); /// Maps an Instruction to a legal integer. /// /// \param [in] It - The Instruction to be mapped to an integer. /// \param [in,out] IntegerMappingForBB - Vector of unsigned integers to /// append to. /// \param [in,out] InstrListForBB - Vector of InstructionData to append to. /// \returns The integer \p It was mapped to. unsigned mapToLegalUnsigned(BasicBlock::iterator &It, std::vector &IntegerMappingForBB, std::vector &InstrListForBB); /// Maps an Instruction to an illegal integer. /// /// \param [in] It - The \p Instruction to be mapped to an integer. /// \param [in,out] IntegerMappingForBB - Vector of unsigned integers to /// append to. /// \param [in,out] InstrListForBB - Vector of IRInstructionData to append to. /// \param End - true if creating a dummy IRInstructionData at the end of a /// basic block. /// \returns The integer \p It was mapped to. unsigned mapToIllegalUnsigned( BasicBlock::iterator &It, std::vector &IntegerMappingForBB, std::vector &InstrListForBB, bool End = false); IRInstructionMapper(SpecificBumpPtrAllocator *IDA, SpecificBumpPtrAllocator *IDLA) : InstDataAllocator(IDA), IDLAllocator(IDLA) { // Make sure that the implementation of DenseMapInfo hasn't // changed. assert(DenseMapInfo::getEmptyKey() == static_cast(-1) && "DenseMapInfo's empty key isn't -1!"); assert(DenseMapInfo::getTombstoneKey() == static_cast(-2) && "DenseMapInfo's tombstone key isn't -2!"); IDL = new (IDLAllocator->Allocate()) IRInstructionDataList(); } /// Custom InstVisitor to classify different instructions for whether it can /// be analyzed for similarity. struct InstructionClassification : public InstVisitor { InstructionClassification() {} // TODO: Determine a scheme to resolve when the label is similar enough. InstrType visitBranchInst(BranchInst &BI) { return Illegal; } // TODO: Determine a scheme to resolve when the labels are similar enough. InstrType visitPHINode(PHINode &PN) { return Illegal; } // TODO: Handle allocas. InstrType visitAllocaInst(AllocaInst &AI) { return Illegal; } // We exclude variable argument instructions since variable arguments // requires extra checking of the argument list. InstrType visitVAArgInst(VAArgInst &VI) { return Illegal; } // We exclude all exception handling cases since they are so context // dependent. InstrType visitLandingPadInst(LandingPadInst &LPI) { return Illegal; } InstrType visitFuncletPadInst(FuncletPadInst &FPI) { return Illegal; } // DebugInfo should be included in the regions, but should not be // analyzed for similarity as it has no bearing on the outcome of the // program. InstrType visitDbgInfoIntrinsic(DbgInfoIntrinsic &DII) { return Invisible; } // TODO: Handle specific intrinsics. InstrType visitIntrinsicInst(IntrinsicInst &II) { return Illegal; } // We only allow call instructions where the function has a name and // is not an indirect call. InstrType visitCallInst(CallInst &CI) { Function *F = CI.getCalledFunction(); if (!F || CI.isIndirectCall() || !F->hasName()) return Illegal; return Legal; } // TODO: We do not current handle similarity that changes the control flow. InstrType visitInvokeInst(InvokeInst &II) { return Illegal; } // TODO: We do not current handle similarity that changes the control flow. InstrType visitCallBrInst(CallBrInst &CBI) { return Illegal; } // TODO: Handle interblock similarity. InstrType visitTerminator(Instruction &I) { return Illegal; } InstrType visitInstruction(Instruction &I) { return Legal; } }; /// Maps an Instruction to a member of InstrType. InstructionClassification InstClassifier; }; /// This is a class that wraps a range of IRInstructionData from one point to /// another in the vector of IRInstructionData, which is a region of the /// program. It is also responsible for defining the structure within this /// region of instructions. /// /// The structure of a region is defined through a value numbering system /// assigned to each unique value in a region at the creation of the /// IRSimilarityCandidate. /// /// For example, for each Instruction we add a mapping for each new /// value seen in that Instruction. /// IR: Mapping Added: /// %add1 = add i32 %a, c1 %add1 -> 3, %a -> 1, c1 -> 2 /// %add2 = add i32 %a, %1 %add2 -> 4 /// %add3 = add i32 c2, c1 %add3 -> 6, c2 -> 5 /// /// We can compare IRSimilarityCandidates against one another. /// The \ref isSimilar function compares each IRInstructionData against one /// another and if we have the same sequences of IRInstructionData that would /// create the same hash, we have similar IRSimilarityCandidates. /// /// We can also compare the structure of IRSimilarityCandidates. If we can /// create a mapping of registers in the region contained by one /// IRSimilarityCandidate to the region contained by different /// IRSimilarityCandidate, they can be considered structurally similar. /// /// IRSimilarityCandidate1: IRSimilarityCandidate2: /// %add1 = add i32 %a, %b %add1 = add i32 %d, %e /// %add2 = add i32 %a, %c %add2 = add i32 %d, %f /// %add3 = add i32 c1, c2 %add3 = add i32 c3, c4 /// /// Can have the following mapping from candidate to candidate of: /// %a -> %d, %b -> %e, %c -> %f, c1 -> c3, c2 -> c4 /// and can be considered similar. /// /// IRSimilarityCandidate1: IRSimilarityCandidate2: /// %add1 = add i32 %a, %b %add1 = add i32 %d, c4 /// %add2 = add i32 %a, %c %add2 = add i32 %d, %f /// %add3 = add i32 c1, c2 %add3 = add i32 c3, c4 /// /// We cannot create the same mapping since the use of c4 is not used in the /// same way as %b or c2. class IRSimilarityCandidate { private: /// The start index of this IRSimilarityCandidate in the instruction list. unsigned StartIdx = 0; /// The number of instructions in this IRSimilarityCandidate. unsigned Len = 0; /// The first instruction in this IRSimilarityCandidate. IRInstructionData *FirstInst = nullptr; /// The last instruction in this IRSimilarityCandidate. IRInstructionData *LastInst = nullptr; /// Global Value Numbering structures /// @{ /// Stores the mapping of the value to the number assigned to it in the /// IRSimilarityCandidate. DenseMap ValueToNumber; /// Stores the mapping of the number to the value assigned this number. DenseMap NumberToValue; /// @} public: /// \param StartIdx - The starting location of the region. /// \param Len - The length of the region. /// \param FirstInstIt - The starting IRInstructionData of the region. /// \param LastInstIt - The ending IRInstructionData of the region. IRSimilarityCandidate(unsigned StartIdx, unsigned Len, IRInstructionData *FirstInstIt, IRInstructionData *LastInstIt); /// \param A - The first IRInstructionCandidate to compare. /// \param B - The second IRInstructionCandidate to compare. /// \returns True when every IRInstructionData in \p A is similar to every /// IRInstructionData in \p B. static bool isSimilar(const IRSimilarityCandidate &A, const IRSimilarityCandidate &B); /// \param A - The first IRInstructionCandidate to compare. /// \param B - The second IRInstructionCandidate to compare. /// \returns True when every IRInstructionData in \p A is structurally similar /// to \p B. static bool compareStructure(const IRSimilarityCandidate &A, const IRSimilarityCandidate &B); struct OperandMapping { /// The IRSimilarityCandidate that holds the instruction the OperVals were /// pulled from. const IRSimilarityCandidate &IRSC; /// The operand values to be analyzed. ArrayRef &OperVals; /// The current mapping of global value numbers from one IRSimilarityCandidate /// to another IRSimilarityCandidate. DenseMap> &ValueNumberMapping; }; /// Compare the operands in \p A and \p B and check that the current mapping /// of global value numbers from \p A to \p B and \p B to \A is consistent. /// /// \param A - The first IRInstructionCandidate, operand values, and current /// operand mappings to compare. /// \param B - The second IRInstructionCandidate, operand values, and current /// operand mappings to compare. /// \returns true if the IRSimilarityCandidates operands are compatible. static bool compareNonCommutativeOperandMapping(OperandMapping A, OperandMapping B); /// Compare the operands in \p A and \p B and check that the current mapping /// of global value numbers from \p A to \p B and \p B to \A is consistent /// given that the operands are commutative. /// /// \param A - The first IRInstructionCandidate, operand values, and current /// operand mappings to compare. /// \param B - The second IRInstructionCandidate, operand values, and current /// operand mappings to compare. /// \returns true if the IRSimilarityCandidates operands are compatible. static bool compareCommutativeOperandMapping(OperandMapping A, OperandMapping B); /// Compare the start and end indices of the two IRSimilarityCandidates for /// whether they overlap. If the start instruction of one /// IRSimilarityCandidate is less than the end instruction of the other, and /// the start instruction of one is greater than the start instruction of the /// other, they overlap. /// /// \returns true if the IRSimilarityCandidates do not have overlapping /// instructions. static bool overlap(const IRSimilarityCandidate &A, const IRSimilarityCandidate &B); /// \returns the number of instructions in this Candidate. unsigned getLength() const { return Len; } /// \returns the start index of this IRSimilarityCandidate. unsigned getStartIdx() const { return StartIdx; } /// \returns the end index of this IRSimilarityCandidate. unsigned getEndIdx() const { return StartIdx + Len - 1; } /// \returns The first IRInstructionData. IRInstructionData *front() const { return FirstInst; } /// \returns The last IRInstructionData. IRInstructionData *back() const { return LastInst; } /// \returns The first Instruction. Instruction *frontInstruction() { return FirstInst->Inst; } /// \returns The last Instruction Instruction *backInstruction() { return LastInst->Inst; } /// \returns The BasicBlock the IRSimilarityCandidate starts in. BasicBlock *getStartBB() { return FirstInst->Inst->getParent(); } /// \returns The BasicBlock the IRSimilarityCandidate ends in. BasicBlock *getEndBB() { return LastInst->Inst->getParent(); } /// \returns The Function that the IRSimilarityCandidate is located in. Function *getFunction() { return getStartBB()->getParent(); } /// Finds the positive number associated with \p V if it has been mapped. /// \param [in] V - the Value to find. /// \returns The positive number corresponding to the value. /// \returns None if not present. Optional getGVN(Value *V) { assert(V != nullptr && "Value is a nullptr?"); DenseMap::iterator VNIt = ValueToNumber.find(V); if (VNIt == ValueToNumber.end()) return None; return VNIt->second; } /// Finds the Value associate with \p Num if it exists. /// \param [in] Num - the number to find. /// \returns The Value associated with the number. /// \returns None if not present. Optional fromGVN(unsigned Num) { DenseMap::iterator VNIt = NumberToValue.find(Num); if (VNIt == NumberToValue.end()) return None; assert(VNIt->second != nullptr && "Found value is a nullptr!"); return VNIt->second; } /// \param RHS -The IRSimilarityCandidate to compare against /// \returns true if the IRSimilarityCandidate is occurs after the /// IRSimilarityCandidate in the program. bool operator<(const IRSimilarityCandidate &RHS) const { return getStartIdx() > RHS.getStartIdx(); } using iterator = IRInstructionDataList::iterator; iterator begin() const { return iterator(front()); } iterator end() const { return std::next(iterator(back())); } }; typedef std::vector SimilarityGroup; typedef std::vector SimilarityGroupList; /// This class puts all the pieces of the IRInstructionData, /// IRInstructionMapper, IRSimilarityCandidate together. /// /// It first feeds the Module or vector of Modules into the IRInstructionMapper, /// and puts all the mapped instructions into a single long list of /// IRInstructionData. /// /// The list of unsigned integers is given to the Suffix Tree or similar data /// structure to find repeated subsequences. We construct an /// IRSimilarityCandidate for each instance of the subsequence. We compare them /// against one another since These repeated subsequences can have different /// structure. For each different kind of structure found, we create a /// similarity group. /// /// If we had four IRSimilarityCandidates A, B, C, and D where A, B and D are /// structurally similar to one another, while C is different we would have two /// SimilarityGroups: /// /// SimilarityGroup 1: SimilarityGroup 2 /// A, B, D C /// /// A list of the different similarity groups is then returned after /// analyzing the module. class IRSimilarityIdentifier { public: IRSimilarityIdentifier() : Mapper(&InstDataAllocator, &InstDataListAllocator) {} /// \param M the module to find similarity in. explicit IRSimilarityIdentifier(Module &M) : Mapper(&InstDataAllocator, &InstDataListAllocator) { findSimilarity(M); } private: /// Map the instructions in the module to unsigned integers, using mapping /// already present in the Mapper if possible. /// /// \param [in] M Module - To map to integers. /// \param [in,out] InstrList - The vector to append IRInstructionData to. /// \param [in,out] IntegerMapping - The vector to append integers to. void populateMapper(Module &M, std::vector &InstrList, std::vector &IntegerMapping); /// Map the instructions in the modules vector to unsigned integers, using /// mapping already present in the mapper if possible. /// /// \param [in] Modules - The list of modules to use to populate the mapper /// \param [in,out] InstrList - The vector to append IRInstructionData to. /// \param [in,out] IntegerMapping - The vector to append integers to. void populateMapper(ArrayRef> &Modules, std::vector &InstrList, std::vector &IntegerMapping); /// Find the similarity candidates in \p InstrList and corresponding /// \p UnsignedVec /// /// \param [in,out] InstrList - The vector to append IRInstructionData to. /// \param [in,out] IntegerMapping - The vector to append integers to. /// candidates found in the program. void findCandidates(std::vector &InstrList, std::vector &IntegerMapping); public: // Find the IRSimilarityCandidates in the \p Modules and group by structural // similarity in a SimilarityGroup, each group is returned in a // SimilarityGroupList. // // \param [in] Modules - the modules to analyze. // \returns The groups of similarity ranges found in the modules. SimilarityGroupList & findSimilarity(ArrayRef> Modules); // Find the IRSimilarityCandidates in the given Module grouped by structural // similarity in a SimilarityGroup, contained inside a SimilarityGroupList. // // \param [in] M - the module to analyze. // \returns The groups of similarity ranges found in the module. SimilarityGroupList &findSimilarity(Module &M); // Clears \ref SimilarityCandidates if it is already filled by a previous run. void resetSimilarityCandidates() { // If we've already analyzed a Module or set of Modules, so we must clear // the SimilarityCandidates to make sure we do not have only old values // hanging around. if (SimilarityCandidates.hasValue()) SimilarityCandidates->clear(); else SimilarityCandidates = SimilarityGroupList(); } // \returns The groups of similarity ranges found in the most recently passed // set of modules. Optional &getSimilarity() { return SimilarityCandidates; } private: /// The allocator for IRInstructionData. SpecificBumpPtrAllocator InstDataAllocator; /// The allocator for IRInstructionDataLists. SpecificBumpPtrAllocator InstDataListAllocator; /// Map Instructions to unsigned integers and wraps the Instruction in an /// instance of IRInstructionData. IRInstructionMapper Mapper; /// The SimilarityGroups found with the most recent run of \ref /// findSimilarity. None if there is no recent run. Optional SimilarityCandidates; }; } // end namespace IRSimilarity /// An analysis pass based on legacy pass manager that runs and returns /// IRSimilarityIdentifier run on the Module. class IRSimilarityIdentifierWrapperPass : public ModulePass { std::unique_ptr IRSI; public: static char ID; IRSimilarityIdentifierWrapperPass(); IRSimilarity::IRSimilarityIdentifier &getIRSI() { return *IRSI; } const IRSimilarity::IRSimilarityIdentifier &getIRSI() const { return *IRSI; } bool doInitialization(Module &M) override; bool doFinalization(Module &M) override; bool runOnModule(Module &M) override; void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesAll(); } }; /// An analysis pass that runs and returns the IRSimilarityIdentifier run on the /// Module. class IRSimilarityAnalysis : public AnalysisInfoMixin { public: typedef IRSimilarity::IRSimilarityIdentifier Result; Result run(Module &M, ModuleAnalysisManager &); private: friend AnalysisInfoMixin; static AnalysisKey Key; }; /// Printer pass that uses \c IRSimilarityAnalysis. class IRSimilarityAnalysisPrinterPass : public PassInfoMixin { raw_ostream &OS; public: explicit IRSimilarityAnalysisPrinterPass(raw_ostream &OS) : OS(OS) {} PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); }; } // end namespace llvm #endif // LLVM_ANALYSIS_IRSIMILARITYIDENTIFIER_H