From 5e6e0e6c9080e5e13b4ff9d222651327f2b47e2a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nils=20H=C3=B6lscher?= Date: Tue, 3 May 2022 11:13:37 +0200 Subject: [PATCH] first Version of Loop Unrolling --- .vscode/launch.json | 4 +- CacheAnalysisPass/CacheAnalysisPass.cpp | 10 +- helper.sh | 70 +++--- include/AbstractCache.h | 317 ++++++++++++++++++++---- 4 files changed, 304 insertions(+), 97 deletions(-) diff --git a/.vscode/launch.json b/.vscode/launch.json index bfa1036..eb3a209 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -7,13 +7,13 @@ { "type": "lldb", "request": "launch", - "name": "LLDB cnt", + "name": "LLDB Unrolling", "program": "/usr/bin/opt", "args": [ "-load-pass-plugin", "${workspaceFolder}/build/libCacheAnalysisPass.so", "-passes=lru-misses", - "${workspaceFolder}/test/cnt.ll", + "${workspaceFolder}/test/fft1.ll", "-o", "/dev/null" ], diff --git a/CacheAnalysisPass/CacheAnalysisPass.cpp b/CacheAnalysisPass/CacheAnalysisPass.cpp index 4e253d0..f0d85e3 100644 --- a/CacheAnalysisPass/CacheAnalysisPass.cpp +++ b/CacheAnalysisPass/CacheAnalysisPass.cpp @@ -56,6 +56,7 @@ struct CacheAnalysisPass : PassInfoMixin { bool PrintEdgesPost = false; bool DumpToDot = true; bool DumpNodes = false; + bool LoopUnrolling = true; // Assume a 4kB Cache // with 16 Sets, associativity of 4 and Cachelines fitting two @@ -228,14 +229,15 @@ struct CacheAnalysisPass : PassInfoMixin { if (PrintAddresses) addressPrinter(F); } + if(LoopUnrolling) + AC.unrollLoops(); + AC.fillAbstractCache(EntryAddress); + if (DumpNodes) + AC.dumpNodes(); if (PrintEdgesPost) AC.dumpEdges(); if (DumpToDot) AC.dumpDotFile(); - AC.unrollLoops(); - AC.fillAbstractCache(EntryAddress); - if (DumpNodes) - AC.dumpNodes(); outs() << "MustHits: " << AC.collectHits() << "\n"; outs() << "MayMisses: " << AC.collectMisses() << "\n"; return PreservedAnalyses::all(); diff --git a/helper.sh b/helper.sh index 6693d95..ee575c6 100755 --- a/helper.sh +++ b/helper.sh @@ -114,46 +114,36 @@ case $1 in echo 
"Currently not available!" echo "But please continue to implement the must join," echo "to the best of your abilities and check for updates!" - # run "fft1" - # echo "==== Correct fft1 ====" - # echo "MustHits: 16" - # echo "MayMisses: 280" - # echo - # run "bsort100" - # echo "==== Correct bsort100 ====" - # echo "MustHits: 1" - # echo "MayMisses: 41" - # echo - # run "lms" - # echo "==== Correct lms ====" - # echo "MustHits: 5" - # echo "MayMisses: 288" - # echo - # run "minver" - # echo "==== Correct minver ====" - # echo "MustHits: 6" - # echo "MayMisses: 224" - # echo - # run "qsort-exam" - # echo "==== Correct qsort-exam ====" - # echo "MustHits: 2" - # echo "MayMisses: 152" - # echo - # run "recursion" - # echo "==== Correct recursion ====" - # echo "MustHits: 8" - # echo "MayMisses: 8" - # echo - # run "select" - # echo "==== Correct select ====" - # echo "MustHits: 4" - # echo "MayMisses: 108" - # echo - # run "whet" - # echo "==== Correct whet ====" - # echo "MustHits: 5" - # echo "MayMisses: 265" - # echo + run "fft1" + echo "==== Correct fft1 ====" + echo "MustHits: 16" + echo "MayMisses: 280" + echo + run "cnt" + echo "==== Correct cnt ====" + echo "MustHits: x" + echo "MayMisses: xx" + echo + run "crc" + echo "==== Correct crc ====" + echo "MustHits: x" + echo "MayMisses: xx" + echo + run "duff" + echo "==== Correct duff ====" + echo "MustHits: x" + echo "MayMisses: xx" + echo + run "insertsort" + echo "==== Correct insertsort ====" + echo "MustHits: x" + echo "MayMisses: xx" + echo + run "matmult" + echo "==== Correct matmult ====" + echo "MustHits: x" + echo "MayMisses: x" + echo ;; a | all) clean diff --git a/include/AbstractCache.h b/include/AbstractCache.h index 48df8e7..bc522c9 100644 --- a/include/AbstractCache.h +++ b/include/AbstractCache.h @@ -30,7 +30,6 @@ public: // everything is public, because IDGAF // map keys are instruction Addresses. 
std::map> Edges; std::map Nodes; - unsigned int NumberOfNodes = 0; AbstractCache() {} @@ -46,9 +45,120 @@ public: // everything is public, because IDGAF Nodes[Suc].Predecessors.push_back(Pre); } + /** + * @brief Remove an Edge from the AbstractStateGraph + * + * @param Pre + * @param Suc + */ + void removeEdge(unsigned int Pre, unsigned int Suc) { + Edges[Pre].remove(Suc); + Nodes[Pre].Successors.remove(Suc); + Nodes[Suc].Predecessors.remove(Pre); + } + + /** + * @brief Add an empty node for NodeAddr + * + * @param NodeAddr + * @return unsigned int + */ unsigned int addEmptyNode(unsigned int NodeAddr) { - Nodes[NumberOfNodes++] = AbstractState(NodeAddr); - return NumberOfNodes; + int I = Nodes.size(); + Nodes[I] = AbstractState(NodeAddr); + return I; + } + + /** + * @brief Returns True if a path From -> To exists. + * + * @param From + * @param To + * @return true + * @return false + */ + bool findPath(unsigned int From, unsigned int To) { + std::map Visited; + Visited[From] = false; + bool Ret = false; + for (auto Visitor : Visited) { + if (!Visitor.second) { + for (unsigned int Next : Edges[Visitor.first]) { + if (Next == To) { + return true; + } + Visited[Next] = false; + } + } + Visited[Visitor.first] = true; + } + return false; + } + + /** + * @brief Removes all Nested loops from the handed LoopBody + * + * @param LoopBodyIn + * @param OrigNodeToUnrolledNode + */ + void removeNestedLoops( + std::list LoopBodyIn, + std::map OrigNodeToUnrolledNode) { + unsigned int LoopHead = LoopBodyIn.front(); + unsigned int LoopTail = LoopBodyIn.back(); + unsigned int NestLoopTail; + for (unsigned int NodeNr : LoopBodyIn) { + bool IsLoopHead = false; + bool FoundLoopBody = false; + unsigned int LoopBodySize = 0; + int NestLoopHead = 0; + NestLoopHead = NodeNr; + if (Nodes[NodeNr].Predecessors.size() > 1) { + IsLoopHead = true; + FoundLoopBody = false; + LoopBodySize++; + // is loop head?
+ for (unsigned int Pre : Nodes[NodeNr].Predecessors) { + if (Pre > NodeNr) { + // Might be loop head. + // check if all States between Pre and NodeNr are a coherent set. + for (unsigned int I = NodeNr; I < Pre; I++) { + // Check if all out going edges are in the set + for (unsigned int Succ : Nodes[I].Successors) { + if (Succ > Pre) { + // Set is not coherent + IsLoopHead = false; + break; + } + } + // check if all incoming edges are in the set. + if (IsLoopHead && I != NodeNr) + for (unsigned int Pred : Nodes[I].Predecessors) { + if (Pred < NodeNr) { + // Set is not coherent + IsLoopHead = false; + break; + } + } + FoundLoopBody = true; + LoopBodySize++; + } + NestLoopTail = Pre; + } else if (!FoundLoopBody) { + // If no coherent loop body exists, we cannot unroll. + NestLoopHead = 0; + IsLoopHead = false; + } + if (FoundLoopBody) { + // Check if a Path between Head and Tail exists, + // if not, it is not a loop. + if (findPath(NestLoopHead, NestLoopTail)) + removeEdge(OrigNodeToUnrolledNode[NestLoopTail], + OrigNodeToUnrolledNode[NestLoopHead]); + } + } + } + } } /** @@ -57,15 +167,23 @@ public: // everything is public, because IDGAF * @param NodeNr */ void unrollLoops() { - for (auto NodePair : Nodes) { - unsigned int NodeNr = NodePair.first; - if (NodeNr == 34) { - llvm::outs() << "HI\n"; + unsigned int NestedBorder = 0; + unsigned int LastNode = Nodes.size(); + unsigned int IterationCounter = 0; + for (std::pair NodePair : Nodes) { + IterationCounter++; + if (NodePair.first == LastNode) { + break; } + unsigned int NodeNr = NodePair.first; + // Don't unroll nested loops + if (NodeNr < NestedBorder) + continue; bool IsLoopHead = false; bool FoundLoopBody = false; - bool Verbose = true; + bool Verbose = false; std::list LoopBody; + std::list AdditionalLoopTails; if (Nodes[NodeNr].Predecessors.size() > 1) { IsLoopHead = true; // is loop head? @@ -73,15 +191,42 @@ public: // everything is public, because IDGAF if (Pre > NodeNr) { // Might be loop head.
// check if all States between Pre and NodeNr are a coherent set. - for (uint I = NodeNr; I < Pre; I++) { - LoopBody.push_back(I); - for (uint Succ : Nodes[I].Successors) { - if (Succ > Pre) { - // Set is not coherent - IsLoopHead = false; - break; + for (unsigned int I = NodeNr; I < Pre; I++) { + // Check if all out going edges are in the set + for (unsigned int Succ : Nodes[I].Successors) { + for (unsigned int PreI : Nodes[I].Predecessors) { + // Handle if we have multiple Loopheads. + if (PreI >= Pre && I != NodeNr) { + // I and Pre are Looptail. + { + if (std::find(AdditionalLoopTails.begin(), + AdditionalLoopTails.end(), + I) == AdditionalLoopTails.end()) { + AdditionalLoopTails.push_back(I); + break; + } + } + } + if (std::find(LoopBody.begin(), LoopBody.end(), I) == + LoopBody.end()) + LoopBody.push_back(I); + + if (Succ > Pre) { + // Set is not coherent + IsLoopHead = false; + break; + } } } + // check if all incoming edges are in the set. + if (IsLoopHead && I != NodeNr) + for (unsigned int Pred : Nodes[I].Predecessors) { + if (Pred < NodeNr) { + // Set is not coherent + IsLoopHead = false; + break; + } + } FoundLoopBody = true; } LoopBody.push_back(Pre); @@ -92,45 +237,91 @@ public: // everything is public, because IDGAF } } } - if (IsLoopHead && Verbose) { - llvm::outs() << "Found LoopHead @: " << NodeNr << "\n"; - llvm::outs() << "With Body: {\n"; - int I = 1; - for (auto Node : LoopBody) { - llvm::outs() << Node << ", "; - if (!(I++ % 5)) { - llvm::outs() << "\n"; - } - } - llvm::outs() << "}\n"; - } // Found Loop Head and Body! - // TODO: Now unroll // Add empty unrolled Nodes // Map points from OrigNode To Unrolled Node. 
- std::map OrigNodeToUnrolledNode; - for (auto Node : LoopBody) { - // Node to unroll - AbstractState UnrolledNode(Nodes[Node]); - UnrolledNode.setUnrolled(1); - Nodes[NumberOfNodes++] = UnrolledNode; - OrigNodeToUnrolledNode[Node] = NumberOfNodes; - } - - unsigned int LoopHead = LoopBody.front(); - LoopBody.pop_front(); - unsigned int LoopTail = LoopBody.back(); - LoopBody.pop_back(); - for (auto Node : LoopBody) { - for (auto Succ : Nodes[Node].Successors) { - // Add All successors to unrolled Node - Nodes[OrigNodeToUnrolledNode[Node]].Successors.push_back( - OrigNodeToUnrolledNode[Succ]); + if (FoundLoopBody) { + std::map OrigNodeToUnrolledNode; + for (unsigned int Node : LoopBody) { + // Node to unroll + AbstractState UnrolledNode(Nodes[Node]); + UnrolledNode.setUnrolled(1); + unsigned int I = Nodes.size(); + Nodes[I] = UnrolledNode; + OrigNodeToUnrolledNode[Node] = I; + assert(Nodes[OrigNodeToUnrolledNode[Node]].Unrolled == 1); + assert(Nodes[Node].Addr == Nodes[OrigNodeToUnrolledNode[Node]].Addr); } - for (auto Pre : Nodes[Node].Predecessors) { - // Add All predecessors to unrolled Node - Nodes[OrigNodeToUnrolledNode[Node]].Successors.push_back( - OrigNodeToUnrolledNode[Pre]); + + // LoopTail and Head have to be processed differently + unsigned int LoopTail = LoopBody.back(); + LoopBody.pop_back(); + NestedBorder = LoopTail; + unsigned int LoopHead = LoopBody.front(); + LoopBody.pop_front(); + + // Find the state entering LoopHead + unsigned int LoopHeadEntry = 0; + for (unsigned int Pre : Nodes[LoopHead].Predecessors) { + if (Pre < LoopHead) { + LoopHeadEntry = Pre; + break; + } + } + + // Make LoopHeadEntry point to unrolled state instead of the loop. + addEdge(LoopHeadEntry, OrigNodeToUnrolledNode[LoopHead]); + removeEdge(LoopHeadEntry, LoopHead); + // Connect unrolled Loop to the original Loop.
+ if (AdditionalLoopTails.size() == 0) + addEdge(OrigNodeToUnrolledNode[LoopTail], LoopHead); + for (auto Tail : AdditionalLoopTails) + addEdge(OrigNodeToUnrolledNode[Tail], LoopHead); + + // Fix all other states + addEdge(OrigNodeToUnrolledNode[LoopBody.back()], + OrigNodeToUnrolledNode[LoopTail]); + for (unsigned int Node : LoopBody) { + for (unsigned int Pre : Nodes[Node].Predecessors) { + // if (std::find(LoopBody.begin(), LoopBody.end(), Pre) != + // LoopBody.end()) + // Add All predecessors and successors to unrolled Nodes + addEdge(OrigNodeToUnrolledNode[Pre], OrigNodeToUnrolledNode[Node]); + } + } + + // Remove Nested loops in unrolled loop + removeNestedLoops(LoopBody, OrigNodeToUnrolledNode); + + if (Verbose && FoundLoopBody) { + llvm::outs() << "Found LoopHead @: " << NodeNr << "\n"; + llvm::outs() << "With LoopTail @: " << LoopTail << "\n"; + llvm::outs() << "With Body: {\n"; + int I = 1; + for (auto Node : LoopBody) { + llvm::outs() << Node << ", "; + if (!(I++ % 5)) { + llvm::outs() << "\n"; + } + } + llvm::outs() << "}\n"; + llvm::outs() << "Unrolled States: {\n"; + I = 1; + for (auto Node : LoopBody) { + llvm::outs() << OrigNodeToUnrolledNode[Node] << ", "; + if (!(I++ % 5)) { + llvm::outs() << "\n"; + } + } + llvm::outs() << "}\n"; + I = 1; + llvm::outs() << "OrigNodeToUnrolledNode: {\n"; + for (auto Nr : OrigNodeToUnrolledNode) { + llvm::outs() << Nr.first << "->" << Nr.second << ", "; + if (!(I++ % 3)) + llvm::outs() << "\n"; + } + llvm::outs() << "}\n"; } } } @@ -144,7 +335,6 @@ public: // everything is public, because IDGAF */ void fillAbstractCache(unsigned int NodeNr) { // if(isLoopHead(NodeNr)) - // unrollLoop(NodeNr); Nodes[NodeNr].Computed = true; for (unsigned int SuccNr : Nodes[NodeNr].Successors) { Nodes[SuccNr]; @@ -162,6 +352,11 @@ public: // everything is public, because IDGAF return; } + /** + * @brief Return number of measured Hits + * + * @return unsigned int + */ unsigned int collectHits() { unsigned int Hits = 0; for (auto const 
&E : Edges) { @@ -174,6 +369,11 @@ public: // everything is public, because IDGAF return Hits; } + /** + * @brief Return number of measured Misses + * + * @return unsigned int + */ unsigned int collectMisses() { unsigned int Misses = 0; for (auto const &E : Edges) { @@ -186,6 +386,10 @@ public: // everything is public, because IDGAF return Misses; } + /** + * @brief Prints all Edges to Console + * + */ void dumpEdges() { llvm::outs() << "Dumping Edges:\n"; for (auto const &E : Edges) { @@ -203,6 +407,10 @@ public: // everything is public, because IDGAF } } + /** + * @brief Dumps the Graph to an out.dot file + * + */ void dumpDotFile() { bool PrintOld = true; std::ofstream DotFile; @@ -213,6 +421,9 @@ public: // everything is public, because IDGAF for (unsigned int To : E.second) { if (PrintOld) { DotFile << E.first << " -> " << To << "\n"; + if (Nodes[E.first].Unrolled) { + DotFile << E.first << " [color = red]\n"; + } } else { DotFile << Nodes[E.first].Addr << "." << Nodes[E.first].Unrolled << " -> " << Nodes[To].Addr << "." << Nodes[To].Unrolled @@ -224,6 +435,10 @@ public: // everything is public, because IDGAF DotFile.close(); } + /** + * @brief Prints all nodes to Console + * + */ void dumpNodes() { for (auto const &E : Edges) { Nodes[E.first].dump();