//===- unittests/Lex/LexerTest.cpp ------ Lexer tests ---------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #include "clang/Lex/Lexer.h" #include "clang/Basic/Diagnostic.h" #include "clang/Basic/DiagnosticOptions.h" #include "clang/Basic/FileManager.h" #include "clang/Basic/LangOptions.h" #include "clang/Basic/SourceLocation.h" #include "clang/Basic/SourceManager.h" #include "clang/Basic/TargetInfo.h" #include "clang/Basic/TargetOptions.h" #include "clang/Basic/TokenKinds.h" #include "clang/Lex/HeaderSearch.h" #include "clang/Lex/HeaderSearchOptions.h" #include "clang/Lex/MacroArgs.h" #include "clang/Lex/MacroInfo.h" #include "clang/Lex/ModuleLoader.h" #include "clang/Lex/Preprocessor.h" #include "clang/Lex/PreprocessorOptions.h" #include "gmock/gmock.h" #include "gtest/gtest.h" #include namespace { using namespace clang; using testing::ElementsAre; // The test fixture. class LexerTest : public ::testing::Test { protected: LexerTest() : FileMgr(FileMgrOpts), DiagID(new DiagnosticIDs()), Diags(DiagID, new DiagnosticOptions, new IgnoringDiagConsumer()), SourceMgr(Diags, FileMgr), TargetOpts(new TargetOptions) { TargetOpts->Triple = "x86_64-apple-darwin11.1.0"; Target = TargetInfo::CreateTargetInfo(Diags, TargetOpts); } std::unique_ptr CreatePP(StringRef Source, TrivialModuleLoader &ModLoader) { std::unique_ptr Buf = llvm::MemoryBuffer::getMemBuffer(Source); SourceMgr.setMainFileID(SourceMgr.createFileID(std::move(Buf))); HeaderSearch HeaderInfo(std::make_shared(), SourceMgr, Diags, LangOpts, Target.get()); std::unique_ptr PP = std::make_unique( std::make_shared(), Diags, LangOpts, SourceMgr, HeaderInfo, ModLoader, /*IILookup =*/nullptr, /*OwnsHeaderSearch =*/false); PP->Initialize(*Target); PP->EnterMainSourceFile(); return PP; } std::vector Lex(StringRef Source) { TrivialModuleLoader ModLoader; auto PP = CreatePP(Source, ModLoader); std::vector toks; while (1) { Token tok; PP->Lex(tok); if (tok.is(tok::eof)) break; toks.push_back(tok); } return toks; } std::vector CheckLex(StringRef Source, ArrayRef ExpectedTokens) { auto toks = Lex(Source); EXPECT_EQ(ExpectedTokens.size(), toks.size()); for (unsigned i = 0, e = ExpectedTokens.size(); i != e; ++i) { EXPECT_EQ(ExpectedTokens[i], toks[i].getKind()); } return toks; } std::string getSourceText(Token Begin, Token End) { bool Invalid; StringRef Str = Lexer::getSourceText(CharSourceRange::getTokenRange(SourceRange( Begin.getLocation(), End.getLocation())), SourceMgr, LangOpts, &Invalid); if (Invalid) return ""; return std::string(Str); } FileSystemOptions FileMgrOpts; FileManager FileMgr; IntrusiveRefCntPtr DiagID; DiagnosticsEngine Diags; SourceManager SourceMgr; LangOptions LangOpts; std::shared_ptr TargetOpts; IntrusiveRefCntPtr Target; }; TEST_F(LexerTest, GetSourceTextExpandsToMaximumInMacroArgument) { std::vector ExpectedTokens; ExpectedTokens.push_back(tok::identifier); ExpectedTokens.push_back(tok::l_paren); ExpectedTokens.push_back(tok::identifier); ExpectedTokens.push_back(tok::r_paren); std::vector toks = CheckLex("#define M(x) x\n" "M(f(M(i)))", ExpectedTokens); EXPECT_EQ("M(i)", getSourceText(toks[2], toks[2])); } TEST_F(LexerTest, GetSourceTextExpandsToMaximumInMacroArgumentForEndOfMacro) { std::vector ExpectedTokens; ExpectedTokens.push_back(tok::identifier); ExpectedTokens.push_back(tok::identifier); std::vector toks = CheckLex("#define M(x) x\n" "M(M(i) c)", ExpectedTokens); EXPECT_EQ("M(i)", getSourceText(toks[0], toks[0])); } TEST_F(LexerTest, GetSourceTextExpandsInMacroArgumentForBeginOfMacro) { std::vector ExpectedTokens; ExpectedTokens.push_back(tok::identifier); ExpectedTokens.push_back(tok::identifier); ExpectedTokens.push_back(tok::identifier); std::vector toks = CheckLex("#define M(x) x\n" "M(c c M(i))", ExpectedTokens); EXPECT_EQ("c M(i)", getSourceText(toks[1], toks[2])); } TEST_F(LexerTest, GetSourceTextExpandsInMacroArgumentForEndOfMacro) { std::vector ExpectedTokens; ExpectedTokens.push_back(tok::identifier); ExpectedTokens.push_back(tok::identifier); ExpectedTokens.push_back(tok::identifier); std::vector toks = CheckLex("#define M(x) x\n" "M(M(i) c c)", ExpectedTokens); EXPECT_EQ("M(i) c", getSourceText(toks[0], toks[1])); } TEST_F(LexerTest, GetSourceTextInSeparateFnMacros) { std::vector ExpectedTokens; ExpectedTokens.push_back(tok::identifier); ExpectedTokens.push_back(tok::identifier); ExpectedTokens.push_back(tok::identifier); ExpectedTokens.push_back(tok::identifier); std::vector toks = CheckLex("#define M(x) x\n" "M(c M(i)) M(M(i) c)", ExpectedTokens); EXPECT_EQ("", getSourceText(toks[1], toks[2])); } TEST_F(LexerTest, GetSourceTextWorksAcrossTokenPastes) { std::vector ExpectedTokens; ExpectedTokens.push_back(tok::identifier); ExpectedTokens.push_back(tok::l_paren); ExpectedTokens.push_back(tok::identifier); ExpectedTokens.push_back(tok::r_paren); std::vector toks = CheckLex("#define M(x) x\n" "#define C(x) M(x##c)\n" "M(f(C(i)))", ExpectedTokens); EXPECT_EQ("C(i)", getSourceText(toks[2], toks[2])); } TEST_F(LexerTest, GetSourceTextExpandsAcrossMultipleMacroCalls) { std::vector ExpectedTokens; ExpectedTokens.push_back(tok::identifier); ExpectedTokens.push_back(tok::l_paren); ExpectedTokens.push_back(tok::identifier); ExpectedTokens.push_back(tok::r_paren); std::vector toks = CheckLex("#define M(x) x\n" "f(M(M(i)))", ExpectedTokens); EXPECT_EQ("M(M(i))", getSourceText(toks[2], toks[2])); } TEST_F(LexerTest, GetSourceTextInMiddleOfMacroArgument) { std::vector ExpectedTokens; ExpectedTokens.push_back(tok::identifier); ExpectedTokens.push_back(tok::l_paren); ExpectedTokens.push_back(tok::identifier); ExpectedTokens.push_back(tok::r_paren); std::vector toks = CheckLex("#define M(x) x\n" "M(f(i))", ExpectedTokens); EXPECT_EQ("i", getSourceText(toks[2], toks[2])); } TEST_F(LexerTest, GetSourceTextExpandsAroundDifferentMacroCalls) { std::vector ExpectedTokens; ExpectedTokens.push_back(tok::identifier); ExpectedTokens.push_back(tok::l_paren); ExpectedTokens.push_back(tok::identifier); ExpectedTokens.push_back(tok::r_paren); std::vector toks = CheckLex("#define M(x) x\n" "#define C(x) x\n" "f(C(M(i)))", ExpectedTokens); EXPECT_EQ("C(M(i))", getSourceText(toks[2], toks[2])); } TEST_F(LexerTest, GetSourceTextOnlyExpandsIfFirstTokenInMacro) { std::vector ExpectedTokens; ExpectedTokens.push_back(tok::identifier); ExpectedTokens.push_back(tok::l_paren); ExpectedTokens.push_back(tok::identifier); ExpectedTokens.push_back(tok::identifier); ExpectedTokens.push_back(tok::r_paren); std::vector toks = CheckLex("#define M(x) x\n" "#define C(x) c x\n" "f(C(M(i)))", ExpectedTokens); EXPECT_EQ("M(i)", getSourceText(toks[3], toks[3])); } TEST_F(LexerTest, GetSourceTextExpandsRecursively) { std::vector ExpectedTokens; ExpectedTokens.push_back(tok::identifier); ExpectedTokens.push_back(tok::identifier); ExpectedTokens.push_back(tok::l_paren); ExpectedTokens.push_back(tok::identifier); ExpectedTokens.push_back(tok::r_paren); std::vector toks = CheckLex("#define M(x) x\n" "#define C(x) c M(x)\n" "C(f(M(i)))", ExpectedTokens); EXPECT_EQ("M(i)", getSourceText(toks[3], toks[3])); } TEST_F(LexerTest, LexAPI) { std::vector ExpectedTokens; ExpectedTokens.push_back(tok::l_square); ExpectedTokens.push_back(tok::identifier); ExpectedTokens.push_back(tok::r_square); ExpectedTokens.push_back(tok::l_square); ExpectedTokens.push_back(tok::identifier); ExpectedTokens.push_back(tok::r_square); ExpectedTokens.push_back(tok::identifier); ExpectedTokens.push_back(tok::identifier); ExpectedTokens.push_back(tok::identifier); ExpectedTokens.push_back(tok::identifier); std::vector toks = CheckLex("#define M(x) [x]\n" "#define N(x) x\n" "#define INN(x) x\n" "#define NOF1 INN(val)\n" "#define NOF2 val\n" "M(foo) N([bar])\n" "N(INN(val)) N(NOF1) N(NOF2) N(val)", ExpectedTokens); SourceLocation lsqrLoc = toks[0].getLocation(); SourceLocation idLoc = toks[1].getLocation(); SourceLocation rsqrLoc = toks[2].getLocation(); CharSourceRange macroRange = SourceMgr.getExpansionRange(lsqrLoc); SourceLocation Loc; EXPECT_TRUE(Lexer::isAtStartOfMacroExpansion(lsqrLoc, SourceMgr, LangOpts, &Loc)); EXPECT_EQ(Loc, macroRange.getBegin()); EXPECT_FALSE(Lexer::isAtStartOfMacroExpansion(idLoc, SourceMgr, LangOpts)); EXPECT_FALSE(Lexer::isAtEndOfMacroExpansion(idLoc, SourceMgr, LangOpts)); EXPECT_TRUE(Lexer::isAtEndOfMacroExpansion(rsqrLoc, SourceMgr, LangOpts, &Loc)); EXPECT_EQ(Loc, macroRange.getEnd()); EXPECT_TRUE(macroRange.isTokenRange()); CharSourceRange range = Lexer::makeFileCharRange( CharSourceRange::getTokenRange(lsqrLoc, idLoc), SourceMgr, LangOpts); EXPECT_TRUE(range.isInvalid()); range = Lexer::makeFileCharRange(CharSourceRange::getTokenRange(idLoc, rsqrLoc), SourceMgr, LangOpts); EXPECT_TRUE(range.isInvalid()); range = Lexer::makeFileCharRange(CharSourceRange::getTokenRange(lsqrLoc, rsqrLoc), SourceMgr, LangOpts); EXPECT_TRUE(!range.isTokenRange()); EXPECT_EQ(range.getAsRange(), SourceRange(macroRange.getBegin(), macroRange.getEnd().getLocWithOffset(1))); StringRef text = Lexer::getSourceText( CharSourceRange::getTokenRange(lsqrLoc, rsqrLoc), SourceMgr, LangOpts); EXPECT_EQ(text, "M(foo)"); SourceLocation macroLsqrLoc = toks[3].getLocation(); SourceLocation macroIdLoc = toks[4].getLocation(); SourceLocation macroRsqrLoc = toks[5].getLocation(); SourceLocation fileLsqrLoc = SourceMgr.getSpellingLoc(macroLsqrLoc); SourceLocation fileIdLoc = SourceMgr.getSpellingLoc(macroIdLoc); SourceLocation fileRsqrLoc = SourceMgr.getSpellingLoc(macroRsqrLoc); range = Lexer::makeFileCharRange( CharSourceRange::getTokenRange(macroLsqrLoc, macroIdLoc), SourceMgr, LangOpts); EXPECT_EQ(SourceRange(fileLsqrLoc, fileIdLoc.getLocWithOffset(3)), range.getAsRange()); range = Lexer::makeFileCharRange(CharSourceRange::getTokenRange(macroIdLoc, macroRsqrLoc), SourceMgr, LangOpts); EXPECT_EQ(SourceRange(fileIdLoc, fileRsqrLoc.getLocWithOffset(1)), range.getAsRange()); macroRange = SourceMgr.getExpansionRange(macroLsqrLoc); range = Lexer::makeFileCharRange( CharSourceRange::getTokenRange(macroLsqrLoc, macroRsqrLoc), SourceMgr, LangOpts); EXPECT_EQ(SourceRange(macroRange.getBegin(), macroRange.getEnd().getLocWithOffset(1)), range.getAsRange()); text = Lexer::getSourceText( CharSourceRange::getTokenRange(SourceRange(macroLsqrLoc, macroIdLoc)), SourceMgr, LangOpts); EXPECT_EQ(text, "[bar"); SourceLocation idLoc1 = toks[6].getLocation(); SourceLocation idLoc2 = toks[7].getLocation(); SourceLocation idLoc3 = toks[8].getLocation(); SourceLocation idLoc4 = toks[9].getLocation(); EXPECT_EQ("INN", Lexer::getImmediateMacroName(idLoc1, SourceMgr, LangOpts)); EXPECT_EQ("INN", Lexer::getImmediateMacroName(idLoc2, SourceMgr, LangOpts)); EXPECT_EQ("NOF2", Lexer::getImmediateMacroName(idLoc3, SourceMgr, LangOpts)); EXPECT_EQ("N", Lexer::getImmediateMacroName(idLoc4, SourceMgr, LangOpts)); } TEST_F(LexerTest, DontMergeMacroArgsFromDifferentMacroFiles) { std::vector toks = Lex("#define helper1 0\n" "void helper2(const char *, ...);\n" "#define M1(a, ...) helper2(a, ##__VA_ARGS__)\n" "#define M2(a, ...) M1(a, helper1, ##__VA_ARGS__)\n" "void f1() { M2(\"a\", \"b\"); }"); // Check the file corresponding to the "helper1" macro arg in M2. // // The lexer used to report its size as 31, meaning that the end of the // expansion would be on the *next line* (just past `M2("a", "b")`). Make // sure that we get the correct end location (the comma after "helper1"). SourceLocation helper1ArgLoc = toks[20].getLocation(); EXPECT_EQ(SourceMgr.getFileIDSize(SourceMgr.getFileID(helper1ArgLoc)), 8U); } TEST_F(LexerTest, DontOverallocateStringifyArgs) { TrivialModuleLoader ModLoader; auto PP = CreatePP("\"StrArg\", 5, 'C'", ModLoader); llvm::BumpPtrAllocator Allocator; std::array ParamList; MacroInfo *MI = PP->AllocateMacroInfo({}); MI->setIsFunctionLike(); MI->setParameterList(ParamList, Allocator); EXPECT_EQ(3u, MI->getNumParams()); EXPECT_TRUE(MI->isFunctionLike()); Token Eof; Eof.setKind(tok::eof); std::vector ArgTokens; while (1) { Token tok; PP->Lex(tok); if (tok.is(tok::eof)) { ArgTokens.push_back(Eof); break; } if (tok.is(tok::comma)) ArgTokens.push_back(Eof); else ArgTokens.push_back(tok); } auto MacroArgsDeleter = [&PP](MacroArgs *M) { M->destroy(*PP); }; std::unique_ptr MA( MacroArgs::create(MI, ArgTokens, false, *PP), MacroArgsDeleter); auto StringifyArg = [&](int ArgNo) { return MA->StringifyArgument(MA->getUnexpArgument(ArgNo), *PP, /*Charify=*/false, {}, {}); }; Token Result = StringifyArg(0); EXPECT_EQ(tok::string_literal, Result.getKind()); EXPECT_STREQ("\"\\\"StrArg\\\"\"", Result.getLiteralData()); Result = StringifyArg(1); EXPECT_EQ(tok::string_literal, Result.getKind()); EXPECT_STREQ("\"5\"", Result.getLiteralData()); Result = StringifyArg(2); EXPECT_EQ(tok::string_literal, Result.getKind()); EXPECT_STREQ("\"'C'\"", Result.getLiteralData()); #if !defined(NDEBUG) && GTEST_HAS_DEATH_TEST EXPECT_DEATH(StringifyArg(3), "Invalid arg #"); #endif } TEST_F(LexerTest, IsNewLineEscapedValid) { auto hasNewLineEscaped = [](const char *S) { return Lexer::isNewLineEscaped(S, S + strlen(S) - 1); }; EXPECT_TRUE(hasNewLineEscaped("\\\r")); EXPECT_TRUE(hasNewLineEscaped("\\\n")); EXPECT_TRUE(hasNewLineEscaped("\\\r\n")); EXPECT_TRUE(hasNewLineEscaped("\\\n\r")); EXPECT_TRUE(hasNewLineEscaped("\\ \t\v\f\r")); EXPECT_TRUE(hasNewLineEscaped("\\ \t\v\f\r\n")); EXPECT_FALSE(hasNewLineEscaped("\\\r\r")); EXPECT_FALSE(hasNewLineEscaped("\\\r\r\n")); EXPECT_FALSE(hasNewLineEscaped("\\\n\n")); EXPECT_FALSE(hasNewLineEscaped("\r")); EXPECT_FALSE(hasNewLineEscaped("\n")); EXPECT_FALSE(hasNewLineEscaped("\r\n")); EXPECT_FALSE(hasNewLineEscaped("\n\r")); EXPECT_FALSE(hasNewLineEscaped("\r\r")); EXPECT_FALSE(hasNewLineEscaped("\n\n")); } TEST_F(LexerTest, GetBeginningOfTokenWithEscapedNewLine) { // Each line should have the same length for // further offset calculation to be more straightforward. const unsigned IdentifierLength = 8; std::string TextToLex = "rabarbar\n" "foo\\\nbar\n" "foo\\\rbar\n" "fo\\\r\nbar\n" "foo\\\n\rba\n"; std::vector ExpectedTokens{5, tok::identifier}; std::vector LexedTokens = CheckLex(TextToLex, ExpectedTokens); for (const Token &Tok : LexedTokens) { std::pair OriginalLocation = SourceMgr.getDecomposedLoc(Tok.getLocation()); for (unsigned Offset = 0; Offset < IdentifierLength; ++Offset) { SourceLocation LookupLocation = Tok.getLocation().getLocWithOffset(Offset); std::pair FoundLocation = SourceMgr.getDecomposedExpansionLoc( Lexer::GetBeginningOfToken(LookupLocation, SourceMgr, LangOpts)); // Check that location returned by the GetBeginningOfToken // is the same as original token location reported by Lexer. EXPECT_EQ(FoundLocation.second, OriginalLocation.second); } } } TEST_F(LexerTest, AvoidPastEndOfStringDereference) { EXPECT_TRUE(Lex(" // \\\n").empty()); EXPECT_TRUE(Lex("#include <\\\\").empty()); EXPECT_TRUE(Lex("#include <\\\\\n").empty()); } TEST_F(LexerTest, StringizingRasString) { // For "std::string Lexer::Stringify(StringRef Str, bool Charify)". std::string String1 = R"(foo {"bar":[]} baz)"; // For "void Lexer::Stringify(SmallVectorImpl &Str)". SmallString<128> String2; String2 += String1.c_str(); // Corner cases. std::string String3 = R"(\ \n \\n \\)"; SmallString<128> String4; String4 += String3.c_str(); std::string String5 = R"(a\ \\b)"; SmallString<128> String6; String6 += String5.c_str(); String1 = Lexer::Stringify(StringRef(String1)); Lexer::Stringify(String2); String3 = Lexer::Stringify(StringRef(String3)); Lexer::Stringify(String4); String5 = Lexer::Stringify(StringRef(String5)); Lexer::Stringify(String6); EXPECT_EQ(String1, R"(foo\n {\"bar\":[]}\n baz)"); EXPECT_EQ(String2, R"(foo\n {\"bar\":[]}\n baz)"); EXPECT_EQ(String3, R"(\\\n \\n\n \\\\n\n \\\\)"); EXPECT_EQ(String4, R"(\\\n \\n\n \\\\n\n \\\\)"); EXPECT_EQ(String5, R"(a\\\n\n\n \\\\b)"); EXPECT_EQ(String6, R"(a\\\n\n\n \\\\b)"); } TEST_F(LexerTest, CharRangeOffByOne) { std::vector toks = Lex(R"(#define MOO 1 void foo() { MOO; })"); const Token &moo = toks[5]; EXPECT_EQ(getSourceText(moo, moo), "MOO"); SourceRange R{moo.getLocation(), moo.getLocation()}; EXPECT_TRUE( Lexer::isAtStartOfMacroExpansion(R.getBegin(), SourceMgr, LangOpts)); EXPECT_TRUE( Lexer::isAtEndOfMacroExpansion(R.getEnd(), SourceMgr, LangOpts)); CharSourceRange CR = Lexer::getAsCharRange(R, SourceMgr, LangOpts); EXPECT_EQ(Lexer::getSourceText(CR, SourceMgr, LangOpts), "MOO"); // Was "MO". } TEST_F(LexerTest, FindNextToken) { Lex("int abcd = 0;\n" "int xyz = abcd;\n"); std::vector GeneratedByNextToken; SourceLocation Loc = SourceMgr.getLocForStartOfFile(SourceMgr.getMainFileID()); while (true) { auto T = Lexer::findNextToken(Loc, SourceMgr, LangOpts); ASSERT_TRUE(T.hasValue()); if (T->is(tok::eof)) break; GeneratedByNextToken.push_back(getSourceText(*T, *T)); Loc = T->getLocation(); } EXPECT_THAT(GeneratedByNextToken, ElementsAre("abcd", "=", "0", ";", "int", "xyz", "=", "abcd", ";")); } TEST_F(LexerTest, CreatedFIDCountForPredefinedBuffer) { TrivialModuleLoader ModLoader; auto PP = CreatePP("", ModLoader); while (1) { Token tok; PP->Lex(tok); if (tok.is(tok::eof)) break; } EXPECT_EQ(SourceMgr.getNumCreatedFIDsForFileID(PP->getPredefinesFileID()), 1U); } } // anonymous namespace