llvm-for-llvmta/tools/clang/lib/Format/FormatTokenLexer.h

//===--- FormatTokenLexer.h - Format C++ code ----------------*- C++ ----*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file contains FormatTokenLexer, which tokenizes a source file
/// into a token stream suitable for ClangFormat.
///
//===----------------------------------------------------------------------===//

#ifndef LLVM_CLANG_LIB_FORMAT_FORMATTOKENLEXER_H
#define LLVM_CLANG_LIB_FORMAT_FORMATTOKENLEXER_H

#include "Encoding.h"
#include "FormatToken.h"
#include "clang/Basic/SourceLocation.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Format/Format.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/Support/Regex.h"

#include <stack>

namespace clang {
namespace format {

// Lexing contexts; FormatTokenLexer keeps a std::stack of these (StateStack).
// The enumerator values are spelled out explicitly and match the implicit
// numbering a plain enum would assign.
enum LexerState {
  // Default context.
  NORMAL = 0,
  // Presumably: lexing inside a JavaScript template string (see
  // FormatTokenLexer::handleTemplateStrings) -- confirm in the .cpp.
  TEMPLATE_STRING = 1,
  // Presumably: a token has been set aside and should be produced next (see
  // FormatTokenLexer::getStashedToken) -- confirm in the .cpp.
  TOKEN_STASHED = 2,
};

/// Tokenizes a source file into a stream of FormatToken objects suitable for
/// ClangFormat.
///
/// Apart from getKeywords(), every member function is only declared here and
/// defined out of line. Data members are initialized in declaration order, so
/// the order of the fields below must stay in sync with the constructor's
/// initializer list.
class FormatTokenLexer {
public:
  /// Creates a lexer for the file \p ID of \p SourceMgr.
  ///
  /// The reference parameters (\p SourceMgr, \p Style, \p Allocator,
  /// \p IdentTable) have matching reference members below, so the referenced
  /// objects are expected to outlive this lexer.
  /// NOTE(review): \p Column is presumably the starting column of the first
  /// token -- confirm against the constructor definition.
  FormatTokenLexer(const SourceManager &SourceMgr, FileID ID, unsigned Column,
                   const FormatStyle &Style, encoding::Encoding Encoding,
                   llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator,
                   IdentifierTable &IdentTable);

  /// Runs the lexer and returns a non-owning view of FormatToken pointers.
  /// NOTE(review): presumably a view onto the 'Tokens' member, in which case
  /// it is only valid while this object is alive -- confirm in the .cpp.
  ArrayRef<FormatToken *> lex();

  /// Returns the additional keywords table owned by this lexer.
  ///
  /// Const-qualified because it only hands out a const reference to a member;
  /// this lets it be called through a const FormatTokenLexer as well.
  const AdditionalKeywords &getKeywords() const { return Keywords; }

private:
  // Entry point for the tryMerge*/tryTransform* helpers below.
  // NOTE(review): presumably each helper returns \c true when it changed the
  // token stream -- confirm in the .cpp.
  void tryMergePreviousTokens();

  bool tryMergeLessLess();
  bool tryMergeNSStringLiteral();
  bool tryMergeJSPrivateIdentifier();
  bool tryMergeCSharpStringLiteral();
  bool tryMergeCSharpKeywordVariables();
  bool tryMergeCSharpDoubleQuestion();
  bool tryMergeCSharpNullConditional();
  bool tryTransformCSharpForEach();
  bool tryMergeForEach();
  bool tryTransformTryUsageForC();

  // NOTE(review): presumably merges the trailing tokens whose kinds match
  // \p Kinds into one token of type \p NewType -- confirm in the .cpp.
  bool tryMergeTokens(ArrayRef<tok::TokenKind> Kinds, TokenType NewType);

  // Returns \c true if \p Tok can only be followed by an operand in JavaScript.
  bool precedesOperand(FormatToken *Tok);

  // NOTE(review): presumably tells whether a JavaScript regex literal may
  // directly follow \p Prev -- confirm in the .cpp.
  bool canPrecedeRegexLiteral(FormatToken *Prev);

  // Tries to parse a JavaScript Regex literal starting at the current token,
  // if that begins with a slash and is in a location where JavaScript allows
  // regex literals. Changes the current token to a regex literal and updates
  // its text if successful.
  void tryParseJSRegexLiteral();

  // Handles JavaScript template strings.
  //
  // JavaScript template strings use backticks ('`') as delimiters, and allow
  // embedding expressions nested in ${expr-here}. Template strings can be
  // nested recursively, i.e. expressions can contain template strings in turn.
  //
  // The code below parses starting from a backtick, up to a closing backtick or
  // an opening ${. It also maintains a stack of lexing contexts to handle
  // nested template parts by balancing curly braces.
  void handleTemplateStrings();

  void handleCSharpVerbatimAndInterpolatedStrings();

  void tryParsePythonComment();

  bool tryMerge_TMacro();

  bool tryMergeConflictMarkers();

  FormatToken *getStashedToken();

  FormatToken *getNextToken();

  // --- State ---------------------------------------------------------------
  // Do not reorder these fields: C++ initializes members in declaration
  // order, and the constructor is defined out of line.

  // Token currently being processed (non-owning pointer).
  FormatToken *FormatTok;
  bool IsFirstToken;
  // Stack of lexing contexts (see LexerState and handleTemplateStrings).
  std::stack<LexerState> StateStack;
  unsigned Column;
  unsigned TrailingWhitespace;
  // Underlying raw lexer; owned by this object.
  std::unique_ptr<Lexer> Lex;
  const SourceManager &SourceMgr;
  FileID ID;
  const FormatStyle &Style;
  IdentifierTable &IdentTable;
  AdditionalKeywords Keywords;
  encoding::Encoding Encoding;
  llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator;
  // Index (in 'Tokens') of the last token that starts a new line.
  unsigned FirstInLineIndex;
  SmallVector<FormatToken *, 16> Tokens;

  // Maps identifiers to the token type they should be given.
  // NOTE(review): presumably populated from macro lists in the style --
  // confirm in the constructor.
  llvm::SmallMapVector<IdentifierInfo *, TokenType, 8> Macros;

  bool FormattingDisabled;

  llvm::Regex MacroBlockBeginRegex;
  llvm::Regex MacroBlockEndRegex;

  // Targets that may appear inside a C# attribute.
  static const llvm::StringSet<> CSharpAttributeTargets;

  void readRawToken(FormatToken &Tok);

  void resetLexer(unsigned Offset);
};

} // namespace format
} // namespace clang

#endif
added clang 2022-04-25 13:02:35 +02:00			`//===--- FormatTokenLexer.h - Format C++ code ----------------- C++ -----===//`
			`//`
			`// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.`
			`// See https://llvm.org/LICENSE.txt for license information.`
			`// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception`
			`//`
			`//===----------------------------------------------------------------------===//`
			`///`
			`/// \file`
			`/// This file contains FormatTokenLexer, which tokenizes a source file`
			`/// into a token stream suitable for ClangFormat.`
			`///`
			`//===----------------------------------------------------------------------===//`

			`#ifndef LLVM_CLANG_LIB_FORMAT_FORMATTOKENLEXER_H`
			`#define LLVM_CLANG_LIB_FORMAT_FORMATTOKENLEXER_H`

			`#include "Encoding.h"`
			`#include "FormatToken.h"`
			`#include "clang/Basic/SourceLocation.h"`
			`#include "clang/Basic/SourceManager.h"`
			`#include "clang/Format/Format.h"`
			`#include "llvm/ADT/MapVector.h"`
			`#include "llvm/ADT/StringSet.h"`
			`#include "llvm/Support/Regex.h"`

			`#include <stack>`

			`namespace clang {`
			`namespace format {`

			`enum LexerState {`
			`NORMAL,`
			`TEMPLATE_STRING,`
			`TOKEN_STASHED,`
			`};`

			`class FormatTokenLexer {`
			`public:`
			`FormatTokenLexer(const SourceManager &SourceMgr, FileID ID, unsigned Column,`
			`const FormatStyle &Style, encoding::Encoding Encoding,`
			`llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator,`
			`IdentifierTable &IdentTable);`

			`ArrayRef<FormatToken *> lex();`

			`const AdditionalKeywords &getKeywords() { return Keywords; }`

			`private:`
			`void tryMergePreviousTokens();`

			`bool tryMergeLessLess();`
			`bool tryMergeNSStringLiteral();`
			`bool tryMergeJSPrivateIdentifier();`
			`bool tryMergeCSharpStringLiteral();`
			`bool tryMergeCSharpKeywordVariables();`
			`bool tryMergeCSharpDoubleQuestion();`
			`bool tryMergeCSharpNullConditional();`
			`bool tryTransformCSharpForEach();`
			`bool tryMergeForEach();`
			`bool tryTransformTryUsageForC();`

			`bool tryMergeTokens(ArrayRef<tok::TokenKind> Kinds, TokenType NewType);`

			`// Returns \c true if \p Tok can only be followed by an operand in JavaScript.`
			`bool precedesOperand(FormatToken *Tok);`

			`bool canPrecedeRegexLiteral(FormatToken *Prev);`

			`// Tries to parse a JavaScript Regex literal starting at the current token,`
			`// if that begins with a slash and is in a location where JavaScript allows`
			`// regex literals. Changes the current token to a regex literal and updates`
			`// its text if successful.`
			`void tryParseJSRegexLiteral();`

			`// Handles JavaScript template strings.`
			`//`
			// JavaScript template strings use backticks ('`') as delimiters, and allow
			`// embedding expressions nested in ${expr-here}. Template strings can be`
			`// nested recursively, i.e. expressions can contain template strings in turn.`
			`//`
			`// The code below parses starting from a backtick, up to a closing backtick or`
			`// an opening ${. It also maintains a stack of lexing contexts to handle`
			`// nested template parts by balancing curly braces.`
			`void handleTemplateStrings();`

			`void handleCSharpVerbatimAndInterpolatedStrings();`

			`void tryParsePythonComment();`

			`bool tryMerge_TMacro();`

			`bool tryMergeConflictMarkers();`

			`FormatToken *getStashedToken();`

			`FormatToken *getNextToken();`

			`FormatToken *FormatTok;`
			`bool IsFirstToken;`
			`std::stack<LexerState> StateStack;`
			`unsigned Column;`
			`unsigned TrailingWhitespace;`
			`std::unique_ptr<Lexer> Lex;`
			`const SourceManager &SourceMgr;`
			`FileID ID;`
			`const FormatStyle &Style;`
			`IdentifierTable &IdentTable;`
			`AdditionalKeywords Keywords;`
			`encoding::Encoding Encoding;`
			`llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator;`
			`// Index (in 'Tokens') of the last token that starts a new line.`
			`unsigned FirstInLineIndex;`
			`SmallVector<FormatToken *, 16> Tokens;`

			`llvm::SmallMapVector<IdentifierInfo *, TokenType, 8> Macros;`

			`bool FormattingDisabled;`

			`llvm::Regex MacroBlockBeginRegex;`
			`llvm::Regex MacroBlockEndRegex;`

			`// Targets that may appear inside a C# attribute.`
			`static const llvm::StringSet<> CSharpAttributeTargets;`

			`void readRawToken(FormatToken &Tok);`

			`void resetLexer(unsigned Offset);`
			`};`

			`} // namespace format`
			`} // namespace clang`

			`#endif`