llvm-for-llvmta/tools/clang/lib/Format/TokenAnnotator.h

//===--- TokenAnnotator.h - Format C++ code ---------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file implements a token annotator, i.e. creates
/// \c AnnotatedTokens out of \c FormatTokens with required extra information.
///
//===----------------------------------------------------------------------===//

#ifndef LLVM_CLANG_LIB_FORMAT_TOKENANNOTATOR_H
#define LLVM_CLANG_LIB_FORMAT_TOKENANNOTATOR_H

#include "UnwrappedLineParser.h"
#include "clang/Format/Format.h"

namespace clang {
class SourceManager;

namespace format {

/// Classification of a line. \c AnnotatedLine stores one of these values in
/// its \c Type member.
///
/// No explicit values are assigned, so \c LT_Invalid is 0 and the remaining
/// enumerators follow in declaration order.
enum LineType {
  LT_Invalid,
  LT_ImportStatement,
  LT_ObjCDecl, // An @interface, @implementation, or @protocol line.
  LT_ObjCMethodDecl,
  LT_ObjCProperty, // An @property line.
  LT_Other,
  LT_PreprocessorDirective,
  LT_VirtualFunctionDecl
};

class AnnotatedLine {
public:
  AnnotatedLine(const UnwrappedLine &Line)
      : First(Line.Tokens.front().Tok), Level(Line.Level),
        MatchingOpeningBlockLineIndex(Line.MatchingOpeningBlockLineIndex),
        MatchingClosingBlockLineIndex(Line.MatchingClosingBlockLineIndex),
        InPPDirective(Line.InPPDirective),
        MustBeDeclaration(Line.MustBeDeclaration), MightBeFunctionDecl(false),
        IsMultiVariableDeclStmt(false), Affected(false),
        LeadingEmptyLinesAffected(false), ChildrenAffected(false),
        FirstStartColumn(Line.FirstStartColumn) {
    assert(!Line.Tokens.empty());

    // Calculate Next and Previous for all tokens. Note that we must overwrite
    // Next and Previous for every token, as previous formatting runs might have
    // left them in a different state.
    First->Previous = nullptr;
    FormatToken *Current = First;
    for (std::list<UnwrappedLineNode>::const_iterator I = ++Line.Tokens.begin(),
                                                      E = Line.Tokens.end();
         I != E; ++I) {
      const UnwrappedLineNode &Node = *I;
      Current->Next = I->Tok;
      I->Tok->Previous = Current;
      Current = Current->Next;
      Current->Children.clear();
      for (const auto &Child : Node.Children) {
        Children.push_back(new AnnotatedLine(Child));
        Current->Children.push_back(Children.back());
      }
    }
    Last = Current;
    Last->Next = nullptr;
  }

  ~AnnotatedLine() {
    for (unsigned i = 0, e = Children.size(); i != e; ++i) {
      delete Children[i];
    }
    FormatToken *Current = First;
    while (Current) {
      Current->Children.clear();
      Current->Role.reset();
      Current = Current->Next;
    }
  }

  /// \c true if this line starts with the given tokens in order, ignoring
  /// comments.
  template <typename... Ts> bool startsWith(Ts... Tokens) const {
    return First && First->startsSequence(Tokens...);
  }

  /// \c true if this line ends with the given tokens in reversed order,
  /// ignoring comments.
  /// For example, given tokens [T1, T2, T3, ...], the function returns true if
  /// this line is like "... T3 T2 T1".
  template <typename... Ts> bool endsWith(Ts... Tokens) const {
    return Last && Last->endsSequence(Tokens...);
  }

  /// \c true if this line looks like a function definition instead of a
  /// function declaration. Asserts MightBeFunctionDecl.
  bool mightBeFunctionDefinition() const {
    assert(MightBeFunctionDecl);
    // Try to determine if the end of a stream of tokens is either the
    // Definition or the Declaration for a function. It does this by looking for
    // the ';' in foo(); and using that it ends with a ; to know this is the
    // Definition, however the line could end with
    //    foo(); /* comment */
    // or
    //    foo(); // comment
    // or
    //    foo() // comment
    // endsWith() ignores the comment.
    return !endsWith(tok::semi);
  }

  /// \c true if this line starts a namespace definition.
  bool startsWithNamespace() const {
    return startsWith(tok::kw_namespace) || startsWith(TT_NamespaceMacro) ||
           startsWith(tok::kw_inline, tok::kw_namespace) ||
           startsWith(tok::kw_export, tok::kw_namespace);
  }

  FormatToken *First;
  FormatToken *Last;

  SmallVector<AnnotatedLine *, 0> Children;

  LineType Type;
  unsigned Level;
  size_t MatchingOpeningBlockLineIndex;
  size_t MatchingClosingBlockLineIndex;
  bool InPPDirective;
  bool MustBeDeclaration;
  bool MightBeFunctionDecl;
  bool IsMultiVariableDeclStmt;

  /// \c True if this line should be formatted, i.e. intersects directly or
  /// indirectly with one of the input ranges.
  bool Affected;

  /// \c True if the leading empty lines of this line intersect with one of the
  /// input ranges.
  bool LeadingEmptyLinesAffected;

  /// \c True if one of this line's children intersects with an input range.
  bool ChildrenAffected;

  unsigned FirstStartColumn;

private:
  // Disallow copying.
  AnnotatedLine(const AnnotatedLine &) = delete;
  void operator=(const AnnotatedLine &) = delete;
};

/// Determines extra information about the tokens comprising an
/// \c UnwrappedLine.
///
/// Only the constructor is defined in this header; every other member
/// function is declared here and defined elsewhere.
class TokenAnnotator {
public:
  /// Binds the annotator to \p Style and \p Keywords. Both are stored as
  /// references, so they must stay alive for as long as this object is used.
  TokenAnnotator(const FormatStyle &Style, const AdditionalKeywords &Keywords)
      : Style(Style), Keywords(Keywords) {}

  /// Adapts the indent levels of comment lines to the indent of the
  /// subsequent line.
  // FIXME: Can/should this be done in the UnwrappedLineParser?
  void setCommentLineLevels(SmallVectorImpl<AnnotatedLine *> &Lines);

  // NOTE(review): the definitions are not visible here. Both functions take
  // the line by non-const reference, so they presumably update \p Line and its
  // tokens in place -- confirm against the implementation file.
  void annotate(AnnotatedLine &Line);
  void calculateFormattingInformation(AnnotatedLine &Line);

private:
  /// Calculate the penalty for splitting before \c Tok.
  unsigned splitPenalty(const AnnotatedLine &Line, const FormatToken &Tok,
                        bool InFunctionDecl);

  // NOTE(review): the predicates below are only declared in this header. Going
  // by their names they answer spacing questions (spaceRequired*) and line
  // break questions (mustBreak*/canBreak*) for the token \p Right -- confirm
  // the exact contracts against the implementation file.
  bool spaceRequiredBeforeParens(const FormatToken &Right) const;

  bool spaceRequiredBetween(const AnnotatedLine &Line, const FormatToken &Left,
                            const FormatToken &Right);

  bool spaceRequiredBefore(const AnnotatedLine &Line, const FormatToken &Right);

  bool mustBreakBefore(const AnnotatedLine &Line, const FormatToken &Right);

  bool canBreakBefore(const AnnotatedLine &Line, const FormatToken &Right);

  bool mustBreakForReturnType(const AnnotatedLine &Line) const;

  void printDebugInfo(const AnnotatedLine &Line);

  void calculateUnbreakableTailLengths(AnnotatedLine &Line);

  // Style to apply; a reference to the object passed to the constructor.
  const FormatStyle &Style;

  // Additional keywords; a reference to the object passed to the constructor.
  const AdditionalKeywords &Keywords;
};

} // end namespace format
} // end namespace clang

#endif
added clang 2022-04-25 13:02:35 +02:00			`//===--- TokenAnnotator.h - Format C++ code ---------------------*- C++ -*-===//`
			`//`
			`// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.`
			`// See https://llvm.org/LICENSE.txt for license information.`
			`// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception`
			`//`
			`//===----------------------------------------------------------------------===//`
			`///`
			`/// \file`
			`/// This file implements a token annotator, i.e. creates`
			`/// \c AnnotatedTokens out of \c FormatTokens with required extra information.`
			`///`
			`//===----------------------------------------------------------------------===//`

			`#ifndef LLVM_CLANG_LIB_FORMAT_TOKENANNOTATOR_H`
			`#define LLVM_CLANG_LIB_FORMAT_TOKENANNOTATOR_H`

			`#include "UnwrappedLineParser.h"`
			`#include "clang/Format/Format.h"`

			`namespace clang {`
			`class SourceManager;`

			`namespace format {`

			`enum LineType {`
			`LT_Invalid,`
			`LT_ImportStatement,`
			`LT_ObjCDecl, // An @interface, @implementation, or @protocol line.`
			`LT_ObjCMethodDecl,`
			`LT_ObjCProperty, // An @property line.`
			`LT_Other,`
			`LT_PreprocessorDirective,`
			`LT_VirtualFunctionDecl`
			`};`

			`class AnnotatedLine {`
			`public:`
			`AnnotatedLine(const UnwrappedLine &Line)`
			`: First(Line.Tokens.front().Tok), Level(Line.Level),`
			`MatchingOpeningBlockLineIndex(Line.MatchingOpeningBlockLineIndex),`
			`MatchingClosingBlockLineIndex(Line.MatchingClosingBlockLineIndex),`
			`InPPDirective(Line.InPPDirective),`
			`MustBeDeclaration(Line.MustBeDeclaration), MightBeFunctionDecl(false),`
			`IsMultiVariableDeclStmt(false), Affected(false),`
			`LeadingEmptyLinesAffected(false), ChildrenAffected(false),`
			`FirstStartColumn(Line.FirstStartColumn) {`
			`assert(!Line.Tokens.empty());`

			`// Calculate Next and Previous for all tokens. Note that we must overwrite`
			`// Next and Previous for every token, as previous formatting runs might have`
			`// left them in a different state.`
			`First->Previous = nullptr;`
			`FormatToken *Current = First;`
			`for (std::list<UnwrappedLineNode>::const_iterator I = ++Line.Tokens.begin(),`
			`E = Line.Tokens.end();`
			`I != E; ++I) {`
			`const UnwrappedLineNode &Node = *I;`
			`Current->Next = I->Tok;`
			`I->Tok->Previous = Current;`
			`Current = Current->Next;`
			`Current->Children.clear();`
			`for (const auto &Child : Node.Children) {`
			`Children.push_back(new AnnotatedLine(Child));`
			`Current->Children.push_back(Children.back());`
			`}`
			`}`
			`Last = Current;`
			`Last->Next = nullptr;`
			`}`

			`~AnnotatedLine() {`
			`for (unsigned i = 0, e = Children.size(); i != e; ++i) {`
			`delete Children[i];`
			`}`
			`FormatToken *Current = First;`
			`while (Current) {`
			`Current->Children.clear();`
			`Current->Role.reset();`
			`Current = Current->Next;`
			`}`
			`}`

			`/// \c true if this line starts with the given tokens in order, ignoring`
			`/// comments.`
			`template <typename... Ts> bool startsWith(Ts... Tokens) const {`
			`return First && First->startsSequence(Tokens...);`
			`}`

			`/// \c true if this line ends with the given tokens in reversed order,`
			`/// ignoring comments.`
			`/// For example, given tokens [T1, T2, T3, ...], the function returns true if`
			`/// this line is like "... T3 T2 T1".`
			`template <typename... Ts> bool endsWith(Ts... Tokens) const {`
			`return Last && Last->endsSequence(Tokens...);`
			`}`

			`/// \c true if this line looks like a function definition instead of a`
			`/// function declaration. Asserts MightBeFunctionDecl.`
			`bool mightBeFunctionDefinition() const {`
			`assert(MightBeFunctionDecl);`
			`// Try to determine if the end of a stream of tokens is either the`
			`// Definition or the Declaration for a function. It does this by looking for`
			`// the ';' in foo(); and using that it ends with a ; to know this is the`
			`// Definition, however the line could end with`
			`// foo(); /* comment */`
			`// or`
			`// foo(); // comment`
			`// or`
			`// foo() // comment`
			`// endsWith() ignores the comment.`
			`return !endsWith(tok::semi);`
			`}`

			`/// \c true if this line starts a namespace definition.`
			`bool startsWithNamespace() const {`
			`return startsWith(tok::kw_namespace) \|\| startsWith(TT_NamespaceMacro) \|\|`
			`startsWith(tok::kw_inline, tok::kw_namespace) \|\|`
			`startsWith(tok::kw_export, tok::kw_namespace);`
			`}`

			`FormatToken *First;`
			`FormatToken *Last;`

			`SmallVector<AnnotatedLine *, 0> Children;`

			`LineType Type;`
			`unsigned Level;`
			`size_t MatchingOpeningBlockLineIndex;`
			`size_t MatchingClosingBlockLineIndex;`
			`bool InPPDirective;`
			`bool MustBeDeclaration;`
			`bool MightBeFunctionDecl;`
			`bool IsMultiVariableDeclStmt;`

			`/// \c True if this line should be formatted, i.e. intersects directly or`
			`/// indirectly with one of the input ranges.`
			`bool Affected;`

			`/// \c True if the leading empty lines of this line intersect with one of the`
			`/// input ranges.`
			`bool LeadingEmptyLinesAffected;`

			`/// \c True if one of this line's children intersects with an input range.`
			`bool ChildrenAffected;`

			`unsigned FirstStartColumn;`

			`private:`
			`// Disallow copying.`
			`AnnotatedLine(const AnnotatedLine &) = delete;`
			`void operator=(const AnnotatedLine &) = delete;`
			`};`

			`/// Determines extra information about the tokens comprising an`
			`/// \c UnwrappedLine.`
			`class TokenAnnotator {`
			`public:`
			`TokenAnnotator(const FormatStyle &Style, const AdditionalKeywords &Keywords)`
			`: Style(Style), Keywords(Keywords) {}`

			`/// Adapts the indent levels of comment lines to the indent of the`
			`/// subsequent line.`
			`// FIXME: Can/should this be done in the UnwrappedLineParser?`
			`void setCommentLineLevels(SmallVectorImpl<AnnotatedLine *> &Lines);`

			`void annotate(AnnotatedLine &Line);`
			`void calculateFormattingInformation(AnnotatedLine &Line);`

			`private:`
			`/// Calculate the penalty for splitting before \c Tok.`
			`unsigned splitPenalty(const AnnotatedLine &Line, const FormatToken &Tok,`
			`bool InFunctionDecl);`

			`bool spaceRequiredBeforeParens(const FormatToken &Right) const;`

			`bool spaceRequiredBetween(const AnnotatedLine &Line, const FormatToken &Left,`
			`const FormatToken &Right);`

			`bool spaceRequiredBefore(const AnnotatedLine &Line, const FormatToken &Right);`

			`bool mustBreakBefore(const AnnotatedLine &Line, const FormatToken &Right);`

			`bool canBreakBefore(const AnnotatedLine &Line, const FormatToken &Right);`

			`bool mustBreakForReturnType(const AnnotatedLine &Line) const;`

			`void printDebugInfo(const AnnotatedLine &Line);`

			`void calculateUnbreakableTailLengths(AnnotatedLine &Line);`

			`const FormatStyle &Style;`

			`const AdditionalKeywords &Keywords;`
			`};`

			`} // end namespace format`
			`} // end namespace clang`

			`#endif`