//===-- GlobPattern.cpp - Glob pattern matcher implementation -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements a glob pattern matcher.
//
//===----------------------------------------------------------------------===//
|
|
#include "llvm/Support/GlobPattern.h"
|
|
#include "llvm/ADT/ArrayRef.h"
|
|
#include "llvm/ADT/Optional.h"
|
|
#include "llvm/ADT/StringRef.h"
|
|
#include "llvm/Support/Errc.h"
|
|
|
|
using namespace llvm;
|
|
|
|
// Returns true if S contains at least one glob metacharacter, i.e. one of
// '?', '*', '[' or '\'.
static bool hasWildcard(StringRef S) {
  const size_t FirstMeta = S.find_first_of("?*[\\");
  return FirstMeta != StringRef::npos;
}
|
|
|
|
// Expands character ranges and returns a bitmap.
|
|
// For example, "a-cf-hz" is expanded to "abcfghz".
|
|
static Expected<BitVector> expand(StringRef S, StringRef Original) {
|
|
BitVector BV(256, false);
|
|
|
|
// Expand X-Y.
|
|
for (;;) {
|
|
if (S.size() < 3)
|
|
break;
|
|
|
|
uint8_t Start = S[0];
|
|
uint8_t End = S[2];
|
|
|
|
// If it doesn't start with something like X-Y,
|
|
// consume the first character and proceed.
|
|
if (S[1] != '-') {
|
|
BV[Start] = true;
|
|
S = S.substr(1);
|
|
continue;
|
|
}
|
|
|
|
// It must be in the form of X-Y.
|
|
// Validate it and then interpret the range.
|
|
if (Start > End)
|
|
return make_error<StringError>("invalid glob pattern: " + Original,
|
|
errc::invalid_argument);
|
|
|
|
for (int C = Start; C <= End; ++C)
|
|
BV[(uint8_t)C] = true;
|
|
S = S.substr(3);
|
|
}
|
|
|
|
for (char C : S)
|
|
BV[(uint8_t)C] = true;
|
|
return BV;
|
|
}
|
|
|
|
// This is a scanner for the glob pattern.
|
|
// A glob pattern token is one of "*", "?", "\", "[<chars>]", "[^<chars>]"
|
|
// (which is a negative form of "[<chars>]"), "[!<chars>]" (which is
|
|
// equivalent to "[^<chars>]"), or a non-meta character.
|
|
// This function returns the first token in S.
|
|
static Expected<BitVector> scan(StringRef &S, StringRef Original) {
|
|
switch (S[0]) {
|
|
case '*':
|
|
S = S.substr(1);
|
|
// '*' is represented by an empty bitvector.
|
|
// All other bitvectors are 256-bit long.
|
|
return BitVector();
|
|
case '?':
|
|
S = S.substr(1);
|
|
return BitVector(256, true);
|
|
case '[': {
|
|
// ']' is allowed as the first character of a character class. '[]' is
|
|
// invalid. So, just skip the first character.
|
|
size_t End = S.find(']', 2);
|
|
if (End == StringRef::npos)
|
|
return make_error<StringError>("invalid glob pattern: " + Original,
|
|
errc::invalid_argument);
|
|
|
|
StringRef Chars = S.substr(1, End - 1);
|
|
S = S.substr(End + 1);
|
|
if (Chars.startswith("^") || Chars.startswith("!")) {
|
|
Expected<BitVector> BV = expand(Chars.substr(1), Original);
|
|
if (!BV)
|
|
return BV.takeError();
|
|
return BV->flip();
|
|
}
|
|
return expand(Chars, Original);
|
|
}
|
|
case '\\':
|
|
// Eat this character and fall through below to treat it like a non-meta
|
|
// character.
|
|
S = S.substr(1);
|
|
LLVM_FALLTHROUGH;
|
|
default:
|
|
BitVector BV(256, false);
|
|
BV[(uint8_t)S[0]] = true;
|
|
S = S.substr(1);
|
|
return BV;
|
|
}
|
|
}
|
|
|
|
// Compiles the glob pattern S into a GlobPattern.
//
// Three cheap representations are tried first, in this order: an exact string
// (no metacharacters at all), a prefix ("foo*"), and a suffix ("*foo"). Any
// other pattern is tokenized with scan(); the first tokenizer error is
// returned to the caller.
Expected<GlobPattern> GlobPattern::create(StringRef S) {
  GlobPattern Result;

  // No metacharacter anywhere: plain string comparison is enough.
  if (!hasWildcard(S)) {
    Result.Exact = S;
    return Result;
  }

  // Something like "foo*" where the trailing '*' is not escaped and the part
  // before it is literal: startswith() is enough.
  if (S.endswith("*") && !S.endswith("\\*")) {
    StringRef Head = S.drop_back();
    if (!hasWildcard(Head)) {
      Result.Prefix = Head;
      return Result;
    }
  }

  // Something like "*foo" with a literal remainder: endswith() is enough.
  if (S.startswith("*")) {
    StringRef Tail = S.drop_front();
    if (!hasWildcard(Tail)) {
      Result.Suffix = Tail;
      return Result;
    }
  }

  // General case: split the pattern into tokens, one bitmap per token.
  // scan() advances Rest past each token it consumes.
  const StringRef Original = S;
  for (StringRef Rest = S; !Rest.empty();) {
    Expected<BitVector> Token = scan(Rest, Original);
    if (!Token)
      return Token.takeError();
    Result.Tokens.push_back(*Token);
  }
  return Result;
}
|
|
|
|
// Returns true if S matches this pattern. Whichever representation is set
// (exact string, prefix, or suffix, checked in that order) decides the
// result; when none is set, the token list is matched with matchOne().
bool GlobPattern::match(StringRef S) const {
  return Exact    ? S == *Exact
         : Prefix ? S.startswith(*Prefix)
         : Suffix ? S.endswith(*Suffix)
                  : matchOne(Tokens, S);
}
|
|
|
|
// Runs glob pattern Pats against string S.
|
|
bool GlobPattern::matchOne(ArrayRef<BitVector> Pats, StringRef S) const {
|
|
for (;;) {
|
|
if (Pats.empty())
|
|
return S.empty();
|
|
|
|
// If Pats[0] is '*', try to match Pats[1..] against all possible
|
|
// tail strings of S to see at least one pattern succeeds.
|
|
if (Pats[0].size() == 0) {
|
|
Pats = Pats.slice(1);
|
|
if (Pats.empty())
|
|
// Fast path. If a pattern is '*', it matches anything.
|
|
return true;
|
|
for (size_t I = 0, E = S.size(); I < E; ++I)
|
|
if (matchOne(Pats, S.substr(I)))
|
|
return true;
|
|
return false;
|
|
}
|
|
|
|
// If Pats[0] is not '*', it must consume one character.
|
|
if (S.empty() || !Pats[0][(uint8_t)S[0]])
|
|
return false;
|
|
Pats = Pats.slice(1);
|
|
S = S.substr(1);
|
|
}
|
|
}
|