1//===-- GlobPattern.cpp - Glob pattern matcher implementation -------------===// 3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4// See https://llvm.org/LICENSE.txt for license information. 5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 7//===----------------------------------------------------------------------===// 9// This file implements a glob pattern matcher. 11//===----------------------------------------------------------------------===// 19// Expands character ranges and returns a bitmap. 20// For example, "a-cf-hz" is expanded to "abcfghz". 32// If it doesn't start with something like X-Y, 33// consume the first character and proceed. 40// It must be in the form of X-Y. 41// Validate it and then interpret the range. 43return make_error<StringError>(
"invalid glob pattern: " + Original,
44 errc::invalid_argument);
46for (
intC = Start;
C <=
End; ++
C)
56// Identify brace expansions in S and return the list of patterns they expand 61if (!MaxSubPatterns || !S.
contains(
'{'))
62return std::move(SubPatterns);
71 BraceExpansion *CurrentBE =
nullptr;
73for (
size_tI = 0, E = S.
size();
I != E; ++
I) {
76if (
I == std::string::npos)
77return make_error<StringError>(
"invalid glob pattern, unmatched '['",
78 errc::invalid_argument);
79 }
elseif (S[
I] ==
'{') {
81return make_error<StringError>(
82"nested brace expansions are not supported",
83 errc::invalid_argument);
87 }
elseif (S[
I] ==
',') {
90 CurrentBE->Terms.push_back(S.
substr(TermBegin,
I - TermBegin));
92 }
elseif (S[
I] ==
'}') {
95if (CurrentBE->Terms.empty())
96return make_error<StringError>(
97"empty or singleton brace expansions are not supported",
98 errc::invalid_argument);
99 CurrentBE->Terms.push_back(S.
substr(TermBegin,
I - TermBegin));
100 CurrentBE->Length =
I - CurrentBE->Start + 1;
102 }
elseif (S[
I] ==
'\\') {
104return make_error<StringError>(
"invalid glob pattern, stray '\\'",
105 errc::invalid_argument);
109return make_error<StringError>(
"incomplete brace expansion",
110 errc::invalid_argument);
112size_t NumSubPatterns = 1;
113for (
auto &BE : BraceExpansions) {
114if (NumSubPatterns > std::numeric_limits<size_t>::max() / BE.Terms.size()) {
115 NumSubPatterns = std::numeric_limits<size_t>::max();
118 NumSubPatterns *= BE.Terms.size();
120if (NumSubPatterns > *MaxSubPatterns)
121return make_error<StringError>(
"too many brace expansions",
122 errc::invalid_argument);
123// Replace brace expansions in reverse order so that we don't invalidate 124// earlier start indices 125for (
auto &BE :
reverse(BraceExpansions)) {
130 SubPatterns.
emplace_back(Orig).replace(BE.Start, BE.Length, Term);
132return std::move(SubPatterns);
139// Store the prefix that does not contain any metacharacter. 141 Pat.Prefix = S.
substr(0, PrefixSize);
142if (PrefixSize == std::string::npos)
148return std::move(Err);
150auto SubGlobOrErr = SubGlobPattern::create(SubPat);
152return SubGlobOrErr.takeError();
160GlobPattern::SubGlobPattern::create(
StringRef S) {
165for (
size_tI = 0, E = S.
size();
I != E; ++
I) {
167// ']' is allowed as the first character of a character class. '[]' is 168// invalid. So, just skip the first character. 170size_t J = S.
find(
']',
I + 1);
172return make_error<StringError>(
"invalid glob pattern, unmatched '['",
175bool Invert = S[
I] ==
'^' || S[
I] ==
'!';
182 Pat.Brackets.push_back(Bracket{J + 1, std::move(*BV)});
184 }
elseif (S[
I] ==
'\\') {
186return make_error<StringError>(
"invalid glob pattern, stray '\\'",
198for (
auto &Glob : SubGlobs)
204// Factor the pattern into segments split by '*'. The segment is matched 205// sequentially by finding the first occurrence past the end of the previous 207bool GlobPattern::SubGlobPattern::match(
StringRef Str)
const{
208constchar *
P = Pat.data(), *SegmentBegin =
nullptr, *S = Str.data(),
210constchar *
const PEnd =
P + Pat.size(), *
constEnd = S + Str.size();
211size_tB = 0, SavedB = 0;
216// The non-* substring on the left of '*' matches the tail of S. Save the 217// positions to be used by backtracking if we see a mismatch later. 223if (Brackets[
B].Bytes[
uint8_t(*S)]) {
224P = Pat.data() + Brackets[
B++].NextOffset;
228 }
elseif (*
P ==
'\\') {
234 }
elseif (*
P == *S || *
P ==
'?') {
241// We have seen a '*'. Backtrack to the saved positions. Shift the S 242// position to probe the next starting position in the segment. 247// All bytes in Str have been matched. Return true if the rest part of Pat is 248// empty or contains only '*'. 249return getPat().find_first_not_of(
'*',
P - Pat.data()) == std::string::npos;
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static Expected< SmallVector< std::string, 1 > > parseBraceExpansions(StringRef S, std::optional< size_t > MaxSubPatterns)
static Expected< BitVector > expand(StringRef S, StringRef Original)
Tagged union holding either a T or an Error.
Error takeError()
Take ownership of the stored error.
This class implements a glob pattern matcher similar to the one found in bash, but with some key diff...
bool match(StringRef S) const
static Expected< GlobPattern > create(StringRef Pat, std::optional< size_t > MaxSubPatterns={})
reference emplace_back(ArgTypes &&... Args)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
std::string str() const
str - Get the contents as an std::string.
constexpr StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
constexpr bool empty() const
empty - Check if the string is empty.
constexpr size_t size() const
size - Get the string size.
bool contains(StringRef Other) const
Return true if the given string is a substring of *this, and false otherwise.
bool consume_front(StringRef Prefix)
Returns true if this StringRef has the given prefix and removes that prefix.
size_t find_first_of(char C, size_t From=0) const
Find the first character in the string that is C, or npos if not found.
size_t find(char C, size_t From=0) const
Search for the first character C in the string.
static constexpr size_t npos
@ C
The default llvm calling convention, compatible with C.
This is an optimization pass for GlobalISel generic memory operations.
auto reverse(ContainerTy &&C)
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.