Movatterモバイル変換


[0]ホーム

URL:


LLVM 20.0.0git
GlobPattern.cpp
Go to the documentation of this file.
1//===-- GlobPattern.cpp - Glob pattern matcher implementation -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements a glob pattern matcher.
10//
11//===----------------------------------------------------------------------===//
12
13#include "llvm/Support/GlobPattern.h"
14#include "llvm/ADT/StringRef.h"
15#include "llvm/Support/Errc.h"
16
17using namespacellvm;
18
19// Expands character ranges and returns a bitmap.
20// For example, "a-cf-hz" is expanded to "abcfghz".
21staticExpected<BitVector>expand(StringRef S,StringRef Original) {
22BitVector BV(256,false);
23
24// Expand X-Y.
25for (;;) {
26if (S.size() < 3)
27break;
28
29uint8_t Start = S[0];
30uint8_tEnd = S[2];
31
32// If it doesn't start with something like X-Y,
33// consume the first character and proceed.
34if (S[1] !='-') {
35 BV[Start] =true;
36 S = S.substr(1);
37continue;
38 }
39
40// It must be in the form of X-Y.
41// Validate it and then interpret the range.
42if (Start >End)
43return make_error<StringError>("invalid glob pattern: " + Original,
44 errc::invalid_argument);
45
46for (intC = Start;C <=End; ++C)
47 BV[(uint8_t)C] =true;
48 S = S.substr(3);
49 }
50
51for (charC : S)
52 BV[(uint8_t)C] =true;
53return BV;
54}
55
56// Identify brace expansions in S and return the list of patterns they expand
57// into.
58staticExpected<SmallVector<std::string, 1>>
59parseBraceExpansions(StringRef S, std::optional<size_t> MaxSubPatterns) {
60SmallVector<std::string> SubPatterns = {S.str()};
61if (!MaxSubPatterns || !S.contains('{'))
62return std::move(SubPatterns);
63
64structBraceExpansion {
65size_t Start;
66size_tLength;
67SmallVector<StringRef, 2> Terms;
68 };
69SmallVector<BraceExpansion, 0> BraceExpansions;
70
71 BraceExpansion *CurrentBE =nullptr;
72size_t TermBegin;
73for (size_tI = 0, E = S.size();I != E; ++I) {
74if (S[I] =='[') {
75I = S.find(']',I + 2);
76if (I == std::string::npos)
77return make_error<StringError>("invalid glob pattern, unmatched '['",
78 errc::invalid_argument);
79 }elseif (S[I] =='{') {
80if (CurrentBE)
81return make_error<StringError>(
82"nested brace expansions are not supported",
83 errc::invalid_argument);
84 CurrentBE = &BraceExpansions.emplace_back();
85 CurrentBE->Start =I;
86 TermBegin =I + 1;
87 }elseif (S[I] ==',') {
88if (!CurrentBE)
89continue;
90 CurrentBE->Terms.push_back(S.substr(TermBegin,I - TermBegin));
91 TermBegin =I + 1;
92 }elseif (S[I] =='}') {
93if (!CurrentBE)
94continue;
95if (CurrentBE->Terms.empty())
96return make_error<StringError>(
97"empty or singleton brace expansions are not supported",
98 errc::invalid_argument);
99 CurrentBE->Terms.push_back(S.substr(TermBegin,I - TermBegin));
100 CurrentBE->Length =I - CurrentBE->Start + 1;
101 CurrentBE =nullptr;
102 }elseif (S[I] =='\\') {
103if (++I == E)
104return make_error<StringError>("invalid glob pattern, stray '\\'",
105 errc::invalid_argument);
106 }
107 }
108if (CurrentBE)
109return make_error<StringError>("incomplete brace expansion",
110 errc::invalid_argument);
111
112size_t NumSubPatterns = 1;
113for (auto &BE : BraceExpansions) {
114if (NumSubPatterns > std::numeric_limits<size_t>::max() / BE.Terms.size()) {
115 NumSubPatterns = std::numeric_limits<size_t>::max();
116break;
117 }
118 NumSubPatterns *= BE.Terms.size();
119 }
120if (NumSubPatterns > *MaxSubPatterns)
121return make_error<StringError>("too many brace expansions",
122 errc::invalid_argument);
123// Replace brace expansions in reverse order so that we don't invalidate
124// earlier start indices
125for (auto &BE :reverse(BraceExpansions)) {
126SmallVector<std::string> OrigSubPatterns;
127std::swap(SubPatterns, OrigSubPatterns);
128for (StringRef Term : BE.Terms)
129for (StringRef Orig : OrigSubPatterns)
130 SubPatterns.emplace_back(Orig).replace(BE.Start, BE.Length, Term);
131 }
132return std::move(SubPatterns);
133}
134
135Expected<GlobPattern>
136GlobPattern::create(StringRef S, std::optional<size_t> MaxSubPatterns) {
137GlobPattern Pat;
138
139// Store the prefix that does not contain any metacharacter.
140size_t PrefixSize = S.find_first_of("?*[{\\");
141 Pat.Prefix = S.substr(0, PrefixSize);
142if (PrefixSize == std::string::npos)
143return Pat;
144 S = S.substr(PrefixSize);
145
146SmallVector<std::string, 1> SubPats;
147if (auto Err =parseBraceExpansions(S, MaxSubPatterns).moveInto(SubPats))
148return std::move(Err);
149for (StringRef SubPat : SubPats) {
150auto SubGlobOrErr = SubGlobPattern::create(SubPat);
151if (!SubGlobOrErr)
152return SubGlobOrErr.takeError();
153 Pat.SubGlobs.push_back(*SubGlobOrErr);
154 }
155
156return Pat;
157}
158
159Expected<GlobPattern::SubGlobPattern>
160GlobPattern::SubGlobPattern::create(StringRef S) {
161 SubGlobPattern Pat;
162
163// Parse brackets.
164 Pat.Pat.assign(S.begin(), S.end());
165for (size_tI = 0, E = S.size();I != E; ++I) {
166if (S[I] =='[') {
167// ']' is allowed as the first character of a character class. '[]' is
168// invalid. So, just skip the first character.
169 ++I;
170size_t J = S.find(']',I + 1);
171if (J ==StringRef::npos)
172return make_error<StringError>("invalid glob pattern, unmatched '['",
173errc::invalid_argument);
174StringRef Chars = S.substr(I, J -I);
175bool Invert = S[I] =='^' || S[I] =='!';
176Expected<BitVector> BV =
177 Invert ?expand(Chars.substr(1), S) :expand(Chars, S);
178if (!BV)
179return BV.takeError();
180if (Invert)
181 BV->flip();
182 Pat.Brackets.push_back(Bracket{J + 1, std::move(*BV)});
183I = J;
184 }elseif (S[I] =='\\') {
185if (++I == E)
186return make_error<StringError>("invalid glob pattern, stray '\\'",
187errc::invalid_argument);
188 }
189 }
190return Pat;
191}
192
193boolGlobPattern::match(StringRef S) const{
194if (!S.consume_front(Prefix))
195returnfalse;
196if (SubGlobs.empty() && S.empty())
197returntrue;
198for (auto &Glob : SubGlobs)
199if (Glob.match(S))
200returntrue;
201returnfalse;
202}
203
204// Factor the pattern into segments split by '*'. The segment is matched
205// sequentianlly by finding the first occurrence past the end of the previous
206// match.
207bool GlobPattern::SubGlobPattern::match(StringRef Str) const{
208constchar *P = Pat.data(), *SegmentBegin =nullptr, *S = Str.data(),
209 *SavedS = S;
210constchar *const PEnd =P + Pat.size(), *constEnd = S + Str.size();
211size_tB = 0, SavedB = 0;
212while (S !=End) {
213if (P == PEnd)
214 ;
215elseif (*P =='*') {
216// The non-* substring on the left of '*' matches the tail of S. Save the
217// positions to be used by backtracking if we see a mismatch later.
218 SegmentBegin = ++P;
219 SavedS = S;
220 SavedB =B;
221continue;
222 }elseif (*P =='[') {
223if (Brackets[B].Bytes[uint8_t(*S)]) {
224P = Pat.data() + Brackets[B++].NextOffset;
225 ++S;
226continue;
227 }
228 }elseif (*P =='\\') {
229if (*++P == *S) {
230 ++P;
231 ++S;
232continue;
233 }
234 }elseif (*P == *S || *P =='?') {
235 ++P;
236 ++S;
237continue;
238 }
239if (!SegmentBegin)
240returnfalse;
241// We have seen a '*'. Backtrack to the saved positions. Shift the S
242// position to probe the next starting position in the segment.
243P = SegmentBegin;
244 S = ++SavedS;
245B = SavedB;
246 }
247// All bytes in Str have been matched. Return true if the rest part of Pat is
248// empty or contains only '*'.
249return getPat().find_first_not_of('*',P - Pat.data()) == std::string::npos;
250}
B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
End
bool End
Definition:ELF_riscv.cpp:480
Errc.h
parseBraceExpansions
static Expected< SmallVector< std::string, 1 > > parseBraceExpansions(StringRef S, std::optional< size_t > MaxSubPatterns)
Definition:GlobPattern.cpp:59
expand
static Expected< BitVector > expand(StringRef S, StringRef Original)
Definition:GlobPattern.cpp:21
GlobPattern.h
I
#define I(x, y, z)
Definition:MD5.cpp:58
P
#define P(N)
StringRef.h
llvm::BitVector
Definition:BitVector.h:82
llvm::Expected
Tagged union holding either a T or a Error.
Definition:Error.h:481
llvm::Expected::takeError
Error takeError()
Take ownership of the stored error.
Definition:Error.h:608
llvm::GlobPattern
This class implements a glob pattern matcher similar to the one found in bash, but with some key diff...
Definition:GlobPattern.h:50
llvm::GlobPattern::match
bool match(StringRef S) const
Definition:GlobPattern.cpp:193
llvm::GlobPattern::create
static Expected< GlobPattern > create(StringRef Pat, std::optional< size_t > MaxSubPatterns={})
Definition:GlobPattern.cpp:136
llvm::SmallVectorBase::empty
bool empty() const
Definition:SmallVector.h:81
llvm::SmallVectorImpl::emplace_back
reference emplace_back(ArgTypes &&... Args)
Definition:SmallVector.h:937
llvm::SmallVectorTemplateBase::push_back
void push_back(const T &Elt)
Definition:SmallVector.h:413
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition:SmallVector.h:1196
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition:StringRef.h:51
llvm::StringRef::str
std::string str() const
str - Get the contents as an std::string.
Definition:StringRef.h:229
llvm::StringRef::substr
constexpr StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
Definition:StringRef.h:571
llvm::StringRef::empty
constexpr bool empty() const
empty - Check if the string is empty.
Definition:StringRef.h:147
llvm::StringRef::begin
iterator begin() const
Definition:StringRef.h:116
llvm::StringRef::size
constexpr size_t size() const
size - Get the string size.
Definition:StringRef.h:150
llvm::StringRef::contains
bool contains(StringRef Other) const
Return true if the given string is a substring of *this, and false otherwise.
Definition:StringRef.h:424
llvm::StringRef::consume_front
bool consume_front(StringRef Prefix)
Returns true if this StringRef has the given prefix and removes that prefix.
Definition:StringRef.h:635
llvm::StringRef::find_first_of
size_t find_first_of(char C, size_t From=0) const
Find the first character in the string that is C, or npos if not found.
Definition:StringRef.h:377
llvm::StringRef::end
iterator end() const
Definition:StringRef.h:118
llvm::StringRef::find
size_t find(char C, size_t From=0) const
Search for the first character C in the string.
Definition:StringRef.h:297
llvm::StringRef::npos
static constexpr size_t npos
Definition:StringRef.h:53
uint8_t
llvm::CallingConv::C
@ C
The default llvm calling convention, compatible with C.
Definition:CallingConv.h:34
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition:AddressRanges.h:18
llvm::Length
@ Length
Definition:DWP.cpp:480
llvm::errc::invalid_argument
@ invalid_argument
llvm::reverse
auto reverse(ContainerTy &&C)
Definition:STLExtras.h:420
std::swap
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition:BitVector.h:860

Generated on Thu Jul 17 2025 12:39:58 for LLVM by doxygen 1.9.6
[8]ページ先頭

©2009-2025 Movatter.jp