mirror of
https://github.com/inspircd/inspircd.git
synced 2025-03-10 02:59:01 -04:00
Implement support for regex capture groups.
This commit is contained in:
parent
5d0e5914a0
commit
f7c041f560
@ -30,9 +30,16 @@ namespace Regex
|
||||
class Engine;
|
||||
class EngineReference;
|
||||
class Exception;
|
||||
class MatchCollection;
|
||||
class Pattern;
|
||||
template<typename> class SimpleEngine;
|
||||
|
||||
/** A list of matches that were captured by index. */
|
||||
typedef std::vector<std::string> Captures;
|
||||
|
||||
/** A list of matches that were captured by name. */
|
||||
typedef insp::flat_map<std::string, std::string> NamedCaptures;
|
||||
|
||||
/** A shared pointer to a regex pattern. */
|
||||
typedef std::shared_ptr<Pattern> PatternPtr;
|
||||
|
||||
@ -146,6 +153,34 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
class Regex::MatchCollection
|
||||
{
|
||||
private:
|
||||
/** The substrings that were captured. */
|
||||
const Captures captures;
|
||||
|
||||
/** The substrings that were captured by name. */
|
||||
const NamedCaptures namedcaptures;
|
||||
|
||||
public:
|
||||
/** Initializes a new instance of the Regex::MatchCollection class.
|
||||
* @param c The substrings that were captured.
|
||||
* @param nc The substrings that were captured by name.
|
||||
*/
|
||||
MatchCollection(const Captures& c, const NamedCaptures& nc)
|
||||
: captures(c)
|
||||
, namedcaptures(nc)
|
||||
{
|
||||
}
|
||||
|
||||
/** Retrieves the substrings that were captured. */
|
||||
const Captures& GetCaptures() const { return captures; }
|
||||
|
||||
/** Retrieves the substrings that were captured by name. */
|
||||
const NamedCaptures& GetNamedCaptures() const { return namedcaptures; }
|
||||
};
|
||||
|
||||
/** Represents a compiled regular expression pattern. */
|
||||
class Regex::Pattern
|
||||
{
|
||||
@ -182,6 +217,12 @@ public:
|
||||
* @return If the text matched the pattern then true; otherwise, false.
|
||||
*/
|
||||
virtual bool IsMatch(const std::string& text) = 0;
|
||||
|
||||
/** Attempts to extract this pattern's match groups from the specified text.
|
||||
* @param text The text to extract match groups from.
|
||||
* @return If the text matched the pattern then a match collection; otherwise, std::nullopt.
|
||||
*/
|
||||
virtual std::optional<MatchCollection> Matches(const std::string& text) = 0;
|
||||
};
|
||||
|
||||
inline Regex::PatternPtr Regex::Engine::CreateHuman(const std::string& pattern) const
|
||||
|
@ -68,11 +68,56 @@ public:
|
||||
|
||||
bool IsMatch(const std::string& text) override
|
||||
{
|
||||
pcre2_match_data* unused = pcre2_match_data_create(1, nullptr);
|
||||
pcre2_match_data* unused = pcre2_match_data_create_from_pattern(regex, nullptr);
|
||||
int result = pcre2_match(regex, reinterpret_cast<PCRE2_SPTR8>(text.c_str()), text.length(), 0, 0, unused, nullptr);
|
||||
pcre2_match_data_free(unused);
|
||||
return result >= 0;
|
||||
}
|
||||
|
||||
std::optional<Regex::MatchCollection> Matches(const std::string& text) override
|
||||
{
|
||||
pcre2_match_data* data = pcre2_match_data_create_from_pattern(regex, nullptr);
|
||||
int result = pcre2_match(regex, reinterpret_cast<PCRE2_SPTR8>(text.c_str()), text.length(), 0, 0, data, nullptr);
|
||||
if (result < 0)
|
||||
return std::nullopt;
|
||||
|
||||
PCRE2_SIZE* ovector = pcre2_get_ovector_pointer(data);
|
||||
|
||||
uint32_t capturecount;
|
||||
Regex::Captures captures;
|
||||
if (!pcre2_pattern_info(regex, PCRE2_INFO_CAPTURECOUNT, &capturecount) && capturecount)
|
||||
{
|
||||
for (uint32_t idx = 0; idx <= capturecount; ++idx)
|
||||
{
|
||||
PCRE2_UCHAR* bufferptr;
|
||||
PCRE2_SIZE bufferlen;
|
||||
if (!pcre2_substring_get_bynumber(data, idx, &bufferptr, &bufferlen))
|
||||
captures.emplace_back(reinterpret_cast<const char*>(bufferptr), bufferlen);
|
||||
}
|
||||
}
|
||||
|
||||
uint32_t namedcapturecount;
|
||||
Regex::NamedCaptures namedcaptures;
|
||||
if (!pcre2_pattern_info(regex, PCRE2_INFO_NAMECOUNT, &namedcapturecount) && namedcapturecount)
|
||||
{
|
||||
uint32_t nameentrysize;
|
||||
PCRE2_SPTR nametable;
|
||||
if (!pcre2_pattern_info(regex, PCRE2_INFO_NAMEENTRYSIZE, &nameentrysize)
|
||||
&& !pcre2_pattern_info(regex, PCRE2_INFO_NAMETABLE, &nametable))
|
||||
{
|
||||
for (uint32_t idx = 0; idx < namedcapturecount; ++idx)
|
||||
{
|
||||
int matchidx = (nametable[0] << 8) | nametable[1];
|
||||
const std::string matchname(reinterpret_cast<const char*>(nametable + 2), nameentrysize - 3);
|
||||
const std::string matchvalue(text.c_str() + ovector[2 * matchidx], ovector[ 2 * matchidx + 1] - ovector[2 * matchidx]);
|
||||
namedcaptures.emplace(std::move(matchname), std::move(matchvalue));
|
||||
nametable += nameentrysize;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return Regex::MatchCollection(std::move(captures), std::move(namedcaptures));
|
||||
}
|
||||
};
|
||||
|
||||
class ModuleRegexPCRE final
|
||||
|
@ -38,7 +38,7 @@ public:
|
||||
POSIXPattern(const Module* mod, const std::string& pattern, uint8_t options)
|
||||
: Regex::Pattern(pattern, options)
|
||||
{
|
||||
int flags = REG_EXTENDED | REG_NOSUB;
|
||||
int flags = REG_EXTENDED;
|
||||
if (options & Regex::OPT_CASE_INSENSITIVE)
|
||||
flags |= REG_ICASE;
|
||||
|
||||
@ -66,6 +66,29 @@ public:
|
||||
{
|
||||
return !regexec(®ex, text.c_str(), 0, NULL, 0);
|
||||
}
|
||||
|
||||
std::optional<Regex::MatchCollection> Matches(const std::string& text) override
|
||||
{
|
||||
std::vector<regmatch_t> matches(32);
|
||||
int result = regexec(®ex, text.c_str(), matches.size(), &matches[0], 0);
|
||||
if (result)
|
||||
return std::nullopt;
|
||||
|
||||
Regex::Captures captures;
|
||||
for (const auto& match : matches)
|
||||
{
|
||||
if (match.rm_so == -1 || match.rm_eo == -1)
|
||||
break;
|
||||
|
||||
captures.emplace_back(text.c_str() + match.rm_so, match.rm_eo - match.rm_so);
|
||||
}
|
||||
captures.shrink_to_fit();
|
||||
|
||||
// The posix engine does not support named captures.
|
||||
static const Regex::NamedCaptures unusednc;
|
||||
|
||||
return Regex::MatchCollection(std::move(captures), unusednc);
|
||||
}
|
||||
};
|
||||
|
||||
class ModuleRegexPOSIX final
|
||||
|
@ -58,6 +58,27 @@ public:
|
||||
{
|
||||
return RE2::FullMatch(text, regex);
|
||||
}
|
||||
|
||||
std::optional<Regex::MatchCollection> Matches(const std::string& text) override
|
||||
{
|
||||
std::vector<re2::StringPiece> re2captures(regex.NumberOfCapturingGroups() + 1);
|
||||
bool result = regex.Match(text, 0, text.length(), RE2::ANCHOR_BOTH, &re2captures[0], static_cast<int>(re2captures.size()));
|
||||
if (!result)
|
||||
return std::nullopt;
|
||||
|
||||
Regex::Captures captures;
|
||||
Regex::NamedCaptures namedcaptures;
|
||||
for (size_t idx = 0; idx < re2captures.size(); ++idx)
|
||||
{
|
||||
captures.emplace_back(re2captures[idx]);
|
||||
|
||||
auto iter = regex.CapturingGroupNames().find(static_cast<int>(idx));
|
||||
if (iter != regex.CapturingGroupNames().end())
|
||||
namedcaptures.emplace(iter->second, re2captures[idx]);
|
||||
}
|
||||
|
||||
return Regex::MatchCollection(captures, namedcaptures);
|
||||
}
|
||||
};
|
||||
|
||||
class ModuleRegexRE2 final
|
||||
|
@ -39,6 +39,18 @@ public:
|
||||
{
|
||||
return InspIRCd::Match(text, GetPattern());
|
||||
}
|
||||
|
||||
std::optional<Regex::MatchCollection> Matches(const std::string& text) override
|
||||
{
|
||||
if (!InspIRCd::Match(text, GetPattern()))
|
||||
return std::nullopt;
|
||||
|
||||
// The glob engine does not support any kind of capture.
|
||||
static const Regex::Captures unusedc;
|
||||
static const Regex::NamedCaptures unusednc;
|
||||
|
||||
return Regex::MatchCollection(unusedc, unusednc);
|
||||
}
|
||||
};
|
||||
|
||||
class ModuleRegexGlob final
|
||||
|
@ -54,6 +54,22 @@ public:
|
||||
{
|
||||
return std::regex_search(text, regex);
|
||||
}
|
||||
|
||||
std::optional<Regex::MatchCollection> Matches(const std::string& text) override
|
||||
{
|
||||
std::smatch matches;
|
||||
if (!std::regex_search(text, matches, regex))
|
||||
return std::nullopt;
|
||||
|
||||
Regex::Captures captures(matches.size());
|
||||
for (const auto& match : matches)
|
||||
captures.push_back(match);
|
||||
|
||||
// The stdregex engine does not support named captures.
|
||||
static const Regex::NamedCaptures unusednc;
|
||||
|
||||
return Regex::MatchCollection(std::move(captures), unusednc);
|
||||
}
|
||||
};
|
||||
|
||||
class StdLibEngine final
|
||||
|
Loading…
x
Reference in New Issue
Block a user