Rewrite the regex system from scratch.

* Move everything to the Regex namespace:
  - Regex -> Regex::Pattern
  - RegexException -> Regex::Exception
  - RegexFactory -> Regex::Engine

* Add support for regex flags.
  - Regex::OPT_CASE_INSENSITIVE performs case-insensitive matching.

* Add the Regex::EngineReference class as a friendly wrapper around
  dynamic_reference_nocheck<Regex::Engine>.

* Add the Regex::SimpleEngine template class for automating the
  implementation of regex factory classes.

* Use std::shared_ptr for Regex::Pattern objects instead of making
  users manage memory manually.
This commit is contained in:
Sadie Powell 2020-07-28 16:43:59 +01:00
parent 7000d31765
commit 1621a84f96
9 changed files with 295 additions and 229 deletions

View File

@ -27,41 +27,152 @@
#include "inspircd.h"
class Regex : public classbase
namespace Regex
{
class Engine;
class EngineReference;
class Exception;
class Pattern;
template<typename> class SimpleEngine;
/** A shared pointer to a regex pattern. */
typedef std::shared_ptr<Pattern> PatternPtr;
/** The options to use when matching a pattern. */
enum PatternOptions : uint8_t
{
/** No special matching options apply. */
OPT_NONE = 0,
/** The pattern is case insensitive. */
OPT_CASE_INSENSITIVE = 1,
};
}
/** The base class for regular expression engines. */
class Regex::Engine
: public DataProvider
{
protected:
/** The uncompiled regex string. */
std::string regex_string;
// Constructor may as well be protected, as this class is abstract.
Regex(const std::string& rx) : regex_string(rx) { }
/** Initializes a new instance of the Regex::Engine class.
* @param Creator The module which created this instance.
* @param Name The name of this regular expression engine.
*/
Engine(Module* Creator, const std::string& Name)
: DataProvider(Creator, "regex/" + Name)
{
}
public:
/** Compiles a regular expression pattern.
* @param pattern The pattern to compile.
* @param options One or more options to use when matching the pattern.
* @return A shared pointer to an instance of the Regex::Pattern class.
*/
virtual PatternPtr Create(const std::string& pattern, uint8_t options = Regex::OPT_NONE) = 0;
};
virtual ~Regex() { }
virtual bool Matches(const std::string& text) = 0;
const std::string& GetRegexString() const
/**The base class for simple regular expression engines. */
template<typename PatternClass>
class Regex::SimpleEngine final
: public Regex::Engine
{
return regex_string;
public:
/** @copydoc Regex::Engine::Engine */
SimpleEngine(Module* Creator, const std::string& Name)
: Regex::Engine(Creator, Name)
{
}
/** @copydoc Regex::Engine::Create */
PatternPtr Create(const std::string& pattern, uint8_t options) override
{
return std::make_shared<PatternClass>(pattern, options);
}
};
class RegexFactory : public DataProvider
/** A dynamic reference to an instance of the Regex::Engine class. */
class Regex::EngineReference final
: public dynamic_reference_nocheck<Engine>
{
public:
RegexFactory(Module* Creator, const std::string& Name) : DataProvider(Creator, Name) { }
/** Initializes a new instance of the Regex::EngineReference class.
* @param Creator The module which created this instance.
* @param Name The name of the regular expression engine to reference.
*/
EngineReference(Module* Creator, const std::string& Name = "")
: dynamic_reference_nocheck<Engine>(Creator, Name.empty() ? "regex" : "regex/" + Name)
{
}
virtual Regex* Create(const std::string& expr) = 0;
/** Sets the name of the engine this reference is configured with.
* @param engine The name of the engine to refer to.
*/
void SetEngine(const std::string& engine)
{
SetProvider(engine.empty() ? "regex" : "regex/" + engine);
}
};
class RegexException : public ModuleException
/** The exception which is thrown when a regular expression fails to compile. */
class Regex::Exception final
: public ModuleException
{
public:
RegexException(const std::string& regex, const std::string& error)
: ModuleException("Error in regex '" + regex + "': " + error) { }
/** Initializes a new instance of the Regex::Exception class.
* @param regex A regular expression which failed to compile.
* @param error The error which occurred whilst compiling the regular expression.
*/
Exception(const std::string& regex, const std::string& error)
: ModuleException("Error in regex '" + regex + "': " + error)
{
}
RegexException(const std::string& regex, const std::string& error, int offset)
: ModuleException("Error in regex '" + regex + "' at offset " + ConvToStr(offset) + ": " + error) { }
/** Initializes a new instance of the Regex::Exception class.
* @param regex A regular expression which failed to compile.
* @param error The error which occurred whilst compiling the regular expression.
* @param offset The offset at which the errror occurred.
*/
Exception(const std::string& regex, const std::string& error, int offset)
: ModuleException("Error in regex '" + regex + "' at offset " + ConvToStr(offset) + ": " + error)
{
}
};
/** Represents a compiled regular expression pattern. */
class Regex::Pattern
{
private:
/** The options used when matching this pattern. */
const uint8_t optionflags;
/** The pattern as a string. */
const std::string patternstr;
protected:
/** Initializes a new instance of the Pattern class.
* @param Pattern The pattern as a string.
* @param Options The options used when matching this pattern.
*/
Pattern(const std::string& pattern, uint8_t options)
: optionflags(options)
, patternstr(pattern)
{
}
public:
/** Destroys an instance of the Pattern class. */
virtual ~Pattern() = default;
/** Retrieves the options used when matching this pattern. */
uint8_t GetOptions() const { return optionflags; }
/** Retrieves the pattern as a string. */
const std::string& GetPattern() const { return patternstr; }
/** Attempts to match this pattern against the specified text.
* @param text The text to match against.
* @return If the text matched the pattern then true; otherwise, false.
*/
virtual bool IsMatch(const std::string& text) = 0;
};

View File

@ -34,60 +34,55 @@
#include "inspircd.h"
#include <pcre.h>
#include "modules/regex.h"
#include <pcre.h>
#ifdef _WIN32
# pragma comment(lib, "libpcre.lib")
#endif
class PCRERegex : public Regex
class PCREPattern final
: public Regex::Pattern
{
private:
pcre* regex;
public:
PCRERegex(const std::string& rx) : Regex(rx)
PCREPattern(const std::string& pattern, uint8_t options)
: Regex::Pattern(pattern, options)
{
int flags = 0;
if (options & Regex::OPT_CASE_INSENSITIVE)
flags &= PCRE_CASELESS;
const char* error;
int erroffset;
regex = pcre_compile(rx.c_str(), 0, &error, &erroffset, NULL);
int erroroffset;
regex = pcre_compile(pattern.c_str(), flags, &error, &erroroffset, NULL);
if (!regex)
{
ServerInstance->Logs.Log(MODNAME, LOG_DEBUG, "pcre_compile failed: /%s/ [%d] %s", rx.c_str(), erroffset, error);
throw RegexException(rx, error, erroffset);
}
throw Regex::Exception(pattern, error, erroroffset);
}
~PCRERegex()
~PCREPattern()
{
pcre_free(regex);
}
bool Matches(const std::string& text) override
bool IsMatch(const std::string& text) override
{
return (pcre_exec(regex, NULL, text.c_str(), text.length(), 0, 0, NULL, 0) >= 0);
}
};
class PCREFactory : public RegexFactory
{
public:
PCREFactory(Module* m) : RegexFactory(m, "regex/pcre") {}
Regex* Create(const std::string& expr) override
{
return new PCRERegex(expr);
return pcre_exec(regex, NULL, text.c_str(), text.length(), 0, 0, NULL, 0) >= 0;
}
};
class ModuleRegexPCRE : public Module
{
private:
PCREFactory ref;
Regex::SimpleEngine<PCREPattern> regex;
public:
ModuleRegexPCRE()
: Module(VF_VENDOR, "Provides a regular expression engine which uses the PCRE library.")
, ref(this)
, regex(this, "pcre")
{
}
};

View File

@ -24,73 +24,61 @@
#include "inspircd.h"
#include "modules/regex.h"
#include <sys/types.h>
#include <regex.h>
#include <sys/types.h>
class POSIXRegex : public Regex
class POSIXPattern final
: public Regex::Pattern
{
regex_t regbuf;
private:
regex_t regex;
public:
POSIXRegex(const std::string& rx, bool extended) : Regex(rx)
POSIXPattern(const std::string& pattern, uint8_t options)
: Regex::Pattern(pattern, options)
{
int flags = (extended ? REG_EXTENDED : 0) | REG_NOSUB;
int errcode;
errcode = regcomp(&regbuf, rx.c_str(), flags);
if (errcode)
{
// Get the error string into a std::string. YUCK this involves at least 2 string copies.
std::string error;
char* errbuf;
size_t sz = regerror(errcode, &regbuf, NULL, 0);
errbuf = new char[sz + 1];
memset(errbuf, 0, sz + 1);
regerror(errcode, &regbuf, errbuf, sz + 1);
error = errbuf;
delete[] errbuf;
regfree(&regbuf);
throw RegexException(rx, error);
}
int flags = REG_EXTENDED | REG_NOSUB;
if (options & Regex::OPT_CASE_INSENSITIVE)
flags &= REG_ICASE;
int error = regcomp(&regex, pattern.c_str(), flags);
if (!error)
return;
// Retrieve the size of the error message and allocate a buffer.
size_t errorsize = regerror(error, &regex, NULL, 0);
std::vector<char> errormsg(errorsize);
// Retrieve the error message and free the buffer.
regerror(error, &regex, &errormsg[0], errormsg.size());
regfree(&regex);
throw Regex::Exception(pattern, std::string(&errormsg[0], errormsg.size()));
}
~POSIXRegex()
~POSIXPattern()
{
regfree(&regbuf);
regfree(&regex);
}
bool Matches(const std::string& text) override
bool IsMatch(const std::string& text) override
{
return (regexec(&regbuf, text.c_str(), 0, NULL, 0) == 0);
}
};
class PosixFactory : public RegexFactory
{
public:
bool extended;
PosixFactory(Module* m) : RegexFactory(m, "regex/posix") {}
Regex* Create(const std::string& expr) override
{
return new POSIXRegex(expr, extended);
return !regexec(&regex, text.c_str(), 0, NULL, 0);
}
};
class ModuleRegexPOSIX : public Module
{
private:
PosixFactory ref;
Regex::SimpleEngine<POSIXPattern> regex;
public:
ModuleRegexPOSIX()
: Module(VF_VENDOR, "Provides a regular expression engine which uses the POSIX.2 regular expression matching system.")
, ref(this)
, regex(this, "posix")
{
}
void ReadConfig(ConfigStatus& status) override
{
ref.extended = ServerInstance->Config->ConfValue("posix")->getBool("extended");
}
};
MODULE_INIT(ModuleRegexPOSIX)

View File

@ -29,55 +29,46 @@
#include "inspircd.h"
#include "modules/regex.h"
// Fix warnings about shadowing on GCC.
#ifdef __GNUC__
# pragma GCC diagnostic push
#endif
#include <re2/re2.h>
#ifdef __GNUC__
# pragma GCC diagnostic pop
#endif
class RE2Regex : public Regex
class RE2Pattern final
: public Regex::Pattern
{
RE2 regexcl;
private:
RE2 regex;
RE2::Options BuildOptions(uint8_t options)
{
RE2::Options re2options;
re2options.set_case_sensitive(!(options & Regex::OPT_CASE_INSENSITIVE));
re2options.set_log_errors(false);
return re2options;
}
public:
RE2Regex(const std::string& rx) : Regex(rx), regexcl(rx, RE2::Quiet)
RE2Pattern(const std::string& pattern, uint8_t options)
: Regex::Pattern(pattern, options)
, regex(pattern, BuildOptions(options))
{
if (!regexcl.ok())
{
throw RegexException(rx, regexcl.error());
}
if (!regex.ok())
throw Regex::Exception(pattern, regex.error());
}
bool Matches(const std::string& text) override
bool IsMatch(const std::string& text) override
{
return RE2::FullMatch(text, regexcl);
}
};
class RE2Factory : public RegexFactory
{
public:
RE2Factory(Module* m) : RegexFactory(m, "regex/re2") { }
Regex* Create(const std::string& expr) override
{
return new RE2Regex(expr);
return RE2::FullMatch(text, regex);
}
};
class ModuleRegexRE2 : public Module
{
private:
RE2Factory ref;
Regex::SimpleEngine<RE2Pattern> regex;
public:
ModuleRegexRE2()
: Module(VF_VENDOR, "Provides a regular expression engine which uses the RE2 library.")
, ref(this)
, regex(this, "re2")
{
}
};

View File

@ -31,65 +31,59 @@
#include "inspircd.h"
#include "modules/regex.h"
#include <sys/types.h>
#include <tre/regex.h>
class TRERegex : public Regex
class TREPattern final
: public Regex::Pattern
{
regex_t regbuf;
private:
regex_t regex;
public:
TRERegex(const std::string& rx) : Regex(rx)
TREPattern(const std::string& pattern, uint8_t options)
: Regex::Pattern(pattern, options)
{
int flags = REG_EXTENDED | REG_NOSUB;
int errcode;
errcode = regcomp(&regbuf, rx.c_str(), flags);
if (errcode)
{
// Get the error string into a std::string. YUCK this involves at least 2 string copies.
std::string error;
char* errbuf;
size_t sz = regerror(errcode, &regbuf, NULL, 0);
errbuf = new char[sz + 1];
memset(errbuf, 0, sz + 1);
regerror(errcode, &regbuf, errbuf, sz + 1);
error = errbuf;
delete[] errbuf;
regfree(&regbuf);
throw RegexException(rx, error);
}
if (options & Regex::OPT_CASE_INSENSITIVE)
flags &= REG_ICASE;
int error = regcomp(&regex, pattern.c_str(), flags);
if (!error)
return;
// Retrieve the size of the error message and allocate a buffer.
size_t errorsize = regerror(error, &regex, NULL, 0);
std::vector<char> errormsg(errorsize);
// Retrieve the error message and free the buffer.
regerror(error, &regex, &errormsg[0], errormsg.size());
regfree(&regex);
throw Regex::Exception(pattern, std::string(&errormsg[0], errormsg.size()));
}
~TRERegex()
~TREPattern()
{
regfree(&regbuf);
regfree(&regex);
}
bool Matches(const std::string& text) override
bool IsMatch(const std::string& text) override
{
return (regexec(&regbuf, text.c_str(), 0, NULL, 0) == 0);
}
};
class TREFactory : public RegexFactory
{
public:
TREFactory(Module* m) : RegexFactory(m, "regex/tre") {}
Regex* Create(const std::string& expr) override
{
return new TRERegex(expr);
return !regexec(&regex, text.c_str(), 0, NULL, 0);
}
};
class ModuleRegexTRE : public Module
{
private:
TREFactory trf;
Regex::SimpleEngine<TREPattern> regex;
public:
ModuleRegexTRE()
: Module(VF_VENDOR, "Provides a regular expression engine which uses the TRE library.")
, trf(this)
, regex(this, "tre")
{
}
};

View File

@ -62,7 +62,7 @@ enum FilterAction
class FilterResult
{
public:
Regex* regex;
Regex::PatternPtr regex;
std::string freeform;
std::string reason;
FilterAction action;
@ -77,7 +77,7 @@ class FilterResult
bool flag_strip_color;
bool flag_no_registered;
FilterResult(dynamic_reference<RegexFactory>& RegexEngine, const std::string& free, const std::string& rea, FilterAction act, unsigned long gt, const std::string& fla, bool cfg)
FilterResult(Regex::EngineReference& RegexEngine, const std::string& free, const std::string& rea, FilterAction act, unsigned long gt, const std::string& fla, bool cfg)
: freeform(free)
, reason(rea)
, action(act)
@ -194,12 +194,12 @@ class ModuleFilter
bool initing = true;
bool notifyuser;
bool warnonselfmsg;
RegexFactory* factory;
Regex::Engine* factory;
void FreeFilters();
public:
CommandFilter filtcommand;
dynamic_reference<RegexFactory> RegexEngine;
Regex::EngineReference RegexEngine;
std::vector<FilterResult> filters;
int flags;
@ -350,7 +350,7 @@ ModuleFilter::ModuleFilter()
, ServerProtocol::SyncEventListener(this)
, Stats::EventListener(this)
, filtcommand(this)
, RegexEngine(this, "regex")
, RegexEngine(this)
{
}
@ -367,9 +367,6 @@ CullResult ModuleFilter::cull()
void ModuleFilter::FreeFilters()
{
for (std::vector<FilterResult>::const_iterator i = filters.begin(); i != filters.end(); ++i)
delete i->regex;
filters.clear();
}
@ -640,11 +637,8 @@ void ModuleFilter::ReadConfig(ConfigStatus& status)
factory = RegexEngine ? (RegexEngine.operator->()) : NULL;
if (newrxengine.empty())
RegexEngine.SetProvider("regex");
else
RegexEngine.SetProvider("regex/" + newrxengine);
RegexEngine.SetEngine(newrxengine);
if (!RegexEngine)
{
if (newrxengine.empty())
@ -764,7 +758,7 @@ FilterResult* ModuleFilter::FilterMatch(User* user, const std::string &text, int
InspIRCd::StripColor(stripped_text);
}
if (filter->regex->Matches(filter->flag_strip_color ? stripped_text : text))
if (filter->regex->IsMatch(filter->flag_strip_color ? stripped_text : text))
return filter;
}
return NULL;
@ -777,7 +771,6 @@ bool ModuleFilter::DeleteFilter(const std::string& freeform, std::string& reason
if (i->freeform == freeform)
{
reason.assign(i->reason);
delete i->regex;
filters.erase(i);
return true;
}
@ -855,7 +848,6 @@ void ModuleFilter::ReadFilters()
if (filter->from_config)
{
removedfilters.insert(filter->freeform);
delete filter->regex;
filter = filters.erase(filter);
continue;
}

View File

@ -23,42 +23,33 @@
*/
#include "modules/regex.h"
#include "inspircd.h"
#include "modules/regex.h"
class GlobRegex : public Regex
class GlobPattern final
: public Regex::Pattern
{
public:
GlobRegex(const std::string& rx) : Regex(rx)
GlobPattern(const std::string& pattern, uint8_t options)
: Regex::Pattern(pattern, options)
{
}
bool Matches(const std::string& text) override
bool IsMatch(const std::string& text) override
{
return InspIRCd::Match(text, this->regex_string);
return InspIRCd::Match(text, GetPattern());
}
};
class GlobFactory : public RegexFactory
{
public:
Regex* Create(const std::string& expr) override
{
return new GlobRegex(expr);
}
GlobFactory(Module* m) : RegexFactory(m, "regex/glob") {}
};
class ModuleRegexGlob : public Module
{
private:
GlobFactory gf;
Regex::SimpleEngine<GlobPattern> regex;
public:
ModuleRegexGlob()
: Module(VF_VENDOR, "Provides a regular expression engine which uses the built-in glob matching system.")
, gf(this)
, regex(this, "glob")
{
}
};

View File

@ -22,58 +22,73 @@
#include "inspircd.h"
#include "modules/regex.h"
#include <regex>
class StdRegex : public Regex
class StdLibPattern final
: public Regex::Pattern
{
std::regex regexcl;
private:
std::regex regex;
public:
StdRegex(const std::string& rx, std::regex::flag_type fltype) : Regex(rx)
StdLibPattern(const std::string& pattern, uint8_t options, std::regex::flag_type type)
: Regex::Pattern(pattern, options)
{
// Convert the generic pattern options to stdlib pattern flags.
std::regex_constants::syntax_option_type flags = type | std::regex::optimize;
if (options & Regex::OPT_CASE_INSENSITIVE)
flags |= std::regex::icase;
try
{
regexcl.assign(rx, fltype | std::regex::optimize);
regex.assign(pattern, flags);
}
catch(const std::regex_error& rxerr)
catch(const std::regex_error& error)
{
throw RegexException(rx, rxerr.what());
throw Regex::Exception(pattern, error.what());
}
}
bool Matches(const std::string& text) override
bool IsMatch(const std::string& text) override
{
return std::regex_search(text, regexcl);
return std::regex_search(text, regex);
}
};
class StdRegexFactory : public RegexFactory
class StdLibEngine final
: public Regex::Engine
{
public:
std::regex::flag_type regextype;
StdRegexFactory(Module* m) : RegexFactory(m, "regex/stdregex") {}
Regex* Create(const std::string& expr) override
StdLibEngine(Module* Creator)
: Regex::Engine(Creator, "stdregex")
{
return new StdRegex(expr, regextype);
}
Regex::PatternPtr Create(const std::string& pattern, uint8_t options) override
{
return std::make_shared<StdLibPattern>(pattern, options, regextype);
}
};
class ModuleRegexStd : public Module
class ModuleRegexStdLib : public Module
{
private:
StdRegexFactory ref;
StdLibEngine regex;
public:
ModuleRegexStd()
ModuleRegexStdLib()
: Module(VF_VENDOR, "Provides a regular expression engine which uses the C++11 std::regex regular expression matching system.")
, ref(this)
, regex(this)
{
}
void ReadConfig(ConfigStatus& status) override
{
ConfigTag* tag = ServerInstance->Config->ConfValue("stdregex");
ref.regextype = tag->getEnum("type", std::regex::ECMAScript,
regex.regextype = tag->getEnum("type", std::regex::ECMAScript,
{
{ "awk", std::regex::awk },
{ "bre", std::regex::basic },
@ -85,4 +100,4 @@ class ModuleRegexStd : public Module
}
};
MODULE_INIT(ModuleRegexStd)
MODULE_INIT(ModuleRegexStdLib)

View File

@ -46,7 +46,7 @@ class RLine : public XLine
* @param regex Pattern to match with
* @
*/
RLine(time_t s_time, unsigned long d, const std::string& src, const std::string& re, const std::string& regexs, dynamic_reference<RegexFactory>& rxfactory)
RLine(time_t s_time, unsigned long d, const std::string& src, const std::string& re, const std::string& regexs, Regex::EngineReference& rxfactory)
: XLine(s_time, d, src, re, "R")
, matchtext(regexs)
{
@ -56,13 +56,6 @@ class RLine : public XLine
regex = rxfactory->Create(regexs);
}
/** Destructor
*/
~RLine()
{
delete regex;
}
bool Matches(User* u) override
{
LocalUser* lu = IS_LOCAL(u);
@ -71,12 +64,12 @@ class RLine : public XLine
const std::string host = u->nick + "!" + u->ident + "@" + u->GetRealHost() + " " + u->GetRealName();
const std::string ip = u->nick + "!" + u->ident + "@" + u->GetIPString() + " " + u->GetRealName();
return (regex->Matches(host) || regex->Matches(ip));
return (regex->IsMatch(host) || regex->IsMatch(ip));
}
bool Matches(const std::string& compare) override
{
return regex->Matches(compare);
return regex->IsMatch(compare);
}
void Apply(User* u) override
@ -104,7 +97,7 @@ class RLine : public XLine
std::string matchtext;
Regex *regex;
Regex::PatternPtr regex;
};
@ -113,8 +106,8 @@ class RLine : public XLine
class RLineFactory : public XLineFactory
{
public:
dynamic_reference<RegexFactory>& rxfactory;
RLineFactory(dynamic_reference<RegexFactory>& rx) : XLineFactory("R"), rxfactory(rx)
Regex::EngineReference& rxfactory;
RLineFactory(Regex::EngineReference& rx) : XLineFactory("R"), rxfactory(rx)
{
}
@ -226,18 +219,18 @@ class ModuleRLine
, public Stats::EventListener
{
private:
dynamic_reference<RegexFactory> rxfactory;
Regex::EngineReference rxfactory;
RLineFactory f;
CommandRLine r;
bool MatchOnNickChange;
bool initing = true;
RegexFactory* factory;
Regex::Engine* factory;
public:
ModuleRLine()
: Module(VF_VENDOR | VF_COMMON, "Adds the /RLINE command which allows server operators to prevent users matching a nickname!username@hostname+realname regular expression from connecting to the server.")
, Stats::EventListener(this)
, rxfactory(this, "regex")
, rxfactory(this)
, f(rxfactory)
, r(this, f)
{
@ -284,11 +277,7 @@ class ModuleRLine
factory = rxfactory ? (rxfactory.operator->()) : NULL;
if (newrxengine.empty())
rxfactory.SetProvider("regex");
else
rxfactory.SetProvider("regex/" + newrxengine);
rxfactory.SetEngine(newrxengine);
if (!rxfactory)
{
if (newrxengine.empty())