[libc++][regex] Validate backreferences in the constructor.
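This patch makes the regex constructor throw regex_error with the
error_backref code when a pattern contains an invalid backreference
(one whose number exceeds the count of capture groups seen so far)
and the basic, extended, grep, or egrep grammar is in use.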
This fixes https://bugs.llvm.org/show_bug.cgi?id=34297.
Differential Revision: https://reviews.llvm.org/D62453
Cr-Mirrored-From: https://chromium.googlesource.com/external/github.com/llvm/llvm-project
Cr-Mirrored-Commit: 72ce0c8073bc8dfa109d25a3c5f245ee0285568c
diff --git a/include/regex b/include/regex
index e349fa6..68cfbb2 100644
--- a/include/regex
+++ b/include/regex
@@ -4661,6 +4661,8 @@
unsigned __val = __traits_.value(c, 10);
if (__val >= 1 && __val <= 9)
{
+ if (__val > mark_count())
+ __throw_regex_error<regex_constants::error_backref>();
__push_back_ref(__val);
return true;
}
diff --git a/test/std/re/re.regex/re.regex.construct/bad_backref.pass.cpp b/test/std/re/re.regex/re.regex.construct/bad_backref.pass.cpp
index 0a15b64..08a1444 100644
--- a/test/std/re/re.regex/re.regex.construct/bad_backref.pass.cpp
+++ b/test/std/re/re.regex/re.regex.construct/bad_backref.pass.cpp
@@ -18,11 +18,11 @@
#include <cassert>
#include "test_macros.h"
-static bool error_badbackref_thrown(const char *pat)
+static bool error_badbackref_thrown(const char *pat, std::regex::flag_type f)
{
bool result = false;
try {
- std::regex re(pat);
+ std::regex re(pat, f);
} catch (const std::regex_error &ex) {
result = (ex.code() == std::regex_constants::error_backref);
}
@@ -31,9 +31,25 @@
int main(int, char**)
{
- assert(error_badbackref_thrown("\\1abc")); // no references
- assert(error_badbackref_thrown("ab(c)\\2def")); // only one reference
- assert(error_badbackref_thrown("\\800000000000000000000000000000")); // overflows
+// no references
+ assert(error_badbackref_thrown("\\1abc", std::regex_constants::ECMAScript));
+ assert(error_badbackref_thrown("\\1abd", std::regex::basic));
+ assert(error_badbackref_thrown("\\1abd", std::regex::extended));
+ assert(error_badbackref_thrown("\\1abd", std::regex::awk) == false);
+ assert(error_badbackref_thrown("\\1abd", std::regex::grep));
+ assert(error_badbackref_thrown("\\1abd", std::regex::egrep));
+
+// only one reference
+ assert(error_badbackref_thrown("ab(c)\\2def", std::regex_constants::ECMAScript));
+ assert(error_badbackref_thrown("ab\\(c\\)\\2def", std::regex_constants::basic));
+ assert(error_badbackref_thrown("ab(c)\\2def", std::regex_constants::extended));
+ assert(error_badbackref_thrown("ab\\(c\\)\\2def", std::regex_constants::awk) == false);
+ assert(error_badbackref_thrown("ab(c)\\2def", std::regex_constants::awk) == false);
+ assert(error_badbackref_thrown("ab\\(c\\)\\2def", std::regex_constants::grep));
+ assert(error_badbackref_thrown("ab(c)\\2def", std::regex_constants::egrep));
+
+
+ assert(error_badbackref_thrown("\\800000000000000000000000000000", std::regex_constants::ECMAScript)); // overflows
// this should NOT throw, because we only should look at the '1'
// See https://bugs.llvm.org/show_bug.cgi?id=31387
@@ -42,5 +58,35 @@
std::regex re(pat1, pat1 + 7); // extra chars after the end.
}
+// reference before group
+ assert(error_badbackref_thrown("\\1(abc)", std::regex_constants::ECMAScript));
+ assert(error_badbackref_thrown("\\1\\(abd\\)", std::regex::basic));
+ assert(error_badbackref_thrown("\\1(abd)", std::regex::extended));
+ assert(error_badbackref_thrown("\\1(abd)", std::regex::awk) == false);
+ assert(error_badbackref_thrown("\\1\\(abd\\)", std::regex::awk) == false);
+ assert(error_badbackref_thrown("\\1\\(abd\\)", std::regex::grep));
+ assert(error_badbackref_thrown("\\1(abd)", std::regex::egrep));
+
+// reference limit
+ assert(error_badbackref_thrown("(cat)\\10", std::regex::ECMAScript));
+ assert(error_badbackref_thrown("\\(cat\\)\\10", std::regex::basic) == false);
+ assert(error_badbackref_thrown("(cat)\\10", std::regex::extended) == false);
+ assert(error_badbackref_thrown("\\(cat\\)\\10", std::regex::awk) == false);
+ assert(error_badbackref_thrown("(cat)\\10", std::regex::awk) == false);
+ assert(error_badbackref_thrown("\\(cat\\)\\10", std::regex::grep) == false);
+ assert(error_badbackref_thrown("(cat)\\10", std::regex::egrep) == false);
+
+// https://bugs.llvm.org/show_bug.cgi?id=34297
+ assert(error_badbackref_thrown("(cat)\\1", std::regex::basic));
+ assert(error_badbackref_thrown("\\(cat\\)\\1", std::regex::basic) == false);
+ assert(error_badbackref_thrown("(cat)\\1", std::regex::extended) == false);
+ assert(error_badbackref_thrown("\\(cat\\)\\1", std::regex::extended));
+ assert(error_badbackref_thrown("(cat)\\1", std::regex::awk) == false);
+ assert(error_badbackref_thrown("\\(cat\\)\\1", std::regex::awk) == false);
+ assert(error_badbackref_thrown("(cat)\\1", std::regex::grep));
+ assert(error_badbackref_thrown("\\(cat\\)\\1", std::regex::grep) == false);
+ assert(error_badbackref_thrown("(cat)\\1", std::regex::egrep) == false);
+ assert(error_badbackref_thrown("\\(cat\\)\\1", std::regex::egrep));
+
return 0;
}