[llvm] [FIX] Fix undefined-behaviour in regex engine. (PR #73071)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Nov 30 04:51:28 PST 2023
https://github.com/tanmaysachan updated https://github.com/llvm/llvm-project/pull/73071
>From e24049717c1bacfc7d62bd94513fb0bd207aef3e Mon Sep 17 00:00:00 2001
From: tanmaysachan <tnmysachan at gmail.com>
Date: Wed, 22 Nov 2023 08:09:08 +0530
Subject: [PATCH 1/6] Fix undefined-behaviour in regex engine.
- Running the regex engine on an empty string causes "Applying non-zero offset to null pointer" UB.
- This patch puts a check in the matcher.
- Bug discovered through "mlir-text-parser-fuzzer" module.
---
llvm/lib/Support/regengine.inc | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Support/regengine.inc b/llvm/lib/Support/regengine.inc
index f23993abc6e7e71..54dd96ab9cfada5 100644
--- a/llvm/lib/Support/regengine.inc
+++ b/llvm/lib/Support/regengine.inc
@@ -146,7 +146,9 @@ matcher(struct re_guts *g, const char *string, size_t nmatch,
const char *stop;
/* simplify the situation where possible */
- if (g->cflags®_NOSUB)
+ if (!string)
+ return(REG_INVARG);
+ if (g->cflags®_NOSUB)
nmatch = 0;
if (eflags®_STARTEND) {
start = string + pmatch[0].rm_so;
>From 54f6553107834de00401381381883561479a5e09 Mon Sep 17 00:00:00 2001
From: tanmaysachan <tnmysachan at gmail.com>
Date: Wed, 29 Nov 2023 11:19:19 +0530
Subject: [PATCH 2/6] Move fix to Regex.cpp
---
llvm/lib/Support/Regex.cpp | 4 ++++
llvm/lib/Support/regengine.inc | 2 --
2 files changed, 4 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Support/Regex.cpp b/llvm/lib/Support/Regex.cpp
index 8fa71a749cc8e10..4221f74055615aa 100644
--- a/llvm/lib/Support/Regex.cpp
+++ b/llvm/lib/Support/Regex.cpp
@@ -82,6 +82,10 @@ unsigned Regex::getNumMatches() const {
bool Regex::match(StringRef String, SmallVectorImpl<StringRef> *Matches,
std::string *Error) const {
+ // Exit without match if string is empty
+ if (String.empty())
+ return false;
+
// Reset error, if given.
if (Error && !Error->empty())
*Error = "";
diff --git a/llvm/lib/Support/regengine.inc b/llvm/lib/Support/regengine.inc
index 54dd96ab9cfada5..8f5028f8bba541e 100644
--- a/llvm/lib/Support/regengine.inc
+++ b/llvm/lib/Support/regengine.inc
@@ -146,8 +146,6 @@ matcher(struct re_guts *g, const char *string, size_t nmatch,
const char *stop;
/* simplify the situation where possible */
- if (!string)
- return(REG_INVARG);
if (g->cflags®_NOSUB)
nmatch = 0;
if (eflags®_STARTEND) {
>From 3b5b785a825d6ca54c830a5bb0759ffab64f01f7 Mon Sep 17 00:00:00 2001
From: tanmaysachan <tnmysachan at gmail.com>
Date: Wed, 29 Nov 2023 11:23:24 +0530
Subject: [PATCH 3/6] Fix formatting
---
llvm/lib/Support/regengine.inc | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/Support/regengine.inc b/llvm/lib/Support/regengine.inc
index 8f5028f8bba541e..f23993abc6e7e71 100644
--- a/llvm/lib/Support/regengine.inc
+++ b/llvm/lib/Support/regengine.inc
@@ -146,7 +146,7 @@ matcher(struct re_guts *g, const char *string, size_t nmatch,
const char *stop;
/* simplify the situation where possible */
- if (g->cflags®_NOSUB)
+ if (g->cflags®_NOSUB)
nmatch = 0;
if (eflags®_STARTEND) {
start = string + pmatch[0].rm_so;
>From 08dfaded55818d445638f1a4f95c4ffcae5779b6 Mon Sep 17 00:00:00 2001
From: tanmaysachan <tnmysachan at gmail.com>
Date: Wed, 29 Nov 2023 13:51:13 +0530
Subject: [PATCH 4/6] Change empty check to ptr check
---
llvm/lib/Support/Regex.cpp | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Support/Regex.cpp b/llvm/lib/Support/Regex.cpp
index 4221f74055615aa..de6b630ee3b5e96 100644
--- a/llvm/lib/Support/Regex.cpp
+++ b/llvm/lib/Support/Regex.cpp
@@ -82,8 +82,8 @@ unsigned Regex::getNumMatches() const {
bool Regex::match(StringRef String, SmallVectorImpl<StringRef> *Matches,
std::string *Error) const {
- // Exit without match if string is empty
- if (String.empty())
+ // No match for null string data
+ if (String.data() == nullptr)
return false;
// Reset error, if given.
>From f879580c6c188573dd930f27ab0e0b62901d27cf Mon Sep 17 00:00:00 2001
From: tanmaysachan <tnmysachan at gmail.com>
Date: Wed, 29 Nov 2023 14:35:08 +0530
Subject: [PATCH 5/6] Update null string to empty string
---
llvm/lib/Support/Regex.cpp | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/Support/Regex.cpp b/llvm/lib/Support/Regex.cpp
index de6b630ee3b5e96..5eedf95c48e3784 100644
--- a/llvm/lib/Support/Regex.cpp
+++ b/llvm/lib/Support/Regex.cpp
@@ -82,10 +82,6 @@ unsigned Regex::getNumMatches() const {
bool Regex::match(StringRef String, SmallVectorImpl<StringRef> *Matches,
std::string *Error) const {
- // No match for null string data
- if (String.data() == nullptr)
- return false;
-
// Reset error, if given.
if (Error && !Error->empty())
*Error = "";
@@ -96,6 +92,10 @@ bool Regex::match(StringRef String, SmallVectorImpl<StringRef> *Matches,
unsigned nmatch = Matches ? preg->re_nsub+1 : 0;
+ // Update null string to empty string.
+ if (String.data() == nullptr)
+ String = "";
+
// pmatch needs to have at least one element.
SmallVector<llvm_regmatch_t, 8> pm;
pm.resize(nmatch > 0 ? nmatch : 1);
>From 9abb2cb8d1ed2ca862b138692419878ae7ad6a81 Mon Sep 17 00:00:00 2001
From: tanmaysachan <tnmysachan at gmail.com>
Date: Thu, 30 Nov 2023 18:14:00 +0530
Subject: [PATCH 6/6] Add unittest for null string input
---
llvm/unittests/Support/RegexTest.cpp | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/llvm/unittests/Support/RegexTest.cpp b/llvm/unittests/Support/RegexTest.cpp
index e3c721b466c6ccd..09f674bb209c079 100644
--- a/llvm/unittests/Support/RegexTest.cpp
+++ b/llvm/unittests/Support/RegexTest.cpp
@@ -225,3 +225,10 @@ TEST_F(RegexTest, OssFuzz3727Regression) {
}
}
+
+TEST_F(RegexTest, NullStringInput) {
+ Regex r("^$");
+ // String data points to nullptr in default constructor
+ StringRef String;
+ EXPECT_TRUE(r.match(String));
+}
More information about the llvm-commits
mailing list