[llvm] [FIX] Fix undefined-behaviour in regex engine. (PR #73071)

via llvm-commits llvm-commits at lists.llvm.org
Thu Nov 30 04:51:28 PST 2023


https://github.com/tanmaysachan updated https://github.com/llvm/llvm-project/pull/73071

>From e24049717c1bacfc7d62bd94513fb0bd207aef3e Mon Sep 17 00:00:00 2001
From: tanmaysachan <tnmysachan at gmail.com>
Date: Wed, 22 Nov 2023 08:09:08 +0530
Subject: [PATCH 1/6] Fix undefined-behaviour in regex engine.

- Running the regex engine on an empty string causes "Applying non-zero offset to null pointer" UB.
- This patch puts a check in the matcher.
- Bug discovered through "mlir-text-parser-fuzzer" module.
---
 llvm/lib/Support/regengine.inc | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Support/regengine.inc b/llvm/lib/Support/regengine.inc
index f23993abc6e7e71..54dd96ab9cfada5 100644
--- a/llvm/lib/Support/regengine.inc
+++ b/llvm/lib/Support/regengine.inc
@@ -146,7 +146,9 @@ matcher(struct re_guts *g, const char *string, size_t nmatch,
 	const char *stop;
 
 	/* simplify the situation where possible */
-	if (g->cflags&REG_NOSUB)
+        if (!string)
+		return(REG_INVARG);
+        if (g->cflags&REG_NOSUB)
 		nmatch = 0;
 	if (eflags&REG_STARTEND) {
 		start = string + pmatch[0].rm_so;

>From 54f6553107834de00401381381883561479a5e09 Mon Sep 17 00:00:00 2001
From: tanmaysachan <tnmysachan at gmail.com>
Date: Wed, 29 Nov 2023 11:19:19 +0530
Subject: [PATCH 2/6] Move fix to Regex.cpp

---
 llvm/lib/Support/Regex.cpp     | 4 ++++
 llvm/lib/Support/regengine.inc | 2 --
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Support/Regex.cpp b/llvm/lib/Support/Regex.cpp
index 8fa71a749cc8e10..4221f74055615aa 100644
--- a/llvm/lib/Support/Regex.cpp
+++ b/llvm/lib/Support/Regex.cpp
@@ -82,6 +82,10 @@ unsigned Regex::getNumMatches() const {
 
 bool Regex::match(StringRef String, SmallVectorImpl<StringRef> *Matches,
                   std::string *Error) const {
+  // Exit without match if string is empty
+  if (String.empty())
+    return false;
+
   // Reset error, if given.
   if (Error && !Error->empty())
     *Error = "";
diff --git a/llvm/lib/Support/regengine.inc b/llvm/lib/Support/regengine.inc
index 54dd96ab9cfada5..8f5028f8bba541e 100644
--- a/llvm/lib/Support/regengine.inc
+++ b/llvm/lib/Support/regengine.inc
@@ -146,8 +146,6 @@ matcher(struct re_guts *g, const char *string, size_t nmatch,
 	const char *stop;
 
 	/* simplify the situation where possible */
-        if (!string)
-		return(REG_INVARG);
         if (g->cflags&REG_NOSUB)
 		nmatch = 0;
 	if (eflags&REG_STARTEND) {

>From 3b5b785a825d6ca54c830a5bb0759ffab64f01f7 Mon Sep 17 00:00:00 2001
From: tanmaysachan <tnmysachan at gmail.com>
Date: Wed, 29 Nov 2023 11:23:24 +0530
Subject: [PATCH 3/6] Fix formatting

---
 llvm/lib/Support/regengine.inc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Support/regengine.inc b/llvm/lib/Support/regengine.inc
index 8f5028f8bba541e..f23993abc6e7e71 100644
--- a/llvm/lib/Support/regengine.inc
+++ b/llvm/lib/Support/regengine.inc
@@ -146,7 +146,7 @@ matcher(struct re_guts *g, const char *string, size_t nmatch,
 	const char *stop;
 
 	/* simplify the situation where possible */
-        if (g->cflags&REG_NOSUB)
+	if (g->cflags&REG_NOSUB)
 		nmatch = 0;
 	if (eflags&REG_STARTEND) {
 		start = string + pmatch[0].rm_so;

>From 08dfaded55818d445638f1a4f95c4ffcae5779b6 Mon Sep 17 00:00:00 2001
From: tanmaysachan <tnmysachan at gmail.com>
Date: Wed, 29 Nov 2023 13:51:13 +0530
Subject: [PATCH 4/6] Change empty check to ptr check

---
 llvm/lib/Support/Regex.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Support/Regex.cpp b/llvm/lib/Support/Regex.cpp
index 4221f74055615aa..de6b630ee3b5e96 100644
--- a/llvm/lib/Support/Regex.cpp
+++ b/llvm/lib/Support/Regex.cpp
@@ -82,8 +82,8 @@ unsigned Regex::getNumMatches() const {
 
 bool Regex::match(StringRef String, SmallVectorImpl<StringRef> *Matches,
                   std::string *Error) const {
-  // Exit without match if string is empty
-  if (String.empty())
+  // No match for null string data
+  if (String.data() == nullptr)
     return false;
 
   // Reset error, if given.

>From f879580c6c188573dd930f27ab0e0b62901d27cf Mon Sep 17 00:00:00 2001
From: tanmaysachan <tnmysachan at gmail.com>
Date: Wed, 29 Nov 2023 14:35:08 +0530
Subject: [PATCH 5/6] Update null string to empty string

---
 llvm/lib/Support/Regex.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Support/Regex.cpp b/llvm/lib/Support/Regex.cpp
index de6b630ee3b5e96..5eedf95c48e3784 100644
--- a/llvm/lib/Support/Regex.cpp
+++ b/llvm/lib/Support/Regex.cpp
@@ -82,10 +82,6 @@ unsigned Regex::getNumMatches() const {
 
 bool Regex::match(StringRef String, SmallVectorImpl<StringRef> *Matches,
                   std::string *Error) const {
-  // No match for null string data
-  if (String.data() == nullptr)
-    return false;
-
   // Reset error, if given.
   if (Error && !Error->empty())
     *Error = "";
@@ -96,6 +92,10 @@ bool Regex::match(StringRef String, SmallVectorImpl<StringRef> *Matches,
 
   unsigned nmatch = Matches ? preg->re_nsub+1 : 0;
 
+  // Update null string to empty string.
+  if (String.data() == nullptr)
+    String = "";
+
   // pmatch needs to have at least one element.
   SmallVector<llvm_regmatch_t, 8> pm;
   pm.resize(nmatch > 0 ? nmatch : 1);

>From 9abb2cb8d1ed2ca862b138692419878ae7ad6a81 Mon Sep 17 00:00:00 2001
From: tanmaysachan <tnmysachan at gmail.com>
Date: Thu, 30 Nov 2023 18:14:00 +0530
Subject: [PATCH 6/6] Add unittest for null string input

---
 llvm/unittests/Support/RegexTest.cpp | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/llvm/unittests/Support/RegexTest.cpp b/llvm/unittests/Support/RegexTest.cpp
index e3c721b466c6ccd..09f674bb209c079 100644
--- a/llvm/unittests/Support/RegexTest.cpp
+++ b/llvm/unittests/Support/RegexTest.cpp
@@ -225,3 +225,10 @@ TEST_F(RegexTest, OssFuzz3727Regression) {
 }
 
 }
+
+TEST_F(RegexTest, NullStringInput) {
+  Regex r("^$");
+  // String data points to nullptr in default constructor
+  StringRef String;
+  EXPECT_TRUE(r.match(String));
+}



More information about the llvm-commits mailing list