[llvm] [FIX] Fix undefined-behaviour in regex engine. (PR #73071)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Nov 28 21:53:44 PST 2023
https://github.com/tanmaysachan updated https://github.com/llvm/llvm-project/pull/73071
>From e24049717c1bacfc7d62bd94513fb0bd207aef3e Mon Sep 17 00:00:00 2001
From: tanmaysachan <tnmysachan at gmail.com>
Date: Wed, 22 Nov 2023 08:09:08 +0530
Subject: [PATCH 1/3] Fix undefined-behaviour in regex engine.
- Running the regex engine on an empty string causes "Applying non-zero offset to null pointer" UB.
- This patch puts a check in the matcher.
- Bug discovered through "mlir-text-parser-fuzzer" module.
---
llvm/lib/Support/regengine.inc | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Support/regengine.inc b/llvm/lib/Support/regengine.inc
index f23993abc6e7e71..54dd96ab9cfada5 100644
--- a/llvm/lib/Support/regengine.inc
+++ b/llvm/lib/Support/regengine.inc
@@ -146,7 +146,9 @@ matcher(struct re_guts *g, const char *string, size_t nmatch,
const char *stop;
/* simplify the situation where possible */
- if (g->cflags&REG_NOSUB)
+ if (!string)
+ return(REG_INVARG);
+ if (g->cflags&REG_NOSUB)
nmatch = 0;
if (eflags&REG_STARTEND) {
start = string + pmatch[0].rm_so;
>From 54f6553107834de00401381381883561479a5e09 Mon Sep 17 00:00:00 2001
From: tanmaysachan <tnmysachan at gmail.com>
Date: Wed, 29 Nov 2023 11:19:19 +0530
Subject: [PATCH 2/3] Move fix to Regex.cpp
---
llvm/lib/Support/Regex.cpp | 4 ++++
llvm/lib/Support/regengine.inc | 2 --
2 files changed, 4 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Support/Regex.cpp b/llvm/lib/Support/Regex.cpp
index 8fa71a749cc8e10..4221f74055615aa 100644
--- a/llvm/lib/Support/Regex.cpp
+++ b/llvm/lib/Support/Regex.cpp
@@ -82,6 +82,10 @@ unsigned Regex::getNumMatches() const {
bool Regex::match(StringRef String, SmallVectorImpl<StringRef> *Matches,
std::string *Error) const {
+ // Exit without match if string is empty
+ if (String.empty())
+ return false;
+
// Reset error, if given.
if (Error && !Error->empty())
*Error = "";
diff --git a/llvm/lib/Support/regengine.inc b/llvm/lib/Support/regengine.inc
index 54dd96ab9cfada5..8f5028f8bba541e 100644
--- a/llvm/lib/Support/regengine.inc
+++ b/llvm/lib/Support/regengine.inc
@@ -146,8 +146,6 @@ matcher(struct re_guts *g, const char *string, size_t nmatch,
const char *stop;
/* simplify the situation where possible */
- if (!string)
- return(REG_INVARG);
if (g->cflags&REG_NOSUB)
nmatch = 0;
if (eflags&REG_STARTEND) {
>From f88252710e45987034d9a1f7e3725615fe4a7190 Mon Sep 17 00:00:00 2001
From: tanmaysachan <tnmysachan at gmail.com>
Date: Wed, 29 Nov 2023 11:23:24 +0530
Subject: [PATCH 3/3] Fix formatting
---
llvm/lib/Support/regengine.inc | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Support/regengine.inc b/llvm/lib/Support/regengine.inc
index 8f5028f8bba541e..f9ad1e6894c86c6 100644
--- a/llvm/lib/Support/regengine.inc
+++ b/llvm/lib/Support/regengine.inc
@@ -146,8 +146,8 @@ matcher(struct re_guts *g, const char *string, size_t nmatch,
const char *stop;
/* simplify the situation where possible */
- if (g->cflags&REG_NOSUB)
- nmatch = 0;
+ if (g->cflags & REG_NOSUB)
+ nmatch = 0;
if (eflags&REG_STARTEND) {
start = string + pmatch[0].rm_so;
stop = string + pmatch[0].rm_eo;
More information about the llvm-commits
mailing list