[llvm] [CodeGenPrepare] Drop nsw flags in `optimizeLoadExt` (PR #118180)

Yingwei Zheng via llvm-commits llvm-commits at lists.llvm.org
Sat Nov 30 08:58:34 PST 2024


https://github.com/dtcxzyw created https://github.com/llvm/llvm-project/pull/118180

Alive2: https://alive2.llvm.org/ce/z/pMcD7q
Closes https://github.com/llvm/llvm-project/issues/118172.


>From 858315445c7ee6a9d7fc4ab7ab60f2ec776426e3 Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Sun, 1 Dec 2024 00:33:03 +0800
Subject: [PATCH 1/2] [CodeGenPrepare] Add pre-commit tests. NFC.

---
 .../Transforms/CodeGenPrepare/X86/pr118172.ll | 86 +++++++++++++++++++
 1 file changed, 86 insertions(+)
 create mode 100644 llvm/test/Transforms/CodeGenPrepare/X86/pr118172.ll

diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/pr118172.ll b/llvm/test/Transforms/CodeGenPrepare/X86/pr118172.ll
new file mode 100644
index 00000000000000..ae5c0e385c973c
--- /dev/null
+++ b/llvm/test/Transforms/CodeGenPrepare/X86/pr118172.ll
@@ -0,0 +1,86 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -passes='require<profile-summary>,function(codegenprepare)' -mtriple=x86_64-unknown-unknown < %s | FileCheck %s
+
+; Make sure the nsw flag is dropped when the load ext is combined.
+define i32 @simplify_load_ext_drop_trunc_nsw(ptr %p) {
+; CHECK-LABEL: define i32 @simplify_load_ext_drop_trunc_nsw(
+; CHECK-SAME: ptr [[P:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[X:%.*]] = load i32, ptr [[P]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = and i32 [[X]], 255
+; CHECK-NEXT:    [[TRUNC:%.*]] = trunc nsw i32 [[TMP0]] to i8
+; CHECK-NEXT:    [[EXT1:%.*]] = sext i8 [[TRUNC]] to i16
+; CHECK-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK-NEXT:    [[EXT2:%.*]] = zext i16 [[EXT1]] to i32
+; CHECK-NEXT:    ret i32 [[EXT2]]
+;
+entry:
+  %x = load i32, ptr %p, align 4
+  %trunc = trunc nsw i32 %x to i8
+  %ext1 = sext i8 %trunc to i16
+  %conv2 = and i32 %x, 255
+  call void @use(i32 %conv2)
+  %ext2 = zext i16 %ext1 to i32
+  ret i32 %ext2
+}
+
+; Make sure the nsw flag is dropped when the load ext is combined.
+define i32 @simplify_load_ext_drop_shl_nsw(ptr %p) {
+; CHECK-LABEL: define i32 @simplify_load_ext_drop_shl_nsw(
+; CHECK-SAME: ptr [[P:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[X:%.*]] = load i32, ptr [[P]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = and i32 [[X]], 255
+; CHECK-NEXT:    [[SHL:%.*]] = shl nsw i32 [[TMP0]], 24
+; CHECK-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK-NEXT:    ret i32 [[SHL]]
+;
+entry:
+  %x = load i32, ptr %p, align 4
+  %shl = shl nsw i32 %x, 24
+  %conv2 = and i32 %x, 255
+  call void @use(i32 %conv2)
+  ret i32 %shl
+}
+
+define i32 @simplify_load_ext_keep_trunc_nuw(ptr %p) {
+; CHECK-LABEL: define i32 @simplify_load_ext_keep_trunc_nuw(
+; CHECK-SAME: ptr [[P:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[X:%.*]] = load i32, ptr [[P]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = and i32 [[X]], 255
+; CHECK-NEXT:    [[TRUNC:%.*]] = trunc nuw i32 [[TMP0]] to i8
+; CHECK-NEXT:    [[EXT1:%.*]] = sext i8 [[TRUNC]] to i16
+; CHECK-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK-NEXT:    [[EXT2:%.*]] = zext i16 [[EXT1]] to i32
+; CHECK-NEXT:    ret i32 [[EXT2]]
+;
+entry:
+  %x = load i32, ptr %p, align 4
+  %trunc = trunc nuw i32 %x to i8
+  %ext1 = sext i8 %trunc to i16
+  %conv2 = and i32 %x, 255
+  call void @use(i32 %conv2)
+  %ext2 = zext i16 %ext1 to i32
+  ret i32 %ext2
+}
+
+define i32 @simplify_load_ext_drop_shl_nuw(ptr %p) {
+; CHECK-LABEL: define i32 @simplify_load_ext_drop_shl_nuw(
+; CHECK-SAME: ptr [[P:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[X:%.*]] = load i32, ptr [[P]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = and i32 [[X]], 255
+; CHECK-NEXT:    [[SHL:%.*]] = shl nuw i32 [[TMP0]], 24
+; CHECK-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK-NEXT:    ret i32 [[SHL]]
+;
+entry:
+  %x = load i32, ptr %p, align 4
+  %shl = shl nuw i32 %x, 24
+  %conv2 = and i32 %x, 255
+  call void @use(i32 %conv2)
+  ret i32 %shl
+}
+
+declare void @use(i32)

>From f43daa37a27131c71157259507e9f0ea0a532220 Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Sun, 1 Dec 2024 00:46:36 +0800
Subject: [PATCH 2/2] [CodeGenPrepare] Drop nsw flags in `optimizeLoadExt`

---
 llvm/lib/CodeGen/CodeGenPrepare.cpp                 | 7 +++++++
 llvm/test/Transforms/CodeGenPrepare/X86/pr118172.ll | 4 ++--
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index ead8aa6d220973..83c6ecd401039f 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -7138,6 +7138,7 @@ bool CodeGenPrepare::optimizeLoadExt(LoadInst *Load) {
   SmallVector<Instruction *, 8> WorkList;
   SmallPtrSet<Instruction *, 16> Visited;
   SmallVector<Instruction *, 8> AndsToMaybeRemove;
+  SmallVector<Instruction *, 8> DropFlags;
   for (auto *U : Load->users())
     WorkList.push_back(cast<Instruction>(U));
 
@@ -7185,6 +7186,7 @@ bool CodeGenPrepare::optimizeLoadExt(LoadInst *Load) {
         return false;
       uint64_t ShiftAmt = ShlC->getLimitedValue(BitWidth - 1);
       DemandBits.setLowBits(BitWidth - ShiftAmt);
+      DropFlags.push_back(I);
       break;
     }
 
@@ -7192,6 +7194,7 @@ bool CodeGenPrepare::optimizeLoadExt(LoadInst *Load) {
       EVT TruncVT = TLI->getValueType(*DL, I->getType());
       unsigned TruncBitWidth = TruncVT.getSizeInBits();
       DemandBits.setLowBits(TruncBitWidth);
+      DropFlags.push_back(I);
       break;
     }
 
@@ -7249,6 +7252,10 @@ bool CodeGenPrepare::optimizeLoadExt(LoadInst *Load) {
       ++NumAndUses;
     }
 
+  // NSW flags may no longer hold.
+  for (auto *Inst : DropFlags)
+    Inst->setHasNoSignedWrap(false);
+
   ++NumAndsAdded;
   return true;
 }
diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/pr118172.ll b/llvm/test/Transforms/CodeGenPrepare/X86/pr118172.ll
index ae5c0e385c973c..a599129466b35c 100644
--- a/llvm/test/Transforms/CodeGenPrepare/X86/pr118172.ll
+++ b/llvm/test/Transforms/CodeGenPrepare/X86/pr118172.ll
@@ -8,7 +8,7 @@ define i32 @simplify_load_ext_drop_trunc_nsw(ptr %p) {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
 ; CHECK-NEXT:    [[X:%.*]] = load i32, ptr [[P]], align 4
 ; CHECK-NEXT:    [[TMP0:%.*]] = and i32 [[X]], 255
-; CHECK-NEXT:    [[TRUNC:%.*]] = trunc nsw i32 [[TMP0]] to i8
+; CHECK-NEXT:    [[TRUNC:%.*]] = trunc i32 [[TMP0]] to i8
 ; CHECK-NEXT:    [[EXT1:%.*]] = sext i8 [[TRUNC]] to i16
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    [[EXT2:%.*]] = zext i16 [[EXT1]] to i32
@@ -31,7 +31,7 @@ define i32 @simplify_load_ext_drop_shl_nsw(ptr %p) {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
 ; CHECK-NEXT:    [[X:%.*]] = load i32, ptr [[P]], align 4
 ; CHECK-NEXT:    [[TMP0:%.*]] = and i32 [[X]], 255
-; CHECK-NEXT:    [[SHL:%.*]] = shl nsw i32 [[TMP0]], 24
+; CHECK-NEXT:    [[SHL:%.*]] = shl i32 [[TMP0]], 24
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret i32 [[SHL]]
 ;



More information about the llvm-commits mailing list