[llvm] 2d3167f - [SeparateConstOffsetFromGEP] Avoid miscompiles related to trunc nuw/nsw (#154582)

Bjorn Pettersson via llvm-commits llvm-commits at lists.llvm.org
Fri Aug 22 01:29:41 PDT 2025


Author: Bjorn Pettersson
Date: 2025-08-22T10:27:57+02:00
New Revision: 2d3167f8d86ca9bdaff44cf839488c5a513f5583

URL: https://github.com/llvm/llvm-project/commit/2d3167f8d86ca9bdaff44cf839488c5a513f5583
DIFF: https://github.com/llvm/llvm-project/commit/2d3167f8d86ca9bdaff44cf839488c5a513f5583.diff

LOG: [SeparateConstOffsetFromGEP] Avoid miscompiles related to trunc nuw/nsw (#154582)

Drop poison generating flags on trunc when distributing trunc over
add/sub/or. We need to do this since for example
(add (trunc nuw A), (trunc nuw B)) is more poisonous than
(trunc nuw (add A, B))).

In some situations it is pessimistic to drop the flags. Such as
if the add in the example above also has the nuw flag. For now we
keep it simple and always drop the flags.

Worth mentioning is that we drop the flags when cloning
instructions and rebuilding the chain. This is done after the
"allowsPreservingNUW" checks in ConstantOffsetExtractor::Extract.
So we still take the "trunc nuw" into consideration when determining
if nuw can be preserved in the gep (which should be ok since that
check also require that all the involved binary operations has nuw).

Fixes #154116

Added: 
    

Modified: 
    llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
    llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/preserve-inbounds.ll
    llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/rebuild-trunc.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
index a88f8f19f852e..8b9d06d7e4439 100644
--- a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
+++ b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
@@ -642,6 +642,13 @@ Value *ConstantOffsetExtractor::applyExts(Value *V) {
 
     Instruction *Ext = I->clone();
     Ext->setOperand(0, Current);
+    // In ConstantOffsetExtractor::find we do not analyze nuw/nsw for trunc, so
+    // we assume that it is ok to redistribute trunc over add/sub/or. But for
+    // example (add (trunc nuw A), (trunc nuw B)) is more poisonous than (trunc
+    // nuw (add A, B))). To make such redistributions legal we drop all the
+    // poison generating flags from cloned trunc instructions here.
+    if (isa<TruncInst>(Ext))
+      Ext->dropPoisonGeneratingFlags();
     Ext->insertBefore(*IP->getParent(), IP);
     Current = Ext;
   }

diff  --git a/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/preserve-inbounds.ll b/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/preserve-inbounds.ll
index 2a5b678e91fd8..0d21a99153ea0 100644
--- a/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/preserve-inbounds.ll
+++ b/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/preserve-inbounds.ll
@@ -311,7 +311,7 @@ entry:
 define ptr @nuw_inbounds_implies_nuw_inbounds_trunc_nuw(ptr %p, i128 %i) {
 ; CHECK-LABEL: @nuw_inbounds_implies_nuw_inbounds_trunc_nuw(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = trunc nuw i128 [[I:%.*]] to i64
+; CHECK-NEXT:    [[TMP0:%.*]] = trunc i128 [[I:%.*]] to i64
 ; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds nuw i32, ptr [[P:%.*]], i64 [[TMP0]]
 ; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 4
 ; CHECK-NEXT:    ret ptr [[ARRAYIDX2]]

diff  --git a/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/rebuild-trunc.ll b/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/rebuild-trunc.ll
index 8970bfc488421..a93257f7e564a 100644
--- a/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/rebuild-trunc.ll
+++ b/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/rebuild-trunc.ll
@@ -1,13 +1,11 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
 ; RUN: opt < %s -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -passes=separate-const-offset-from-gep -S | FileCheck %s
 
-; FIXME: (add (trunc nuw A), (trunc nuw B)) is more poisonous than
-;        (trunc nuw (add A, B))), so we should for example drop the
-;        nuw on the trunc when doing the rewrite here.
+; Verify that we drop "nuw" from trunc.
 define ptr @pr154116_nuw(ptr %p, i128 %i) {
 ; CHECK-LABEL: define ptr @pr154116_nuw(
 ; CHECK-SAME: ptr [[P:%.*]], i128 [[I:%.*]]) #[[ATTR0:[0-9]+]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = trunc nuw i128 [[I]] to i64
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc i128 [[I]] to i64
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i32, ptr [[P]], i64 [[TMP1]]
 ; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr i8, ptr [[TMP2]], i64 80
 ; CHECK-NEXT:    ret ptr [[ARRAYIDX2]]
@@ -18,13 +16,11 @@ define ptr @pr154116_nuw(ptr %p, i128 %i) {
   ret ptr %arrayidx
 }
 
-; FIXME: (add (trunc nsw A), (trunc nsw B)) is more poisonous than
-;        (trunc nsw (add A, B))), so we should for example drop the
-;        nsw on the trunc when doing the rewrite here.
+; Verify that we drop "nsw" from trunc.
 define ptr @pr154116_nsw(ptr %p, i128 %i) {
 ; CHECK-LABEL: define ptr @pr154116_nsw(
 ; CHECK-SAME: ptr [[P:%.*]], i128 [[I:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = trunc nsw i128 [[I]] to i64
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc i128 [[I]] to i64
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i32, ptr [[P]], i64 [[TMP1]]
 ; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr i8, ptr [[TMP2]], i64 4
 ; CHECK-NEXT:    ret ptr [[ARRAYIDX2]]


        


More information about the llvm-commits mailing list