[llvm] [InstCombine] Prevent constant operand shrinkage on returns of zero-extension ANDs (PR #146201)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Jun 27 23:54:26 PDT 2025
llvmbot wrote:
@llvm/pr-subscribers-llvm-transforms
Author: Hanyang (Eric) Xu (ericxu233)
Changes
Fixes: #143778
This patch updates the InstCombineSimplifyDemanded logic to avoid shrinking the RHS constant of an `and` instruction when the instruction has only one use and that use is a return. In such cases, if the constant is the bitmask of a simple zero-extension (e.g., 0xFF, 0xFFFF, etc.), shrinking it could prevent the backend from recognizing the pattern and lowering it to efficient zero-extension code (e.g., using movzx, or the implicit zeroing of the upper bits when writing a 32-bit register on x86-64). This matters in particular when the pattern is paired with the llvm.assume intrinsic and some surrounding control flow, since the backend may not be able to recover enough information from those to perform the optimization itself.
The two tests in known-bits.ll are additional examples of why this change is useful.
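For illustration, here is a minimal IR sketch of the affected pattern, modeled on the test_icmp_trunc2 test in known-bits.ll (the function and value names here are made up, not taken from the PR):

```llvm
define i64 @low32_of_positive(i64 %x) {
entry:
  %conv = trunc i64 %x to i32
  %cmp = icmp sgt i32 %conv, 12
  br i1 %cmp, label %if.then, label %if.else

if.then:
  ; On this path bit 31 of %x is known zero, so InstCombine used to shrink
  ; the mask to 0x7FFFFFFF. Keeping the full 0xFFFFFFFF mask lets the backend
  ; select a plain 32-bit register move instead of an explicit 64-bit AND.
  %low = and i64 %x, 4294967295
  ret i64 %low

if.else:
  ret i64 0
}
```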
Before the change:
[optimized IR to x86 assembly with LLC](https://godbolt.org/z/arr5Tfzeh)
We can clearly see that there are redundant AND bitmasks in the x86 assembly.
After this change:
[optimized IR to x86 assembly with LLC](https://godbolt.org/z/s5Tfx3bh6)
No redundant ANDs anymore.
This is my first time contributing to LLVM, so please let me know if I am doing anything wrong or non-standard here. This is my best effort to address the issue in the middle end; we could probably implement the same optimization in the backend if enough information were available there, but that would essentially amount to undoing an earlier optimization.
---
Full diff: https://github.com/llvm/llvm-project/pull/146201.diff
5 Files Affected:
- (modified) llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp (+19-1)
- (modified) llvm/test/Transforms/InstCombine/bswap-fold.ll (+1-1)
- (modified) llvm/test/Transforms/InstCombine/known-bits.ll (+2-2)
- (modified) llvm/test/Transforms/InstCombine/trunc-shl-zext.ll (+2-2)
- (modified) llvm/test/Transforms/InstCombine/vscale.ll (+1-1)
``````````diff
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index 0e3436d12702d..362d03f912b3b 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -234,8 +234,26 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Instruction *I,
if (DemandedMask.isSubsetOf(RHSKnown.Zero | LHSKnown.One))
return I->getOperand(1);
+ // If the 'and' has only one use and that use is a return instruction, and
+ // the constant mask is a simple low-bit zero-extension (0xFF, 0xFFFF,
+ // etc.), skip shrinking the constant RHS. The backend can lower this
+ // pattern into a single zero-extending instruction during codegen anyway,
+ // for example by writing to a 32-bit register such as eax on x86.
+ // Performing the constant operand shrinkage here may instead block that
+ // lowering, especially when the known bits come from llvm.assume, which
+ // the backend cannot take advantage of.
+ bool IsReturnOnlyUse = I->hasOneUse() && isa<ReturnInst>(I->user_back());
+
+ const APInt *C;
+ bool IsZextBitmask = match(I->getOperand(1), m_APInt(C)) &&
+ ((C->getBitWidth() >= 8 && C->isMask(8)) ||
+ (C->getBitWidth() >= 16 && C->isMask(16)) ||
+ (C->getBitWidth() >= 32 && C->isMask(32)) ||
+ (C->getBitWidth() >= 64 && C->isMask(64)));
+
// If the RHS is a constant, see if we can simplify it.
- if (ShrinkDemandedConstant(I, 1, DemandedMask & ~LHSKnown.Zero))
+ if (!(IsReturnOnlyUse && IsZextBitmask) &&
+ ShrinkDemandedConstant(I, 1, DemandedMask & ~LHSKnown.Zero))
return I;
break;
diff --git a/llvm/test/Transforms/InstCombine/bswap-fold.ll b/llvm/test/Transforms/InstCombine/bswap-fold.ll
index f7268ec9df090..9a4144b834fe9 100644
--- a/llvm/test/Transforms/InstCombine/bswap-fold.ll
+++ b/llvm/test/Transforms/InstCombine/bswap-fold.ll
@@ -838,7 +838,7 @@ define i32 @bs_active_high7(i32 %0) {
define <2 x i64> @bs_active_high4(<2 x i64> %0) {
; CHECK-LABEL: @bs_active_high4(
; CHECK-NEXT: [[TMP2:%.*]] = shl <2 x i64> [[TMP0:%.*]], splat (i64 4)
-; CHECK-NEXT: [[TMP3:%.*]] = and <2 x i64> [[TMP2]], splat (i64 240)
+; CHECK-NEXT: [[TMP3:%.*]] = and <2 x i64> [[TMP2]], splat (i64 255)
; CHECK-NEXT: ret <2 x i64> [[TMP3]]
;
%2 = shl <2 x i64> %0, <i64 60, i64 60>
diff --git a/llvm/test/Transforms/InstCombine/known-bits.ll b/llvm/test/Transforms/InstCombine/known-bits.ll
index 9a9fec694ff0e..1e653f3201f39 100644
--- a/llvm/test/Transforms/InstCombine/known-bits.ll
+++ b/llvm/test/Transforms/InstCombine/known-bits.ll
@@ -384,7 +384,7 @@ define i64 @test_icmp_trunc2(i64 %x) {
; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CONV]], 12
; CHECK-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
; CHECK: if.then:
-; CHECK-NEXT: [[SEXT:%.*]] = and i64 [[X]], 2147483647
+; CHECK-NEXT: [[SEXT:%.*]] = and i64 [[X]], 4294967295
; CHECK-NEXT: ret i64 [[SEXT]]
; CHECK: if.else:
; CHECK-NEXT: ret i64 0
@@ -408,7 +408,7 @@ define i64 @test_icmp_trunc3(i64 %n) {
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[CONV]], 96
; CHECK-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
; CHECK: if.then:
-; CHECK-NEXT: [[RET:%.*]] = and i64 [[N]], 127
+; CHECK-NEXT: [[RET:%.*]] = and i64 [[N]], 4294967295
; CHECK-NEXT: ret i64 [[RET]]
; CHECK: if.else:
; CHECK-NEXT: ret i64 0
diff --git a/llvm/test/Transforms/InstCombine/trunc-shl-zext.ll b/llvm/test/Transforms/InstCombine/trunc-shl-zext.ll
index 576125b86de8f..03d3ea7f1f5fa 100644
--- a/llvm/test/Transforms/InstCombine/trunc-shl-zext.ll
+++ b/llvm/test/Transforms/InstCombine/trunc-shl-zext.ll
@@ -7,7 +7,7 @@ define i32 @trunc_shl_zext_32(i32 %a) {
; CHECK-LABEL: define i32 @trunc_shl_zext_32
; CHECK-SAME: (i32 [[A:%.*]]) {
; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[A]], 4
-; CHECK-NEXT: [[EXT:%.*]] = and i32 [[SHL]], 65520
+; CHECK-NEXT: [[EXT:%.*]] = and i32 [[SHL]], 65535
; CHECK-NEXT: ret i32 [[EXT]]
;
%trunc = trunc i32 %a to i16
@@ -20,7 +20,7 @@ define i64 @trunc_shl_zext_64(i64 %a) {
; CHECK-LABEL: define i64 @trunc_shl_zext_64
; CHECK-SAME: (i64 [[A:%.*]]) {
; CHECK-NEXT: [[SHL:%.*]] = shl i64 [[A]], 7
-; CHECK-NEXT: [[EXT:%.*]] = and i64 [[SHL]], 128
+; CHECK-NEXT: [[EXT:%.*]] = and i64 [[SHL]], 255
; CHECK-NEXT: ret i64 [[EXT]]
;
%trunc = trunc i64 %a to i8
diff --git a/llvm/test/Transforms/InstCombine/vscale.ll b/llvm/test/Transforms/InstCombine/vscale.ll
index dbb5ca4bae9be..550af4c4d6ccd 100644
--- a/llvm/test/Transforms/InstCombine/vscale.ll
+++ b/llvm/test/Transforms/InstCombine/vscale.ll
@@ -17,7 +17,7 @@ define i64 @pomote_zext_shl_vscale_i32_to_i64() {
; CHECK-LABEL: @pomote_zext_shl_vscale_i32_to_i64(
; CHECK-NEXT: [[VSCALE:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[SHL:%.*]] = shl i64 [[VSCALE]], 3
-; CHECK-NEXT: [[EXT:%.*]] = and i64 [[SHL]], 4294967288
+; CHECK-NEXT: [[EXT:%.*]] = and i64 [[SHL]], 4294967295
; CHECK-NEXT: ret i64 [[EXT]]
;
%vscale = call i32 @llvm.vscale.i32()
``````````
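To make the scope of the guard concrete, here is a hand-written sketch (the function names are made up and this is not one of the PR's tests): the constant is only preserved when the `and` has exactly one use and that use is a `ret`; otherwise shrinkage behaves as before.

```llvm
; Single use that is a return: with this patch the 0xFFFF mask is kept, so
; the backend can select a plain 16-bit zero-extension.
define i32 @mask_kept(i32 %a) {
  %shl = shl i32 %a, 4
  %ext = and i32 %shl, 65535
  ret i32 %ext
}

; Here the 'and' feeds an add rather than a return, so the guard does not
; apply and ShrinkDemandedConstant can still narrow the mask (the low four
; bits are known zero after the shift).
define i32 @mask_still_shrunk(i32 %a) {
  %shl = shl i32 %a, 4
  %ext = and i32 %shl, 65535
  %sum = add i32 %ext, 1
  ret i32 %sum
}
```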
https://github.com/llvm/llvm-project/pull/146201