[llvm] [SelectionDAG] Second SimplifyDemandedBits pass for AND RHS using LHS known zeros (scalar only) (PR #185235)
via llvm-commits
llvm-commits at lists.llvm.org
Sun Mar 8 16:31:21 PDT 2026
https://github.com/SiliconA-Z updated https://github.com/llvm/llvm-project/pull/185235
From 8271ff6169c76aaf01018b1cc2a28750a43d8824 Mon Sep 17 00:00:00 2001
From: AZero13 <gfunni234 at gmail.com>
Date: Sat, 7 Mar 2026 16:59:48 -0500
Subject: [PATCH 1/3] [SelectionDAG] Second SimplifyDemandedBits pass for AND
RHS using LHS known zeros (scalar only)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Add a second SimplifyDemandedBits pass for the RHS of an AND in `TargetLowering::SimplifyDemandedBits`. We already simplify the LHS (Op0) with a mask narrowed by the RHS’s known zeros (`~Known.Zero & DemandedBits`). This change also simplifies the RHS (Op1) with a mask narrowed by the LHS’s known zeros (`~Known2.Zero & DemandedBits`), so both sides get the same kind of simplification (e.g. both `~X & Y` and `X & ~Y` can benefit).
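To make the masking concrete, here is a minimal standalone sketch of the narrowing step using LLVM's `APInt`/`KnownBits` utilities. The 8-bit width and the particular known-zero mask are made up purely for illustration:

```cpp
// Minimal sketch of the mask narrowing, outside SelectionDAG.
// Uses only the public APInt/KnownBits APIs; all values are hypothetical.
#include "llvm/ADT/APInt.h"
#include "llvm/Support/KnownBits.h"
#include <cassert>

using namespace llvm;

int main() {
  // All 8 bits of (X & Y) are demanded by the user of the AND.
  APInt DemandedBits = APInt::getAllOnes(8);

  // The LHS X is known to have its top four bits zero
  // (e.g. X was zero-extended from a 4-bit value).
  KnownBits KnownLHS(8);
  KnownLHS.Zero = APInt(8, 0xF0);

  // Only bits where X can be nonzero can influence the AND, so the
  // RHS Y is simplified against this narrower mask: any work Y does
  // in the high four bits can be dropped.
  APInt NarrowedDemanded = ~KnownLHS.Zero & DemandedBits;
  assert(NarrowedDemanded == APInt(8, 0x0F));
  return 0;
}
```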
---
.../CodeGen/SelectionDAG/TargetLowering.cpp | 14 +
llvm/test/CodeGen/AArch64/reduce-and.ll | 18 +-
.../AArch64/vecreduce-and-legalization.ll | 6 +-
llvm/test/CodeGen/X86/atomic-rm-bit-test.ll | 256 +++++++++---------
llvm/test/CodeGen/X86/movmsk-cmp.ll | 4 -
llvm/test/CodeGen/X86/pr34137.ll | 6 +-
6 files changed, 150 insertions(+), 154 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index cf827ef547628..f3f38359ebcf0 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -1513,6 +1513,20 @@ bool TargetLowering::SimplifyDemandedBits(
Known2, TLO, Depth + 1))
return true;
+ // FIXME: Pretty much all these extra conditions are to avoid regressions in
+ // x86 and AMDGPU.
+ unsigned Op1Opc = Op1.getOpcode();
+ if (!VT.isVector() &&
+ (Op1Opc == ISD::ZERO_EXTEND || Op1Opc == ISD::SIGN_EXTEND ||
+ Op1Opc == ISD::ANY_EXTEND || Op1Opc == ISD::TRUNCATE) &&
+ Op1.getOperand(0).getValueType().getScalarType() != MVT::i1 &&
+ (~Known2.Zero & DemandedBits) != DemandedBits) {
+ Known2 = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth + 1);
+ if (SimplifyDemandedBits(Op1, ~Known2.Zero & DemandedBits, DemandedElts,
+ Known, TLO, Depth + 1))
+ return true;
+ }
+
// If all of the demanded bits are known one on one side, return the other.
// These bits cannot contribute to the result of the 'and'.
if (DemandedBits.isSubsetOf(Known2.Zero | Known.One))
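A note on the guard in the hunk above: `(~Known2.Zero & DemandedBits) != DemandedBits` holds exactly when some demanded bit is known zero on the LHS, i.e. when the extra pass can actually shrink the RHS mask. A small sketch of that equivalence (widths and constants are hypothetical):

```cpp
// Sketch of the guard predicate: the second pass is worthwhile iff the
// LHS known zeros intersect the demanded mask. Values are made up.
#include "llvm/ADT/APInt.h"
#include <cassert>

using namespace llvm;

static bool lhsZerosNarrowMask(const APInt &LHSKnownZero,
                               const APInt &Demanded) {
  // Equivalent to (~LHSKnownZero & Demanded) != Demanded: true iff
  // some demanded bit is known zero on the LHS.
  return LHSKnownZero.intersects(Demanded);
}

int main() {
  APInt Demanded(16, 0x00FF);
  assert(!lhsZerosNarrowMask(APInt(16, 0xFF00), Demanded)); // no overlap
  assert(lhsZerosNarrowMask(APInt(16, 0x00F0), Demanded));  // narrows mask
  return 0;
}
```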
diff --git a/llvm/test/CodeGen/AArch64/reduce-and.ll b/llvm/test/CodeGen/AArch64/reduce-and.ll
index 8ca521327c2e3..2ede4314a5fcf 100644
--- a/llvm/test/CodeGen/AArch64/reduce-and.ll
+++ b/llvm/test/CodeGen/AArch64/reduce-and.ll
@@ -263,9 +263,9 @@ define i8 @test_redand_v8i8(<8 x i8> %a) {
; CHECK: // %bb.0:
; CHECK-NEXT: fmov x8, d0
; CHECK-NEXT: and x8, x8, x8, lsr #32
-; CHECK-NEXT: and x8, x8, x8, lsr #16
-; CHECK-NEXT: lsr x9, x8, #8
-; CHECK-NEXT: and w0, w8, w9
+; CHECK-NEXT: lsr x9, x8, #16
+; CHECK-NEXT: and w8, w8, w9
+; CHECK-NEXT: and w0, w8, w8, lsr #8
; CHECK-NEXT: ret
;
; GISEL-LABEL: test_redand_v8i8:
@@ -298,9 +298,9 @@ define i8 @test_redand_v16i8(<16 x i8> %a) {
; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
; CHECK-NEXT: fmov x8, d0
; CHECK-NEXT: and x8, x8, x8, lsr #32
-; CHECK-NEXT: and x8, x8, x8, lsr #16
-; CHECK-NEXT: lsr x9, x8, #8
-; CHECK-NEXT: and w0, w8, w9
+; CHECK-NEXT: lsr x9, x8, #16
+; CHECK-NEXT: and w8, w8, w9
+; CHECK-NEXT: and w0, w8, w8, lsr #8
; CHECK-NEXT: ret
;
; GISEL-LABEL: test_redand_v16i8:
@@ -335,9 +335,9 @@ define i8 @test_redand_v32i8(<32 x i8> %a) {
; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
; CHECK-NEXT: fmov x8, d0
; CHECK-NEXT: and x8, x8, x8, lsr #32
-; CHECK-NEXT: and x8, x8, x8, lsr #16
-; CHECK-NEXT: lsr x9, x8, #8
-; CHECK-NEXT: and w0, w8, w9
+; CHECK-NEXT: lsr x9, x8, #16
+; CHECK-NEXT: and w8, w8, w9
+; CHECK-NEXT: and w0, w8, w8, lsr #8
; CHECK-NEXT: ret
;
; GISEL-LABEL: test_redand_v32i8:
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-and-legalization.ll b/llvm/test/CodeGen/AArch64/vecreduce-and-legalization.ll
index ac54dd41b0962..820f35f2a4567 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-and-legalization.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-and-legalization.ll
@@ -107,9 +107,9 @@ define i8 @test_v9i8(<9 x i8> %a) nounwind {
; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
; CHECK-NEXT: fmov x8, d0
; CHECK-NEXT: and x8, x8, x8, lsr #32
-; CHECK-NEXT: and x8, x8, x8, lsr #16
-; CHECK-NEXT: lsr x9, x8, #8
-; CHECK-NEXT: and w0, w8, w9
+; CHECK-NEXT: lsr x9, x8, #16
+; CHECK-NEXT: and w8, w8, w9
+; CHECK-NEXT: and w0, w8, w8, lsr #8
; CHECK-NEXT: ret
%b = call i8 @llvm.vector.reduce.and.v9i8(<9 x i8> %a)
ret i8 %b
diff --git a/llvm/test/CodeGen/X86/atomic-rm-bit-test.ll b/llvm/test/CodeGen/X86/atomic-rm-bit-test.ll
index 71887e369bd18..169fdf90a6470 100644
--- a/llvm/test/CodeGen/X86/atomic-rm-bit-test.ll
+++ b/llvm/test/CodeGen/X86/atomic-rm-bit-test.ll
@@ -1286,11 +1286,10 @@ define zeroext i16 @atomic_shl1_mask01_xor_16_gpr_valz(ptr %v, i16 zeroext %c) n
; X86-NEXT: # kill: def $ax killed $ax def $eax
; X86-NEXT: jne .LBB22_1
; X86-NEXT: # %bb.2: # %atomicrmw.end
-; X86-NEXT: movzwl %ax, %ecx
-; X86-NEXT: xorl %eax, %eax
-; X86-NEXT: testl %ecx, %esi
-; X86-NEXT: sete %al
-; X86-NEXT: # kill: def $ax killed $ax killed $eax
+; X86-NEXT: xorl %ecx, %ecx
+; X86-NEXT: testl %eax, %esi
+; X86-NEXT: sete %cl
+; X86-NEXT: movl %ecx, %eax
; X86-NEXT: popl %esi
; X86-NEXT: retl
;
@@ -1312,11 +1311,10 @@ define zeroext i16 @atomic_shl1_mask01_xor_16_gpr_valz(ptr %v, i16 zeroext %c) n
; X64-NEXT: # kill: def $ax killed $ax def $eax
; X64-NEXT: jne .LBB22_1
; X64-NEXT: # %bb.2: # %atomicrmw.end
-; X64-NEXT: movzwl %ax, %ecx
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: testl %ecx, %edx
-; X64-NEXT: sete %al
-; X64-NEXT: # kill: def $ax killed $ax killed $eax
+; X64-NEXT: xorl %ecx, %ecx
+; X64-NEXT: testl %eax, %edx
+; X64-NEXT: sete %cl
+; X64-NEXT: movl %ecx, %eax
; X64-NEXT: retq
entry:
%0 = and i16 %c, 15
@@ -1335,51 +1333,49 @@ define zeroext i16 @atomic_blsi_xor_16_gpr_valz(ptr %v, i16 zeroext %c) nounwind
; X86-LABEL: atomic_blsi_xor_16_gpr_valz:
; X86: # %bb.0: # %entry
; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl %eax, %ecx
-; X86-NEXT: negl %ecx
-; X86-NEXT: andl %eax, %ecx
-; X86-NEXT: movzwl (%edx), %eax
+; X86-NEXT: movl %eax, %edx
+; X86-NEXT: negl %edx
+; X86-NEXT: andl %eax, %edx
+; X86-NEXT: movzwl (%ecx), %eax
; X86-NEXT: .p2align 4
; X86-NEXT: .LBB23_1: # %atomicrmw.start
; X86-NEXT: # =>This Inner Loop Header: Depth=1
; X86-NEXT: movl %eax, %esi
-; X86-NEXT: xorl %ecx, %esi
+; X86-NEXT: xorl %edx, %esi
; X86-NEXT: # kill: def $ax killed $ax killed $eax
-; X86-NEXT: lock cmpxchgw %si, (%edx)
+; X86-NEXT: lock cmpxchgw %si, (%ecx)
; X86-NEXT: # kill: def $ax killed $ax def $eax
; X86-NEXT: jne .LBB23_1
; X86-NEXT: # %bb.2: # %atomicrmw.end
-; X86-NEXT: movzwl %ax, %edx
-; X86-NEXT: xorl %eax, %eax
-; X86-NEXT: testl %edx, %ecx
-; X86-NEXT: sete %al
-; X86-NEXT: # kill: def $ax killed $ax killed $eax
+; X86-NEXT: xorl %ecx, %ecx
+; X86-NEXT: testl %eax, %edx
+; X86-NEXT: sete %cl
+; X86-NEXT: movl %ecx, %eax
; X86-NEXT: popl %esi
; X86-NEXT: retl
;
; X64-LABEL: atomic_blsi_xor_16_gpr_valz:
; X64: # %bb.0: # %entry
-; X64-NEXT: movl %esi, %ecx
-; X64-NEXT: negl %ecx
-; X64-NEXT: andl %esi, %ecx
+; X64-NEXT: movl %esi, %edx
+; X64-NEXT: negl %edx
+; X64-NEXT: andl %esi, %edx
; X64-NEXT: movzwl (%rdi), %eax
; X64-NEXT: .p2align 4
; X64-NEXT: .LBB23_1: # %atomicrmw.start
; X64-NEXT: # =>This Inner Loop Header: Depth=1
-; X64-NEXT: movl %eax, %edx
-; X64-NEXT: xorl %ecx, %edx
+; X64-NEXT: movl %eax, %ecx
+; X64-NEXT: xorl %edx, %ecx
; X64-NEXT: # kill: def $ax killed $ax killed $eax
-; X64-NEXT: lock cmpxchgw %dx, (%rdi)
+; X64-NEXT: lock cmpxchgw %cx, (%rdi)
; X64-NEXT: # kill: def $ax killed $ax def $eax
; X64-NEXT: jne .LBB23_1
; X64-NEXT: # %bb.2: # %atomicrmw.end
-; X64-NEXT: movzwl %ax, %edx
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: testl %edx, %ecx
-; X64-NEXT: sete %al
-; X64-NEXT: # kill: def $ax killed $ax killed $eax
+; X64-NEXT: xorl %ecx, %ecx
+; X64-NEXT: testl %eax, %edx
+; X64-NEXT: sete %cl
+; X64-NEXT: movl %ecx, %eax
; X64-NEXT: retq
entry:
%conv = zext i16 %c to i32
@@ -1679,11 +1675,10 @@ define zeroext i16 @atomic_shl1_mask01_xor_16_gpr_valnz(ptr %v, i16 zeroext %c)
; X86-NEXT: # kill: def $ax killed $ax def $eax
; X86-NEXT: jne .LBB28_1
; X86-NEXT: # %bb.2: # %atomicrmw.end
-; X86-NEXT: movzwl %ax, %ecx
-; X86-NEXT: xorl %eax, %eax
-; X86-NEXT: testl %ecx, %esi
-; X86-NEXT: setne %al
-; X86-NEXT: # kill: def $ax killed $ax killed $eax
+; X86-NEXT: xorl %ecx, %ecx
+; X86-NEXT: testl %eax, %esi
+; X86-NEXT: setne %cl
+; X86-NEXT: movl %ecx, %eax
; X86-NEXT: popl %esi
; X86-NEXT: retl
;
@@ -1705,11 +1700,10 @@ define zeroext i16 @atomic_shl1_mask01_xor_16_gpr_valnz(ptr %v, i16 zeroext %c)
; X64-NEXT: # kill: def $ax killed $ax def $eax
; X64-NEXT: jne .LBB28_1
; X64-NEXT: # %bb.2: # %atomicrmw.end
-; X64-NEXT: movzwl %ax, %ecx
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: testl %ecx, %edx
-; X64-NEXT: setne %al
-; X64-NEXT: # kill: def $ax killed $ax killed $eax
+; X64-NEXT: xorl %ecx, %ecx
+; X64-NEXT: testl %eax, %edx
+; X64-NEXT: setne %cl
+; X64-NEXT: movl %ecx, %eax
; X64-NEXT: retq
entry:
%0 = and i16 %c, 15
@@ -1728,51 +1722,49 @@ define zeroext i16 @atomic_blsi_xor_16_gpr_valnz(ptr %v, i16 zeroext %c) nounwin
; X86-LABEL: atomic_blsi_xor_16_gpr_valnz:
; X86: # %bb.0: # %entry
; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl %eax, %ecx
-; X86-NEXT: negl %ecx
-; X86-NEXT: andl %eax, %ecx
-; X86-NEXT: movzwl (%edx), %eax
+; X86-NEXT: movl %eax, %edx
+; X86-NEXT: negl %edx
+; X86-NEXT: andl %eax, %edx
+; X86-NEXT: movzwl (%ecx), %eax
; X86-NEXT: .p2align 4
; X86-NEXT: .LBB29_1: # %atomicrmw.start
; X86-NEXT: # =>This Inner Loop Header: Depth=1
; X86-NEXT: movl %eax, %esi
-; X86-NEXT: xorl %ecx, %esi
+; X86-NEXT: xorl %edx, %esi
; X86-NEXT: # kill: def $ax killed $ax killed $eax
-; X86-NEXT: lock cmpxchgw %si, (%edx)
+; X86-NEXT: lock cmpxchgw %si, (%ecx)
; X86-NEXT: # kill: def $ax killed $ax def $eax
; X86-NEXT: jne .LBB29_1
; X86-NEXT: # %bb.2: # %atomicrmw.end
-; X86-NEXT: movzwl %ax, %edx
-; X86-NEXT: xorl %eax, %eax
-; X86-NEXT: testl %edx, %ecx
-; X86-NEXT: setne %al
-; X86-NEXT: # kill: def $ax killed $ax killed $eax
+; X86-NEXT: xorl %ecx, %ecx
+; X86-NEXT: testl %eax, %edx
+; X86-NEXT: setne %cl
+; X86-NEXT: movl %ecx, %eax
; X86-NEXT: popl %esi
; X86-NEXT: retl
;
; X64-LABEL: atomic_blsi_xor_16_gpr_valnz:
; X64: # %bb.0: # %entry
-; X64-NEXT: movl %esi, %ecx
-; X64-NEXT: negl %ecx
-; X64-NEXT: andl %esi, %ecx
+; X64-NEXT: movl %esi, %edx
+; X64-NEXT: negl %edx
+; X64-NEXT: andl %esi, %edx
; X64-NEXT: movzwl (%rdi), %eax
; X64-NEXT: .p2align 4
; X64-NEXT: .LBB29_1: # %atomicrmw.start
; X64-NEXT: # =>This Inner Loop Header: Depth=1
-; X64-NEXT: movl %eax, %edx
-; X64-NEXT: xorl %ecx, %edx
+; X64-NEXT: movl %eax, %ecx
+; X64-NEXT: xorl %edx, %ecx
; X64-NEXT: # kill: def $ax killed $ax killed $eax
-; X64-NEXT: lock cmpxchgw %dx, (%rdi)
+; X64-NEXT: lock cmpxchgw %cx, (%rdi)
; X64-NEXT: # kill: def $ax killed $ax def $eax
; X64-NEXT: jne .LBB29_1
; X64-NEXT: # %bb.2: # %atomicrmw.end
-; X64-NEXT: movzwl %ax, %edx
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: testl %edx, %ecx
-; X64-NEXT: setne %al
-; X64-NEXT: # kill: def $ax killed $ax killed $eax
+; X64-NEXT: xorl %ecx, %ecx
+; X64-NEXT: testl %eax, %edx
+; X64-NEXT: setne %cl
+; X64-NEXT: movl %ecx, %eax
; X64-NEXT: retq
entry:
%conv = zext i16 %c to i32
@@ -2141,14 +2133,14 @@ define zeroext i16 @atomic_shl1_mask01_xor_16_gpr_brz(ptr %v, i16 zeroext %c) no
; X86-NEXT: # kill: def $ax killed $ax def $eax
; X86-NEXT: jne .LBB34_1
; X86-NEXT: # %bb.2: # %atomicrmw.end
-; X86-NEXT: movzwl %ax, %ecx
-; X86-NEXT: movw $123, %ax
-; X86-NEXT: testl %ecx, %esi
+; X86-NEXT: movw $123, %cx
+; X86-NEXT: testl %eax, %esi
; X86-NEXT: jne .LBB34_4
; X86-NEXT: # %bb.3: # %if.then
; X86-NEXT: movzwl %bx, %eax
-; X86-NEXT: movzwl (%edx,%eax,2), %eax
+; X86-NEXT: movzwl (%edx,%eax,2), %ecx
; X86-NEXT: .LBB34_4: # %return
+; X86-NEXT: movl %ecx, %eax
; X86-NEXT: popl %esi
; X86-NEXT: popl %ebx
; X86-NEXT: retl
@@ -2172,15 +2164,16 @@ define zeroext i16 @atomic_shl1_mask01_xor_16_gpr_brz(ptr %v, i16 zeroext %c) no
; X64-NEXT: # kill: def $ax killed $ax def $eax
; X64-NEXT: jne .LBB34_1
; X64-NEXT: # %bb.2: # %atomicrmw.end
-; X64-NEXT: movzwl %ax, %ecx
-; X64-NEXT: movw $123, %ax
-; X64-NEXT: testl %ecx, %esi
+; X64-NEXT: movw $123, %cx
+; X64-NEXT: testl %eax, %esi
; X64-NEXT: je .LBB34_3
; X64-NEXT: # %bb.4: # %return
+; X64-NEXT: movl %ecx, %eax
; X64-NEXT: retq
; X64-NEXT: .LBB34_3: # %if.then
; X64-NEXT: movzwl %dx, %eax
-; X64-NEXT: movzwl (%rdi,%rax,2), %eax
+; X64-NEXT: movzwl (%rdi,%rax,2), %ecx
+; X64-NEXT: movl %ecx, %eax
; X64-NEXT: retq
entry:
%0 = and i16 %c, 15
@@ -2209,59 +2202,60 @@ define zeroext i16 @atomic_blsi_xor_16_gpr_brz(ptr %v, i16 zeroext %c) nounwind
; X86: # %bb.0: # %entry
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl %ecx, %esi
-; X86-NEXT: negl %esi
-; X86-NEXT: andl %ecx, %esi
-; X86-NEXT: movzwl (%edx), %eax
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl %edx, %edi
+; X86-NEXT: negl %edi
+; X86-NEXT: andl %edx, %edi
+; X86-NEXT: movzwl (%esi), %eax
; X86-NEXT: .p2align 4
; X86-NEXT: .LBB35_1: # %atomicrmw.start
; X86-NEXT: # =>This Inner Loop Header: Depth=1
-; X86-NEXT: movl %eax, %edi
-; X86-NEXT: xorl %esi, %edi
+; X86-NEXT: movl %eax, %ecx
+; X86-NEXT: xorl %edi, %ecx
; X86-NEXT: # kill: def $ax killed $ax killed $eax
-; X86-NEXT: lock cmpxchgw %di, (%edx)
+; X86-NEXT: lock cmpxchgw %cx, (%esi)
; X86-NEXT: # kill: def $ax killed $ax def $eax
; X86-NEXT: jne .LBB35_1
; X86-NEXT: # %bb.2: # %atomicrmw.end
-; X86-NEXT: movzwl %ax, %edi
-; X86-NEXT: movw $123, %ax
-; X86-NEXT: testl %edi, %esi
+; X86-NEXT: movw $123, %cx
+; X86-NEXT: testl %eax, %edi
; X86-NEXT: jne .LBB35_4
; X86-NEXT: # %bb.3: # %if.then
-; X86-NEXT: movzwl %cx, %eax
-; X86-NEXT: movzwl (%edx,%eax,2), %eax
+; X86-NEXT: movzwl %dx, %eax
+; X86-NEXT: movzwl (%esi,%eax,2), %ecx
; X86-NEXT: .LBB35_4: # %return
+; X86-NEXT: movl %ecx, %eax
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: retl
;
; X64-LABEL: atomic_blsi_xor_16_gpr_brz:
; X64: # %bb.0: # %entry
-; X64-NEXT: movl %esi, %ecx
-; X64-NEXT: negl %ecx
-; X64-NEXT: andl %esi, %ecx
+; X64-NEXT: movl %esi, %edx
+; X64-NEXT: negl %edx
+; X64-NEXT: andl %esi, %edx
; X64-NEXT: movzwl (%rdi), %eax
; X64-NEXT: .p2align 4
; X64-NEXT: .LBB35_1: # %atomicrmw.start
; X64-NEXT: # =>This Inner Loop Header: Depth=1
-; X64-NEXT: movl %eax, %edx
-; X64-NEXT: xorl %ecx, %edx
+; X64-NEXT: movl %eax, %ecx
+; X64-NEXT: xorl %edx, %ecx
; X64-NEXT: # kill: def $ax killed $ax killed $eax
-; X64-NEXT: lock cmpxchgw %dx, (%rdi)
+; X64-NEXT: lock cmpxchgw %cx, (%rdi)
; X64-NEXT: # kill: def $ax killed $ax def $eax
; X64-NEXT: jne .LBB35_1
; X64-NEXT: # %bb.2: # %atomicrmw.end
-; X64-NEXT: movzwl %ax, %edx
-; X64-NEXT: movw $123, %ax
-; X64-NEXT: testl %edx, %ecx
+; X64-NEXT: movw $123, %cx
+; X64-NEXT: testl %eax, %edx
; X64-NEXT: je .LBB35_3
; X64-NEXT: # %bb.4: # %return
+; X64-NEXT: movl %ecx, %eax
; X64-NEXT: retq
; X64-NEXT: .LBB35_3: # %if.then
; X64-NEXT: movzwl %si, %eax
-; X64-NEXT: movzwl (%rdi,%rax,2), %eax
+; X64-NEXT: movzwl (%rdi,%rax,2), %ecx
+; X64-NEXT: movl %ecx, %eax
; X64-NEXT: retq
entry:
%conv = zext i16 %c to i32
@@ -2893,11 +2887,10 @@ define zeroext i16 @atomic_shl1_mask01_and_16_gpr_valnz(ptr %v, i16 zeroext %c)
; X86-NEXT: # kill: def $ax killed $ax def $eax
; X86-NEXT: jne .LBB46_1
; X86-NEXT: # %bb.2: # %atomicrmw.end
-; X86-NEXT: movzwl %ax, %ecx
-; X86-NEXT: xorl %eax, %eax
-; X86-NEXT: testl %ecx, %esi
-; X86-NEXT: setne %al
-; X86-NEXT: # kill: def $ax killed $ax killed $eax
+; X86-NEXT: xorl %ecx, %ecx
+; X86-NEXT: testl %eax, %esi
+; X86-NEXT: setne %cl
+; X86-NEXT: movl %ecx, %eax
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: retl
@@ -2922,11 +2915,10 @@ define zeroext i16 @atomic_shl1_mask01_and_16_gpr_valnz(ptr %v, i16 zeroext %c)
; X64-NEXT: # kill: def $ax killed $ax def $eax
; X64-NEXT: jne .LBB46_1
; X64-NEXT: # %bb.2: # %atomicrmw.end
-; X64-NEXT: movzwl %ax, %ecx
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: testl %ecx, %edx
-; X64-NEXT: setne %al
-; X64-NEXT: # kill: def $ax killed $ax killed $eax
+; X64-NEXT: xorl %ecx, %ecx
+; X64-NEXT: testl %eax, %edx
+; X64-NEXT: setne %cl
+; X64-NEXT: movl %ecx, %eax
; X64-NEXT: retq
entry:
%0 = and i16 %c, 15
@@ -2947,56 +2939,54 @@ define zeroext i16 @atomic_blsi_and_16_gpr_valnz(ptr %v, i16 zeroext %c) nounwin
; X86: # %bb.0: # %entry
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl %eax, %ecx
-; X86-NEXT: negl %ecx
-; X86-NEXT: andl %eax, %ecx
-; X86-NEXT: movl %ecx, %esi
+; X86-NEXT: movl %eax, %edx
+; X86-NEXT: negl %edx
+; X86-NEXT: andl %eax, %edx
+; X86-NEXT: movl %edx, %esi
; X86-NEXT: notl %esi
-; X86-NEXT: movzwl (%edx), %eax
+; X86-NEXT: movzwl (%ecx), %eax
; X86-NEXT: .p2align 4
; X86-NEXT: .LBB47_1: # %atomicrmw.start
; X86-NEXT: # =>This Inner Loop Header: Depth=1
; X86-NEXT: movl %eax, %edi
; X86-NEXT: andl %esi, %edi
; X86-NEXT: # kill: def $ax killed $ax killed $eax
-; X86-NEXT: lock cmpxchgw %di, (%edx)
+; X86-NEXT: lock cmpxchgw %di, (%ecx)
; X86-NEXT: # kill: def $ax killed $ax def $eax
; X86-NEXT: jne .LBB47_1
; X86-NEXT: # %bb.2: # %atomicrmw.end
-; X86-NEXT: movzwl %ax, %edx
-; X86-NEXT: xorl %eax, %eax
-; X86-NEXT: testl %edx, %ecx
-; X86-NEXT: setne %al
-; X86-NEXT: # kill: def $ax killed $ax killed $eax
+; X86-NEXT: xorl %ecx, %ecx
+; X86-NEXT: testl %eax, %edx
+; X86-NEXT: setne %cl
+; X86-NEXT: movl %ecx, %eax
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: retl
;
; X64-LABEL: atomic_blsi_and_16_gpr_valnz:
; X64: # %bb.0: # %entry
-; X64-NEXT: movl %esi, %ecx
-; X64-NEXT: negl %ecx
-; X64-NEXT: andl %esi, %ecx
-; X64-NEXT: movl %ecx, %edx
-; X64-NEXT: notl %edx
+; X64-NEXT: movl %esi, %edx
+; X64-NEXT: negl %edx
+; X64-NEXT: andl %esi, %edx
+; X64-NEXT: movl %edx, %ecx
+; X64-NEXT: notl %ecx
; X64-NEXT: movzwl (%rdi), %eax
; X64-NEXT: .p2align 4
; X64-NEXT: .LBB47_1: # %atomicrmw.start
; X64-NEXT: # =>This Inner Loop Header: Depth=1
; X64-NEXT: movl %eax, %esi
-; X64-NEXT: andl %edx, %esi
+; X64-NEXT: andl %ecx, %esi
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: lock cmpxchgw %si, (%rdi)
; X64-NEXT: # kill: def $ax killed $ax def $eax
; X64-NEXT: jne .LBB47_1
; X64-NEXT: # %bb.2: # %atomicrmw.end
-; X64-NEXT: movzwl %ax, %edx
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: testl %edx, %ecx
-; X64-NEXT: setne %al
-; X64-NEXT: # kill: def $ax killed $ax killed $eax
+; X64-NEXT: xorl %ecx, %ecx
+; X64-NEXT: testl %eax, %edx
+; X64-NEXT: setne %cl
+; X64-NEXT: movl %ecx, %eax
; X64-NEXT: retq
entry:
%conv = zext i16 %c to i32
@@ -3382,7 +3372,6 @@ define zeroext i16 @atomic_shl1_mask01_and_16_gpr_brnz(ptr %v, i16 zeroext %c) n
; X86-NEXT: # kill: def $ax killed $ax def $eax
; X86-NEXT: jne .LBB52_1
; X86-NEXT: # %bb.2: # %atomicrmw.end
-; X86-NEXT: movzwl %ax, %eax
; X86-NEXT: testl %eax, %esi
; X86-NEXT: je .LBB52_3
; X86-NEXT: # %bb.4: # %if.then
@@ -3418,7 +3407,6 @@ define zeroext i16 @atomic_shl1_mask01_and_16_gpr_brnz(ptr %v, i16 zeroext %c) n
; X64-NEXT: # kill: def $ax killed $ax def $eax
; X64-NEXT: jne .LBB52_1
; X64-NEXT: # %bb.2: # %atomicrmw.end
-; X64-NEXT: movzwl %ax, %eax
; X64-NEXT: testl %eax, %esi
; X64-NEXT: je .LBB52_3
; X64-NEXT: # %bb.4: # %if.then
@@ -3475,7 +3463,6 @@ define zeroext i16 @atomic_blsi_and_16_gpr_brnz(ptr %v, i16 zeroext %c) nounwind
; X86-NEXT: # kill: def $ax killed $ax def $eax
; X86-NEXT: jne .LBB53_1
; X86-NEXT: # %bb.2: # %atomicrmw.end
-; X86-NEXT: movzwl %ax, %eax
; X86-NEXT: testl %eax, %esi
; X86-NEXT: je .LBB53_3
; X86-NEXT: # %bb.4: # %if.then
@@ -3508,7 +3495,6 @@ define zeroext i16 @atomic_blsi_and_16_gpr_brnz(ptr %v, i16 zeroext %c) nounwind
; X64-NEXT: # kill: def $ax killed $ax def $eax
; X64-NEXT: jne .LBB53_1
; X64-NEXT: # %bb.2: # %atomicrmw.end
-; X64-NEXT: movzwl %ax, %eax
; X64-NEXT: testl %eax, %ecx
; X64-NEXT: je .LBB53_3
; X64-NEXT: # %bb.4: # %if.then
diff --git a/llvm/test/CodeGen/X86/movmsk-cmp.ll b/llvm/test/CodeGen/X86/movmsk-cmp.ll
index 7f50cac5e4290..6103ad35b2a53 100644
--- a/llvm/test/CodeGen/X86/movmsk-cmp.ll
+++ b/llvm/test/CodeGen/X86/movmsk-cmp.ll
@@ -3713,8 +3713,6 @@ define i1 @movmsk_v16i8(<16 x i8> %x, <16 x i8> %y) {
; SSE-NEXT: shrl $15, %ecx
; SSE-NEXT: movl %eax, %edx
; SSE-NEXT: shrl $8, %edx
-; SSE-NEXT: andl $1, %edx
-; SSE-NEXT: andl $8, %eax
; SSE-NEXT: shrl $3, %eax
; SSE-NEXT: xorl %edx, %eax
; SSE-NEXT: andl %ecx, %eax
@@ -3729,8 +3727,6 @@ define i1 @movmsk_v16i8(<16 x i8> %x, <16 x i8> %y) {
; AVX1OR2-NEXT: shrl $15, %ecx
; AVX1OR2-NEXT: movl %eax, %edx
; AVX1OR2-NEXT: shrl $8, %edx
-; AVX1OR2-NEXT: andl $1, %edx
-; AVX1OR2-NEXT: andl $8, %eax
; AVX1OR2-NEXT: shrl $3, %eax
; AVX1OR2-NEXT: xorl %edx, %eax
; AVX1OR2-NEXT: andl %ecx, %eax
diff --git a/llvm/test/CodeGen/X86/pr34137.ll b/llvm/test/CodeGen/X86/pr34137.ll
index 09a5cbb02cc26..643863d61aad3 100644
--- a/llvm/test/CodeGen/X86/pr34137.ll
+++ b/llvm/test/CodeGen/X86/pr34137.ll
@@ -15,10 +15,10 @@ define void @pr34127() {
; CHECK-NEXT: movl %ecx, -{{[0-9]+}}(%rsp)
; CHECK-NEXT: movzwl var_3(%rip), %ecx
; CHECK-NEXT: xorl %edx, %edx
-; CHECK-NEXT: testl %eax, %ecx
+; CHECK-NEXT: testw %ax, %cx
; CHECK-NEXT: sete %dl
-; CHECK-NEXT: andl %ecx, %edx
-; CHECK-NEXT: movq %rdx, var_212(%rip)
+; CHECK-NEXT: andl %edx, %ecx
+; CHECK-NEXT: movq %rcx, var_212(%rip)
; CHECK-NEXT: movw $0, (%rax)
; CHECK-NEXT: retq
entry:
From 7c75b23668ffe06e477d3613fcfb21ba50088cb5 Mon Sep 17 00:00:00 2001
From: AZero13 <gfunni234 at gmail.com>
Date: Sun, 8 Mar 2026 12:32:15 -0400
Subject: [PATCH 2/3] Fix AMDGPU regression properly
---
llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp | 5 ++---
llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 9 ++++++---
2 files changed, 8 insertions(+), 6 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index f3f38359ebcf0..96a1a6f2ca7a4 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -1513,13 +1513,12 @@ bool TargetLowering::SimplifyDemandedBits(
Known2, TLO, Depth + 1))
return true;
- // FIXME: Pretty much all these extra conditions are to avoid regressions in
- // x86 and AMDGPU.
+ // FIXME: Op1Opc checks are to avoid regressions in
+ // x86 codegen.
unsigned Op1Opc = Op1.getOpcode();
if (!VT.isVector() &&
(Op1Opc == ISD::ZERO_EXTEND || Op1Opc == ISD::SIGN_EXTEND ||
Op1Opc == ISD::ANY_EXTEND || Op1Opc == ISD::TRUNCATE) &&
- Op1.getOperand(0).getValueType().getScalarType() != MVT::i1 &&
(~Known2.Zero & DemandedBits) != DemandedBits) {
Known2 = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth + 1);
if (SimplifyDemandedBits(Op1, ~Known2.Zero & DemandedBits, DemandedElts,
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 1ee43ab8d8172..022ff79f4941f 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -13821,9 +13821,12 @@ SDValue SITargetLowering::performAndCombine(SDNode *N,
}
if (VT == MVT::i32 && (RHS.getOpcode() == ISD::SIGN_EXTEND ||
- LHS.getOpcode() == ISD::SIGN_EXTEND)) {
- // and x, (sext cc from i1) => select cc, x, 0
- if (RHS.getOpcode() != ISD::SIGN_EXTEND)
+ RHS.getOpcode() == ISD::ANY_EXTEND ||
+ LHS.getOpcode() == ISD::SIGN_EXTEND ||
+ LHS.getOpcode() == ISD::ANY_EXTEND)) {
+ // and x, (sext/anyext cc from i1) => select cc, x, 0
+ if (RHS.getOpcode() != ISD::SIGN_EXTEND &&
+ RHS.getOpcode() != ISD::ANY_EXTEND)
std::swap(LHS, RHS);
if (isBoolSGPR(RHS.getOperand(0)))
return DAG.getSelect(SDLoc(N), MVT::i32, RHS.getOperand(0), LHS,
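For reference, a scalar model of the combine being extended here: `and x, (sext cc from i1)` behaves as `select cc, x, 0`, because sign-extending an i1 produces either all-ones or all-zeros. This is only an illustrative model of the semantics, not the DAG combine itself:

```cpp
// Scalar model of the AMDGPU combine:
//   and x, (sext cc from i1)  ==  select cc, x, 0
#include <cassert>
#include <cstdint>

static uint32_t andWithSextBool(uint32_t X, bool CC) {
  uint32_t Mask = CC ? 0xFFFFFFFFu : 0u; // sext i1 -> i32
  return X & Mask;                       // == CC ? X : 0
}

int main() {
  assert(andWithSextBool(0x12345678u, true) == 0x12345678u);
  assert(andWithSextBool(0x12345678u, false) == 0u);
  return 0;
}
```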
From 4d31497beee7d895155ab66638bba133643b8ce9 Mon Sep 17 00:00:00 2001
From: AZero13 <gfunni234 at gmail.com>
Date: Sun, 8 Mar 2026 19:30:18 -0400
Subject: [PATCH 3/3] e
---
.../CodeGen/SelectionDAG/TargetLowering.cpp | 9 +---
llvm/lib/Target/X86/X86ISelLowering.cpp | 50 +++++++++++++++++--
llvm/test/CodeGen/X86/apx/ccmp.ll | 3 +-
llvm/test/CodeGen/X86/test-shrink-bug.ll | 2 +-
4 files changed, 50 insertions(+), 14 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 96a1a6f2ca7a4..c2d2652508259 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -1513,13 +1513,8 @@ bool TargetLowering::SimplifyDemandedBits(
Known2, TLO, Depth + 1))
return true;
- // FIXME: Op1Opc checks are to avoid regressions in
- // x86 codegen.
- unsigned Op1Opc = Op1.getOpcode();
- if (!VT.isVector() &&
- (Op1Opc == ISD::ZERO_EXTEND || Op1Opc == ISD::SIGN_EXTEND ||
- Op1Opc == ISD::ANY_EXTEND || Op1Opc == ISD::TRUNCATE) &&
- (~Known2.Zero & DemandedBits) != DemandedBits) {
+ // If we have any bits that are known zero, try to simplify the other side.
+ if (!VT.isVector() && (~Known2.Zero & DemandedBits) != DemandedBits) {
Known2 = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth + 1);
if (SimplifyDemandedBits(Op1, ~Known2.Zero & DemandedBits, DemandedElts,
Known, TLO, Depth + 1))
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index a1aaa1891e6e3..8ade76b2198e6 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -52689,10 +52689,38 @@ static SDValue combineAndOrForCcmpCtest(SDNode *N, SelectionDAG &DAG,
if (!ST.hasCCMP())
return SDValue();
- SDValue SetCC0 = N->getOperand(0);
- SDValue SetCC1 = N->getOperand(1);
- if (SetCC0.getOpcode() != X86ISD::SETCC ||
- SetCC1.getOpcode() != X86ISD::SETCC)
+ // Helper to match X86ISD::SETCC and track inversion/extensions.
+ // Returns {SETCC node, Inverted}
+ auto MatchSetCC = [&](SDValue V) -> std::pair<SDValue, bool> {
+ bool Inverted = false;
+ while (true) {
+ if (V.getOpcode() == ISD::ANY_EXTEND || V.getOpcode() == ISD::ZERO_EXTEND ||
+ V.getOpcode() == ISD::SIGN_EXTEND || V.getOpcode() == ISD::TRUNCATE ||
+ V.getOpcode() == ISD::BITCAST) {
+ V = V.getOperand(0);
+ continue;
+ }
+ if (V.getOpcode() == ISD::XOR && isOneConstant(V.getOperand(1))) {
+ Inverted = !Inverted;
+ V = V.getOperand(0);
+ continue;
+ }
+ if (isBitwiseNot(V)) {
+ Inverted = !Inverted;
+ V = V.getOperand(0);
+ continue;
+ }
+ break;
+ }
+ if (V.getOpcode() == X86ISD::SETCC)
+ return {V, Inverted};
+ return {SDValue(), false};
+ };
+
+ auto [SetCC0, Inverted0] = MatchSetCC(N->getOperand(0));
+ auto [SetCC1, Inverted1] = MatchSetCC(N->getOperand(1));
+
+ if (!SetCC0 || !SetCC1)
return SDValue();
auto GetCombineToOpc = [&](SDValue V) -> unsigned {
@@ -52711,12 +52739,16 @@ static SDValue combineAndOrForCcmpCtest(SDNode *N, SelectionDAG &DAG,
// appear on the right.
if (!(NewOpc = GetCombineToOpc(SetCC1))) {
std::swap(SetCC0, SetCC1);
+ std::swap(Inverted0, Inverted1);
if (!(NewOpc = GetCombineToOpc(SetCC1)))
return SDValue();
}
X86::CondCode CC0 =
static_cast<X86::CondCode>(SetCC0.getConstantOperandVal(0));
+ if (Inverted0)
+ CC0 = X86::GetOppositeBranchCondition(CC0);
+
// CCMP/CTEST is not conditional when the source condition is COND_P/COND_NP.
if (CC0 == X86::COND_P || CC0 == X86::COND_NP)
return SDValue();
@@ -52730,9 +52762,15 @@ static SDValue combineAndOrForCcmpCtest(SDNode *N, SelectionDAG &DAG,
IsOR ? DAG.getTargetConstant(X86::GetOppositeBranchCondition(CC0),
SDLoc(SetCC0.getOperand(0)), MVT::i8)
: SetCC0.getOperand(0);
+ if (Inverted0)
+ SrcCC = DAG.getTargetConstant(CC0, SDLoc(SetCC0.getOperand(0)), MVT::i8);
+
SDValue CC1N = SetCC1.getOperand(0);
X86::CondCode CC1 =
static_cast<X86::CondCode>(CC1N->getAsAPIntVal().getSExtValue());
+ if (Inverted1)
+ CC1 = X86::GetOppositeBranchCondition(CC1);
+
X86::CondCode OppositeCC1 = X86::GetOppositeBranchCondition(CC1);
X86::CondCode CFlagsCC = IsOR ? CC1 : OppositeCC1;
SDLoc DL(N);
@@ -52750,7 +52788,9 @@ static SDValue combineAndOrForCcmpCtest(SDNode *N, SelectionDAG &DAG,
{Sub.getOperand(0), Sub.getOperand(0),
CFlags, SrcCC, SetCC0.getOperand(1)});
- return DAG.getNode(X86ISD::SETCC, DL, MVT::i8, {CC1N, CCMP});
+ SDValue CC1Const = DAG.getTargetConstant(CC1, DL, MVT::i8);
+ SDValue Res = DAG.getNode(X86ISD::SETCC, DL, MVT::i8, {CC1Const, CCMP});
+ return DAG.getAnyExtOrTrunc(Res, DL, N->getValueType(0));
}
static SDValue combineAnd(SDNode *N, SelectionDAG &DAG,
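The `MatchSetCC` helper above peels value-preserving wrappers (extends, truncate, bitcast) and records a flip for each `xor ..., 1` or bitwise-not it walks through; the parity of those flips decides whether the condition code is later replaced by its opposite. A toy model of that bookkeeping, standing in for `X86::GetOppositeBranchCondition`:

```cpp
// Toy model of MatchSetCC's inversion tracking: an even number of
// peeled logical-not wrappers leaves the condition unchanged, an odd
// number inverts it.
#include <cassert>

static bool finalCondition(bool CC, unsigned NotWrappersPeeled) {
  bool Inverted = (NotWrappersPeeled % 2) != 0;
  return Inverted ? !CC : CC;
}

int main() {
  assert(finalCondition(true, 0) == true);
  assert(finalCondition(true, 1) == false); // single xor-with-1 inverts
  assert(finalCondition(true, 2) == true);  // double negation cancels
  return 0;
}
```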
diff --git a/llvm/test/CodeGen/X86/apx/ccmp.ll b/llvm/test/CodeGen/X86/apx/ccmp.ll
index a9cf0c060e729..2f1937a6ca719 100644
--- a/llvm/test/CodeGen/X86/apx/ccmp.ll
+++ b/llvm/test/CodeGen/X86/apx/ccmp.ll
@@ -381,7 +381,8 @@ define void @ccmp64rr_of(i64 %a, i64 %b, i64 %c) {
; SETZUCC-NEXT: cmpq %rdx, %rdi # encoding: [0x48,0x39,0xd7]
; SETZUCC-NEXT: setzub %al # encoding: [0x62,0xf4,0x7f,0x18,0x42,0xc0]
; SETZUCC-NEXT: cmpq %rsi, %rdi # encoding: [0x48,0x39,0xf7]
-; SETZUCC-NEXT: setzuno %cl # encoding: [0x62,0xf4,0x7f,0x18,0x41,0xc1]
+; SETZUCC-NEXT: setzuo %cl # encoding: [0x62,0xf4,0x7f,0x18,0x40,0xc1]
+; SETZUCC-NEXT: notb %cl # encoding: [0xf6,0xd1]
; SETZUCC-NEXT: testb %cl, %al # encoding: [0x84,0xc8]
; SETZUCC-NEXT: jne .LBB6_1 # encoding: [0x75,A]
; SETZUCC-NEXT: # fixup A - offset: 1, value: .LBB6_1, kind: FK_PCRel_1
diff --git a/llvm/test/CodeGen/X86/test-shrink-bug.ll b/llvm/test/CodeGen/X86/test-shrink-bug.ll
index 1d5da42a8c09b..8a6899b8aa032 100644
--- a/llvm/test/CodeGen/X86/test-shrink-bug.ll
+++ b/llvm/test/CodeGen/X86/test-shrink-bug.ll
@@ -67,7 +67,7 @@ define dso_local void @fail(i16 %a, <2 x i8> %b) {
; CHECK-X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; CHECK-X64-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-X64-NEXT: pextrw $1, %xmm0, %eax
-; CHECK-X64-NEXT: xorb $1, %al
+; CHECK-X64-NEXT: notb %al
; CHECK-X64-NEXT: testl $263, %edi # imm = 0x107
; CHECK-X64-NEXT: setne %cl
; CHECK-X64-NEXT: testb %al, %cl