[llvm] aa2dac4 - [DAG] SimplifyDemandedBits - fold FSHR(X,Y,Amt) -> SRL(Y,Amt) (#182294)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Feb 19 10:29:39 PST 2026
Author: Simon Pilgrim
Date: 2026-02-19T18:29:34Z
New Revision: aa2dac40de5c1d9c4e1d00d445d90621fe4996fc
URL: https://github.com/llvm/llvm-project/commit/aa2dac40de5c1d9c4e1d00d445d90621fe4996fc
DIFF: https://github.com/llvm/llvm-project/commit/aa2dac40de5c1d9c4e1d00d445d90621fe4996fc.diff
LOG: [DAG] SimplifyDemandedBits - fold FSHR(X,Y,Amt) -> SRL(Y,Amt) (#182294)
If an FSHR node's DemandedBits mask and maximum shift amount don't
demand any bits from the X upper register, then simplify to an SRL node.
FSHL is less useful but we could add it as a future patch if there's
interest.
Based off a discussion on #182021
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
llvm/test/CodeGen/X86/add-sub-bool.ll
llvm/test/CodeGen/X86/bittest-big-integer.ll
llvm/test/CodeGen/X86/hoist-and-by-const-from-shl-in-eqcmp-zero.ll
llvm/test/CodeGen/X86/known-pow2.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index de0ec493aba7d..9dacc28f439d6 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -2256,11 +2256,31 @@ bool TargetLowering::SimplifyDemandedBits(
}
}
- // For pow-2 bitwidths we only demand the bottom modulo amt bits.
if (isPowerOf2_32(BitWidth)) {
+ // Fold FSHR(Op0,Op1,Op2) -> SRL(Op1,Op2)
+ // iff we're guaranteed not to use Op0.
+ // TODO: Add FSHL equivalent?
+ if (!IsFSHL && !DemandedBits.isAllOnes() &&
+ (!TLO.LegalOperations() || isOperationLegal(ISD::SRL, VT))) {
+ KnownBits KnownAmt =
+ TLO.DAG.computeKnownBits(Op2, DemandedElts, Depth + 1);
+ unsigned MaxShiftAmt =
+ KnownAmt.getMaxValue().getLimitedValue(BitWidth - 1);
+ // Check we don't demand any shifted bits outside Op1.
+ if (DemandedBits.countl_zero() >= MaxShiftAmt) {
+ EVT AmtVT = Op2.getValueType();
+ SDValue NewAmt =
+ TLO.DAG.getNode(ISD::AND, dl, AmtVT, Op2,
+ TLO.DAG.getConstant(BitWidth - 1, dl, AmtVT));
+ SDValue NewOp = TLO.DAG.getNode(ISD::SRL, dl, VT, Op1, NewAmt);
+ return TLO.CombineTo(Op, NewOp);
+ }
+ }
+
+ // For pow-2 bitwidths we only demand the bottom modulo amt bits.
APInt DemandedAmtBits(Op2.getScalarValueSizeInBits(), BitWidth - 1);
- if (SimplifyDemandedBits(Op2, DemandedAmtBits, DemandedElts,
- Known2, TLO, Depth + 1))
+ if (SimplifyDemandedBits(Op2, DemandedAmtBits, DemandedElts, Known2, TLO,
+ Depth + 1))
return true;
}
break;
diff --git a/llvm/test/CodeGen/X86/add-sub-bool.ll b/llvm/test/CodeGen/X86/add-sub-bool.ll
index 1df284fb9fe2c..85c14d1e0ac04 100644
--- a/llvm/test/CodeGen/X86/add-sub-bool.ll
+++ b/llvm/test/CodeGen/X86/add-sub-bool.ll
@@ -392,30 +392,27 @@ define i32 @test_i32_add_add_commute_var(i32 %x, i32 %y, i32 %z, i32 %w) nounwin
define i64 @test_i64_add_add_var(i64 %x, i64 %y, i64 %z, i64 %w) nounwind {
; X86-LABEL: test_i64_add_add_var:
; X86: # %bb.0:
-; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: addl {{[0-9]+}}(%esp), %eax
; X86-NEXT: adcl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl %ebx, %edi
+; X86-NEXT: shrl %cl, %esi
; X86-NEXT: shrl %cl, %edi
-; X86-NEXT: shrdl %cl, %ebx, %esi
; X86-NEXT: testb $32, %cl
; X86-NEXT: jne .LBB15_2
; X86-NEXT: # %bb.1:
-; X86-NEXT: movl %esi, %edi
+; X86-NEXT: movl %edi, %esi
; X86-NEXT: .LBB15_2:
-; X86-NEXT: andl $1, %edi
-; X86-NEXT: addl %edi, %eax
+; X86-NEXT: andl $1, %esi
+; X86-NEXT: addl %esi, %eax
; X86-NEXT: adcl $0, %edx
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
-; X86-NEXT: popl %ebx
; X86-NEXT: retl
;
; X64-LABEL: test_i64_add_add_var:
diff --git a/llvm/test/CodeGen/X86/bittest-big-integer.ll b/llvm/test/CodeGen/X86/bittest-big-integer.ll
index 023fb5065b892..7070848e3fe3e 100644
--- a/llvm/test/CodeGen/X86/bittest-big-integer.ll
+++ b/llvm/test/CodeGen/X86/bittest-big-integer.ll
@@ -1039,14 +1039,14 @@ define <8 x i16> @complement_ne_i128_bitcast(ptr %word, i32 %position) nounwind
; SSE2: # %bb.0:
; SSE2-NEXT: # kill: def $esi killed $esi def $rsi
; SSE2-NEXT: movdqa (%rdi), %xmm0
-; SSE2-NEXT: movq 8(%rdi), %rax
-; SSE2-NEXT: movq %xmm0, %rdx
+; SSE2-NEXT: movq %xmm0, %rax
+; SSE2-NEXT: movq 8(%rdi), %rdx
; SSE2-NEXT: movl %esi, %ecx
; SSE2-NEXT: andb $32, %cl
-; SSE2-NEXT: shrdq %cl, %rax, %rdx
+; SSE2-NEXT: shrq %cl, %rdx
; SSE2-NEXT: shrq %cl, %rax
; SSE2-NEXT: testb $64, %sil
-; SSE2-NEXT: cmoveq %rdx, %rax
+; SSE2-NEXT: cmovneq %rdx, %rax
; SSE2-NEXT: btcl %esi, %eax
; SSE2-NEXT: andl $96, %esi
; SSE2-NEXT: shrl $3, %esi
@@ -1057,14 +1057,14 @@ define <8 x i16> @complement_ne_i128_bitcast(ptr %word, i32 %position) nounwind
; SSE4: # %bb.0:
; SSE4-NEXT: # kill: def $esi killed $esi def $rsi
; SSE4-NEXT: movdqa (%rdi), %xmm0
-; SSE4-NEXT: pextrq $1, %xmm0, %rax
-; SSE4-NEXT: movq %xmm0, %rdx
+; SSE4-NEXT: movq %xmm0, %rax
+; SSE4-NEXT: pextrq $1, %xmm0, %rdx
; SSE4-NEXT: movl %esi, %ecx
; SSE4-NEXT: andb $32, %cl
-; SSE4-NEXT: shrdq %cl, %rax, %rdx
+; SSE4-NEXT: shrq %cl, %rdx
; SSE4-NEXT: shrq %cl, %rax
; SSE4-NEXT: testb $64, %sil
-; SSE4-NEXT: cmoveq %rdx, %rax
+; SSE4-NEXT: cmovneq %rdx, %rax
; SSE4-NEXT: btcl %esi, %eax
; SSE4-NEXT: andl $96, %esi
; SSE4-NEXT: shrl $3, %esi
@@ -1075,14 +1075,14 @@ define <8 x i16> @complement_ne_i128_bitcast(ptr %word, i32 %position) nounwind
; AVX: # %bb.0:
; AVX-NEXT: # kill: def $esi killed $esi def $rsi
; AVX-NEXT: vmovdqa (%rdi), %xmm0
-; AVX-NEXT: vpextrq $1, %xmm0, %rax
-; AVX-NEXT: vmovq %xmm0, %rdx
-; AVX-NEXT: movl %esi, %ecx
-; AVX-NEXT: andb $32, %cl
-; AVX-NEXT: shrdq %cl, %rax, %rdx
-; AVX-NEXT: shrxq %rcx, %rax, %rax
+; AVX-NEXT: vmovq %xmm0, %rax
+; AVX-NEXT: vpextrq $1, %xmm0, %rcx
+; AVX-NEXT: movl %esi, %edx
+; AVX-NEXT: andb $32, %dl
+; AVX-NEXT: shrxq %rdx, %rcx, %rcx
+; AVX-NEXT: shrxq %rdx, %rax, %rax
; AVX-NEXT: testb $64, %sil
-; AVX-NEXT: cmoveq %rdx, %rax
+; AVX-NEXT: cmovneq %rcx, %rax
; AVX-NEXT: btcl %esi, %eax
; AVX-NEXT: andl $96, %esi
; AVX-NEXT: shrl $3, %esi
@@ -1386,12 +1386,12 @@ define i1 @sequence_i128(ptr %word, i32 %pos0, i32 %pos1, i32 %pos2) nounwind {
; SSE-NEXT: xorq (%rdi), %r9
; SSE-NEXT: movl %edx, %ecx
; SSE-NEXT: andb $32, %cl
-; SSE-NEXT: movq %r9, %rax
-; SSE-NEXT: shrdq %cl, %rsi, %rax
-; SSE-NEXT: movq %rsi, %r11
+; SSE-NEXT: movq %rsi, %rax
+; SSE-NEXT: shrq %cl, %rax
+; SSE-NEXT: movq %r9, %r11
; SSE-NEXT: shrq %cl, %r11
; SSE-NEXT: testb $64, %dl
-; SSE-NEXT: cmoveq %rax, %r11
+; SSE-NEXT: cmovneq %rax, %r11
; SSE-NEXT: btl %edx, %r11d
; SSE-NEXT: setae %al
; SSE-NEXT: orq %r10, %rsi
@@ -1415,23 +1415,22 @@ define i1 @sequence_i128(ptr %word, i32 %pos0, i32 %pos1, i32 %pos2) nounwind {
; AVX2-NEXT: xorl %r11d, %r11d
; AVX2-NEXT: movl %eax, %ecx
; AVX2-NEXT: shldq %cl, %r10, %r11
-; AVX2-NEXT: shlxq %rax, %r10, %r10
+; AVX2-NEXT: shlxq %rax, %r10, %rcx
; AVX2-NEXT: testb $64, %al
-; AVX2-NEXT: cmovneq %r10, %r11
-; AVX2-NEXT: cmovneq %r9, %r10
+; AVX2-NEXT: cmovneq %rcx, %r11
+; AVX2-NEXT: cmovneq %r9, %rcx
; AVX2-NEXT: xorq 8(%rdi), %rsi
; AVX2-NEXT: xorq (%rdi), %r8
-; AVX2-NEXT: movl %edx, %ecx
-; AVX2-NEXT: andb $32, %cl
-; AVX2-NEXT: movq %r8, %rax
-; AVX2-NEXT: shrdq %cl, %rsi, %rax
-; AVX2-NEXT: shrxq %rcx, %rsi, %rcx
+; AVX2-NEXT: movl %edx, %eax
+; AVX2-NEXT: andb $32, %al
+; AVX2-NEXT: shrxq %rax, %rsi, %r9
+; AVX2-NEXT: shrxq %rax, %r8, %rax
; AVX2-NEXT: testb $64, %dl
-; AVX2-NEXT: cmoveq %rax, %rcx
-; AVX2-NEXT: btl %edx, %ecx
+; AVX2-NEXT: cmovneq %r9, %rax
+; AVX2-NEXT: btl %edx, %eax
; AVX2-NEXT: setae %al
; AVX2-NEXT: orq %r11, %rsi
-; AVX2-NEXT: orq %r10, %r8
+; AVX2-NEXT: orq %rcx, %r8
; AVX2-NEXT: movq %r8, (%rdi)
; AVX2-NEXT: movq %rsi, 8(%rdi)
; AVX2-NEXT: retq
@@ -1451,23 +1450,22 @@ define i1 @sequence_i128(ptr %word, i32 %pos0, i32 %pos1, i32 %pos2) nounwind {
; AVX512-NEXT: xorl %r11d, %r11d
; AVX512-NEXT: movl %eax, %ecx
; AVX512-NEXT: shldq %cl, %r9, %r11
-; AVX512-NEXT: shlxq %rax, %r9, %r9
+; AVX512-NEXT: shlxq %rax, %r9, %rcx
; AVX512-NEXT: testb $64, %al
-; AVX512-NEXT: cmovneq %r9, %r11
-; AVX512-NEXT: cmovneq %r10, %r9
+; AVX512-NEXT: cmovneq %rcx, %r11
+; AVX512-NEXT: cmovneq %r10, %rcx
; AVX512-NEXT: xorq 8(%rdi), %rsi
; AVX512-NEXT: xorq (%rdi), %r8
-; AVX512-NEXT: movl %edx, %ecx
-; AVX512-NEXT: andb $32, %cl
-; AVX512-NEXT: movq %r8, %rax
-; AVX512-NEXT: shrdq %cl, %rsi, %rax
-; AVX512-NEXT: shrxq %rcx, %rsi, %rcx
+; AVX512-NEXT: movl %edx, %eax
+; AVX512-NEXT: andb $32, %al
+; AVX512-NEXT: shrxq %rax, %rsi, %r9
+; AVX512-NEXT: shrxq %rax, %r8, %rax
; AVX512-NEXT: testb $64, %dl
-; AVX512-NEXT: cmoveq %rax, %rcx
-; AVX512-NEXT: btl %edx, %ecx
+; AVX512-NEXT: cmovneq %r9, %rax
+; AVX512-NEXT: btl %edx, %eax
; AVX512-NEXT: setae %al
; AVX512-NEXT: orq %r11, %rsi
-; AVX512-NEXT: orq %r9, %r8
+; AVX512-NEXT: orq %rcx, %r8
; AVX512-NEXT: movq %r8, (%rdi)
; AVX512-NEXT: movq %rsi, 8(%rdi)
; AVX512-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/hoist-and-by-const-from-shl-in-eqcmp-zero.ll b/llvm/test/CodeGen/X86/hoist-and-by-const-from-shl-in-eqcmp-zero.ll
index fd58efc235b1f..274b31173e6ab 100644
--- a/llvm/test/CodeGen/X86/hoist-and-by-const-from-shl-in-eqcmp-zero.ll
+++ b/llvm/test/CodeGen/X86/hoist-and-by-const-from-shl-in-eqcmp-zero.ll
@@ -468,22 +468,22 @@ define i1 @scalar_i128_lowestbit_eq(i128 %x, i128 %y) nounwind {
; X64-BMI1: # %bb.0:
; X64-BMI1-NEXT: movl %edx, %ecx
; X64-BMI1-NEXT: andb $32, %cl
-; X64-BMI1-NEXT: shrdq %cl, %rsi, %rdi
; X64-BMI1-NEXT: shrq %cl, %rsi
+; X64-BMI1-NEXT: shrq %cl, %rdi
; X64-BMI1-NEXT: testb $64, %dl
-; X64-BMI1-NEXT: cmoveq %rdi, %rsi
-; X64-BMI1-NEXT: btl %edx, %esi
+; X64-BMI1-NEXT: cmovneq %rsi, %rdi
+; X64-BMI1-NEXT: btl %edx, %edi
; X64-BMI1-NEXT: setae %al
; X64-BMI1-NEXT: retq
;
; X64-BMI2-LABEL: scalar_i128_lowestbit_eq:
; X64-BMI2: # %bb.0:
-; X64-BMI2-NEXT: movl %edx, %ecx
-; X64-BMI2-NEXT: andb $32, %cl
-; X64-BMI2-NEXT: shrdq %cl, %rsi, %rdi
-; X64-BMI2-NEXT: shrxq %rcx, %rsi, %rax
+; X64-BMI2-NEXT: movl %edx, %eax
+; X64-BMI2-NEXT: andb $32, %al
+; X64-BMI2-NEXT: shrxq %rax, %rsi, %rcx
+; X64-BMI2-NEXT: shrxq %rax, %rdi, %rax
; X64-BMI2-NEXT: testb $64, %dl
-; X64-BMI2-NEXT: cmoveq %rdi, %rax
+; X64-BMI2-NEXT: cmovneq %rcx, %rax
; X64-BMI2-NEXT: btl %edx, %eax
; X64-BMI2-NEXT: setae %al
; X64-BMI2-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/known-pow2.ll b/llvm/test/CodeGen/X86/known-pow2.ll
index 2ef5def9c0fd8..2662767b9e2db 100644
--- a/llvm/test/CodeGen/X86/known-pow2.ll
+++ b/llvm/test/CodeGen/X86/known-pow2.ll
@@ -877,11 +877,11 @@ define i1 @pow2_and_i128(i128 %num, i128 %shift) {
; CHECK: # %bb.0:
; CHECK-NEXT: movl %edx, %ecx
; CHECK-NEXT: andb $32, %cl
-; CHECK-NEXT: shrdq %cl, %rsi, %rdi
; CHECK-NEXT: shrq %cl, %rsi
+; CHECK-NEXT: shrq %cl, %rdi
; CHECK-NEXT: testb $64, %dl
-; CHECK-NEXT: cmoveq %rdi, %rsi
-; CHECK-NEXT: btl %edx, %esi
+; CHECK-NEXT: cmovneq %rsi, %rdi
+; CHECK-NEXT: btl %edx, %edi
; CHECK-NEXT: setae %al
; CHECK-NEXT: retq
%mask = shl nuw i128 1, %shift
More information about the llvm-commits
mailing list