[llvm] b85e7f0 - [X86] isSplatValueForTargetNode - test source value for vector uniform shift ops (#186619)
via llvm-commits
llvm-commits at lists.llvm.org
Sat Mar 14 13:40:21 PDT 2026
Author: Simon Pilgrim
Date: 2026-03-14T20:40:16Z
New Revision: b85e7f04397752926175d72b953fa428f08c656a
URL: https://github.com/llvm/llvm-project/commit/b85e7f04397752926175d72b953fa428f08c656a
DIFF: https://github.com/llvm/llvm-project/commit/b85e7f04397752926175d72b953fa428f08c656a.diff
LOG: [X86] isSplatValueForTargetNode - test source value for vector uniform shift ops (#186619)
For old SSE-style vector shifts, we just need to check that the shifted value is a splat, since the shift amount is uniform across all lanes.
This avoids an unnecessary variable shuffle in the i512 ashr expansion.
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/shift-i512.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index ddba1c7dbdf00..4fbbf63c39065 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -45827,6 +45827,14 @@ bool X86TargetLowering::isSplatValueForTargetNode(SDValue Op,
case X86ISD::VBROADCAST_LOAD:
UndefElts = APInt::getZero(NumElts);
return true;
+ case X86ISD::VSHL:
+ case X86ISD::VSRA:
+ case X86ISD::VSRL:
+ case X86ISD::VSHLI:
+ case X86ISD::VSRAI:
+ case X86ISD::VSRLI:
+ return DAG.isSplatValue(Op.getOperand(0), DemandedElts, UndefElts,
+ Depth + 1);
}
return TargetLowering::isSplatValueForTargetNode(Op, DemandedElts, UndefElts,
diff --git a/llvm/test/CodeGen/X86/shift-i512.ll b/llvm/test/CodeGen/X86/shift-i512.ll
index 01cd11b9e712c..fa854663d38f9 100644
--- a/llvm/test/CodeGen/X86/shift-i512.ll
+++ b/llvm/test/CodeGen/X86/shift-i512.ll
@@ -604,8 +604,8 @@ define i512 @ashr_i512(i512 %a0, i512 %a1) nounwind {
; AVX512F-NEXT: andl $56, %eax
; AVX512F-NEXT: vmovdqu64 -128(%rsp,%rax), %zmm1
; AVX512F-NEXT: vpsrlq %xmm0, %zmm1, %zmm2
-; AVX512F-NEXT: vpsraq $63, -72(%rsp,%rax){1to8}, %zmm3
-; AVX512F-NEXT: valignq {{.*#+}} zmm3 = zmm3[7,0,1,2,3,4,5,6]
+; AVX512F-NEXT: vpbroadcastq -72(%rsp,%rax), %xmm3
+; AVX512F-NEXT: vpsraq $63, %zmm3, %zmm3
; AVX512F-NEXT: valignq {{.*#+}} zmm1 = zmm1[1,2,3,4,5,6,7],zmm3[0]
; AVX512F-NEXT: vpaddq %zmm1, %zmm1, %zmm1
; AVX512F-NEXT: vpandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
@@ -639,20 +639,19 @@ define i512 @ashr_i512(i512 %a0, i512 %a1) nounwind {
; AVX512VL-NEXT: movq %r10, -{{[0-9]+}}(%rsp)
; AVX512VL-NEXT: movq %r10, -{{[0-9]+}}(%rsp)
; AVX512VL-NEXT: movl %eax, %ecx
-; AVX512VL-NEXT: shrl $3, %ecx
-; AVX512VL-NEXT: andl $56, %ecx
-; AVX512VL-NEXT: vpsraq $63, -72(%rsp,%rcx){1to8}, %zmm0
-; AVX512VL-NEXT: vmovdqu64 -128(%rsp,%rcx), %zmm1
-; AVX512VL-NEXT: vmovdqa64 {{.*#+}} zmm2 = [1,2,3,4,5,6,7,15]
-; AVX512VL-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
-; AVX512VL-NEXT: vpaddq %zmm2, %zmm2, %zmm0
-; AVX512VL-NEXT: andl $63, %eax
-; AVX512VL-NEXT: vpbroadcastq %rax, %xmm2
-; AVX512VL-NEXT: vpandnq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm2, %xmm3
-; AVX512VL-NEXT: vpsllq %xmm3, %zmm0, %zmm0
-; AVX512VL-NEXT: vpsrlq %xmm2, %zmm1, %zmm1
+; AVX512VL-NEXT: andl $63, %ecx
+; AVX512VL-NEXT: vpbroadcastq %rcx, %xmm0
+; AVX512VL-NEXT: shrl $3, %eax
+; AVX512VL-NEXT: andl $56, %eax
+; AVX512VL-NEXT: vmovdqu64 -128(%rsp,%rax), %zmm1
+; AVX512VL-NEXT: vpsrlq %xmm0, %zmm1, %zmm2
+; AVX512VL-NEXT: vpsraq $63, -72(%rsp,%rax){1to2}, %xmm3
+; AVX512VL-NEXT: valignq {{.*#+}} zmm1 = zmm1[1,2,3,4,5,6,7],zmm3[0]
+; AVX512VL-NEXT: vpaddq %zmm1, %zmm1, %zmm1
+; AVX512VL-NEXT: vpandnq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
+; AVX512VL-NEXT: vpsllq %xmm0, %zmm1, %zmm0
; AVX512VL-NEXT: movq %rdi, %rax
-; AVX512VL-NEXT: vporq %zmm1, %zmm0, %zmm0
+; AVX512VL-NEXT: vporq %zmm2, %zmm0, %zmm0
; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, 32(%rdi)
; AVX512VL-NEXT: vmovdqu %ymm0, (%rdi)
; AVX512VL-NEXT: popq %rcx
@@ -663,34 +662,33 @@ define i512 @ashr_i512(i512 %a0, i512 %a1) nounwind {
; AVX512VBMI: # %bb.0:
; AVX512VBMI-NEXT: pushq %rax
; AVX512VBMI-NEXT: movq %rdi, %rax
-; AVX512VBMI-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm0
; AVX512VBMI-NEXT: movq {{[0-9]+}}(%rsp), %rdi
-; AVX512VBMI-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
-; AVX512VBMI-NEXT: vmovups %xmm0, -{{[0-9]+}}(%rsp)
+; AVX512VBMI-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm0
; AVX512VBMI-NEXT: movq {{[0-9]+}}(%rsp), %r10
+; AVX512VBMI-NEXT: movq %r10, -{{[0-9]+}}(%rsp)
+; AVX512VBMI-NEXT: vmovups %xmm0, -{{[0-9]+}}(%rsp)
; AVX512VBMI-NEXT: movq %r9, -{{[0-9]+}}(%rsp)
; AVX512VBMI-NEXT: movq %r8, -{{[0-9]+}}(%rsp)
; AVX512VBMI-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
; AVX512VBMI-NEXT: movq %rdx, -{{[0-9]+}}(%rsp)
; AVX512VBMI-NEXT: movq %rsi, -{{[0-9]+}}(%rsp)
-; AVX512VBMI-NEXT: sarq $63, %rdi
-; AVX512VBMI-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
-; AVX512VBMI-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
-; AVX512VBMI-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
-; AVX512VBMI-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
-; AVX512VBMI-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
-; AVX512VBMI-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
-; AVX512VBMI-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
-; AVX512VBMI-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
-; AVX512VBMI-NEXT: vpbroadcastq %r10, %zmm0
-; AVX512VBMI-NEXT: # kill: def $r10d killed $r10d killed $r10 def $r10
-; AVX512VBMI-NEXT: shrl $3, %r10d
-; AVX512VBMI-NEXT: andl $56, %r10d
-; AVX512VBMI-NEXT: vpsraq $63, -72(%rsp,%r10){1to8}, %zmm1
-; AVX512VBMI-NEXT: vmovdqu64 -128(%rsp,%r10), %zmm2
-; AVX512VBMI-NEXT: vmovdqa64 {{.*#+}} zmm3 = [1,2,3,4,5,6,7,15]
-; AVX512VBMI-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
-; AVX512VBMI-NEXT: vpshrdvq %zmm0, %zmm3, %zmm2
+; AVX512VBMI-NEXT: sarq $63, %r10
+; AVX512VBMI-NEXT: movq %r10, -{{[0-9]+}}(%rsp)
+; AVX512VBMI-NEXT: movq %r10, -{{[0-9]+}}(%rsp)
+; AVX512VBMI-NEXT: movq %r10, -{{[0-9]+}}(%rsp)
+; AVX512VBMI-NEXT: movq %r10, -{{[0-9]+}}(%rsp)
+; AVX512VBMI-NEXT: movq %r10, -{{[0-9]+}}(%rsp)
+; AVX512VBMI-NEXT: movq %r10, -{{[0-9]+}}(%rsp)
+; AVX512VBMI-NEXT: movq %r10, -{{[0-9]+}}(%rsp)
+; AVX512VBMI-NEXT: movq %r10, -{{[0-9]+}}(%rsp)
+; AVX512VBMI-NEXT: vpbroadcastq %rdi, %zmm0
+; AVX512VBMI-NEXT: # kill: def $edi killed $edi killed $rdi def $rdi
+; AVX512VBMI-NEXT: shrl $3, %edi
+; AVX512VBMI-NEXT: andl $56, %edi
+; AVX512VBMI-NEXT: vpsraq $63, -72(%rsp,%rdi){1to2}, %xmm1
+; AVX512VBMI-NEXT: vmovdqu64 -128(%rsp,%rdi), %zmm2
+; AVX512VBMI-NEXT: valignq {{.*#+}} zmm1 = zmm2[1,2,3,4,5,6,7],zmm1[0]
+; AVX512VBMI-NEXT: vpshrdvq %zmm0, %zmm1, %zmm2
; AVX512VBMI-NEXT: vextracti64x4 $1, %zmm2, 32(%rax)
; AVX512VBMI-NEXT: vmovdqu %ymm2, (%rax)
; AVX512VBMI-NEXT: popq %rcx
More information about the llvm-commits
mailing list