[llvm] 076dbc0 - [X86] SimplifyDemandedVectorEltsForTargetNode - add X86ISD::VZEXT_LOAD handling.
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Thu Jan 4 09:23:24 PST 2024
Author: Simon Pilgrim
Date: 2024-01-04T17:17:07Z
New Revision: 076dbc02724681c7d3664959d5ae742099b7edb6
URL: https://github.com/llvm/llvm-project/commit/076dbc02724681c7d3664959d5ae742099b7edb6
DIFF: https://github.com/llvm/llvm-project/commit/076dbc02724681c7d3664959d5ae742099b7edb6.diff
LOG: [X86] SimplifyDemandedVectorEltsForTargetNode - add X86ISD::VZEXT_LOAD handling.
Simplify to a scalar_to_vector(load()) if we don't demand any of the upper vector elements.
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/buildvec-insertvec.ll
llvm/test/CodeGen/X86/fminimum-fmaximum.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index e0679f5f27d8c7..fe3ba2ae29179e 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -41348,6 +41348,20 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
return TLO.CombineTo(Op, Src);
break;
}
+ case X86ISD::VZEXT_LOAD: {
+ // If upper demanded elements are not demanded then simplify to a
+ // scalar_to_vector(load()).
+ MVT SVT = VT.getSimpleVT().getVectorElementType();
+ if (DemandedElts == 1 && Op.getValue(1).use_empty() && isTypeLegal(SVT)) {
+ SDLoc DL(Op);
+ auto *Mem = cast<MemSDNode>(Op);
+ SDValue Elt = TLO.DAG.getLoad(SVT, DL, Mem->getChain(), Mem->getBasePtr(),
+ Mem->getMemOperand());
+ SDValue Vec = TLO.DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Elt);
+ return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Vec));
+ }
+ break;
+ }
case X86ISD::VBROADCAST: {
SDValue Src = Op.getOperand(0);
MVT SrcVT = Src.getSimpleValueType();
diff --git a/llvm/test/CodeGen/X86/buildvec-insertvec.ll b/llvm/test/CodeGen/X86/buildvec-insertvec.ll
index a3568716edd9e9..3fdfde8576f777 100644
--- a/llvm/test/CodeGen/X86/buildvec-insertvec.ll
+++ b/llvm/test/CodeGen/X86/buildvec-insertvec.ll
@@ -799,9 +799,8 @@ define i32 @PR46586(ptr %p, <4 x i32> %v) {
;
; SSE41-LABEL: PR46586:
; SSE41: # %bb.0:
-; SSE41-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; SSE41-NEXT: movzbl 3(%rdi), %eax
; SSE41-NEXT: extractps $3, %xmm0, %ecx
-; SSE41-NEXT: pextrb $3, %xmm1, %eax
; SSE41-NEXT: xorl %edx, %edx
; SSE41-NEXT: divl %ecx
; SSE41-NEXT: movl %edx, %eax
@@ -809,9 +808,8 @@ define i32 @PR46586(ptr %p, <4 x i32> %v) {
;
; AVX-LABEL: PR46586:
; AVX: # %bb.0:
-; AVX-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; AVX-NEXT: movzbl 3(%rdi), %eax
; AVX-NEXT: vextractps $3, %xmm0, %ecx
-; AVX-NEXT: vpextrb $3, %xmm1, %eax
; AVX-NEXT: xorl %edx, %edx
; AVX-NEXT: divl %ecx
; AVX-NEXT: movl %edx, %eax
diff --git a/llvm/test/CodeGen/X86/fminimum-fmaximum.ll b/llvm/test/CodeGen/X86/fminimum-fmaximum.ll
index 5bb5d1e9c17ec8..8905d2bce5e92a 100644
--- a/llvm/test/CodeGen/X86/fminimum-fmaximum.ll
+++ b/llvm/test/CodeGen/X86/fminimum-fmaximum.ll
@@ -699,10 +699,9 @@ define double @test_fminimum_nnan(double %x, double %y) "no-nans-fp-math"="true"
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
; X86-NEXT: subl $8, %esp
-; X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
-; X86-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero
-; X86-NEXT: vextractps $1, %xmm2, %eax
+; X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; X86-NEXT: vextractps $1, %xmm0, %eax
; X86-NEXT: testl %eax, %eax
; X86-NEXT: js .LBB14_1
; X86-NEXT: # %bb.2:
More information about the llvm-commits mailing list