[llvm] [DAG]SimplifyDemandedVectorElts-add ISD::AVGCEILS/AVGCEILU/AVGFLOORS/AVGFLOORU nodes (PR #86284)
via llvm-commits
llvm-commits at lists.llvm.org
Sat Mar 30 02:20:17 PDT 2024
https://github.com/aniplcc updated https://github.com/llvm/llvm-project/pull/86284
>From 9cea11aa34c21f9e0e2f0da6d224d1ddd362f01e Mon Sep 17 00:00:00 2001
From: aniplcc <aniplccode at gmail.com>
Date: Fri, 22 Mar 2024 18:53:37 +0530
Subject: [PATCH 1/2] [DAG]Add ISD::AVGCEILS/AVGCEILU/AVGFLOORS/AVGFLOORU nodes
to SimplifyDemandedVectorElts
---
.../CodeGen/SelectionDAG/TargetLowering.cpp | 4 ++
llvm/test/CodeGen/AArch64/hadd-combine.ll | 52 +++++++++++++++++++
2 files changed, 56 insertions(+)
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index da29b1d5b312f8..58d9394feb1237 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -3524,6 +3524,10 @@ bool TargetLowering::SimplifyDemandedVectorElts(
}
[[fallthrough]];
}
+ case ISD::AVGCEILS:
+ case ISD::AVGCEILU:
+ case ISD::AVGFLOORS:
+ case ISD::AVGFLOORU:
case ISD::OR:
case ISD::XOR:
case ISD::SUB:
diff --git a/llvm/test/CodeGen/AArch64/hadd-combine.ll b/llvm/test/CodeGen/AArch64/hadd-combine.ll
index e12502980790da..7c1c089839edc5 100644
--- a/llvm/test/CodeGen/AArch64/hadd-combine.ll
+++ b/llvm/test/CodeGen/AArch64/hadd-combine.ll
@@ -879,6 +879,58 @@ define <8 x i16> @uhadd_fixedwidth_v4i32(<8 x i16> %a0, <8 x i16> %a1) {
ret <8 x i16> %res
}
+define <8 x i16> @shadd_demandedelts(<8 x i16> %a0, <8 x i16> %a1) {
+; CHECK-LABEL: shadd_demandedelts:
+; CHECK: // %bb.0:
+; CHECK-NEXT: dup v0.8h, v0.h[0]
+; CHECK-NEXT: shadd v0.8h, v0.8h, v1.8h
+; CHECK-NEXT: dup v0.8h, v0.h[0]
+; CHECK-NEXT: ret
+ %s0 = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> zeroinitializer
+ %op = call <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16> %s0, <8 x i16> %a1)
+ %r0 = shufflevector <8 x i16> %op, <8 x i16> undef, <8 x i32> zeroinitializer
+ ret <8 x i16> %r0
+}
+
+define <8 x i16> @srhadd_demandedelts(<8 x i16> %a0, <8 x i16> %a1) {
+; CHECK-LABEL: srhadd_demandedelts:
+; CHECK: // %bb.0:
+; CHECK-NEXT: dup v0.8h, v0.h[0]
+; CHECK-NEXT: srhadd v0.8h, v0.8h, v1.8h
+; CHECK-NEXT: dup v0.8h, v0.h[0]
+; CHECK-NEXT: ret
+ %s0 = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> zeroinitializer
+ %op = call <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16> %s0, <8 x i16> %a1)
+ %r0 = shufflevector <8 x i16> %op, <8 x i16> undef, <8 x i32> zeroinitializer
+ ret <8 x i16> %r0
+}
+
+define <8 x i16> @uhadd_demandedelts(<8 x i16> %a0, <8 x i16> %a1) {
+; CHECK-LABEL: uhadd_demandedelts:
+; CHECK: // %bb.0:
+; CHECK-NEXT: dup v0.8h, v0.h[0]
+; CHECK-NEXT: uhadd v0.8h, v0.8h, v1.8h
+; CHECK-NEXT: dup v0.8h, v0.h[0]
+; CHECK-NEXT: ret
+ %s0 = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> zeroinitializer
+ %op = call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> %s0, <8 x i16> %a1)
+ %r0 = shufflevector <8 x i16> %op, <8 x i16> undef, <8 x i32> zeroinitializer
+ ret <8 x i16> %r0
+}
+
+define <8 x i16> @urhadd_demandedelts(<8 x i16> %a0, <8 x i16> %a1) {
+; CHECK-LABEL: urhadd_demandedelts:
+; CHECK: // %bb.0:
+; CHECK-NEXT: dup v0.8h, v0.h[0]
+; CHECK-NEXT: urhadd v0.8h, v0.8h, v1.8h
+; CHECK-NEXT: dup v0.8h, v0.h[0]
+; CHECK-NEXT: ret
+ %s0 = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> zeroinitializer
+ %op = call <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16> %s0, <8 x i16> %a1)
+ %r0 = shufflevector <8 x i16> %op, <8 x i16> undef, <8 x i32> zeroinitializer
+ ret <8 x i16> %r0
+}
+
declare <8 x i8> @llvm.aarch64.neon.shadd.v8i8(<8 x i8>, <8 x i8>)
declare <4 x i16> @llvm.aarch64.neon.shadd.v4i16(<4 x i16>, <4 x i16>)
declare <2 x i32> @llvm.aarch64.neon.shadd.v2i32(<2 x i32>, <2 x i32>)
>From 7b495b89c2d83c7a61ab734feb6cbc6392e04a95 Mon Sep 17 00:00:00 2001
From: aniplcc <aniplccode at gmail.com>
Date: Sat, 30 Mar 2024 14:33:14 +0530
Subject: [PATCH 2/2] update combine-pavg.ll
---
llvm/test/CodeGen/X86/combine-pavg.ll | 9 +++------
1 file changed, 3 insertions(+), 6 deletions(-)
diff --git a/llvm/test/CodeGen/X86/combine-pavg.ll b/llvm/test/CodeGen/X86/combine-pavg.ll
index 7a8ddf5178d3d8..cb2d426a52b4b2 100644
--- a/llvm/test/CodeGen/X86/combine-pavg.ll
+++ b/llvm/test/CodeGen/X86/combine-pavg.ll
@@ -84,25 +84,22 @@ define <16 x i8> @combine_pavgw_knownbits(<8 x i16> %a0, <8 x i16> %a1, <8 x i16
define <8 x i16> @combine_pavgw_demandedelts(<8 x i16> %a0, <8 x i16> %a1) {
; SSE-LABEL: combine_pavgw_demandedelts:
; SSE: # %bb.0:
-; SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
-; SSE-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,8,9,8,9,12,13,12,13]
; SSE-NEXT: pavgw %xmm1, %xmm0
+; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE-NEXT: retq
;
; AVX1-LABEL: combine_pavgw_demandedelts:
; AVX1: # %bb.0:
-; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
-; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,8,9,8,9,12,13,12,13]
; AVX1-NEXT: vpavgw %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT: retq
;
; AVX2-LABEL: combine_pavgw_demandedelts:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpbroadcastw %xmm1, %xmm1
-; AVX2-NEXT: vpbroadcastw %xmm0, %xmm0
; AVX2-NEXT: vpavgw %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpbroadcastw %xmm0, %xmm0
; AVX2-NEXT: retq
%s0 = shufflevector <8 x i16> %a0, <8 x i16> poison, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
%avg = tail call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %s0, <8 x i16> %a1)
More information about the llvm-commits
mailing list