[llvm] [AArch64] Add patterns for constructive splice. (PR #113912)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Oct 28 07:23:59 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-aarch64
Author: Sander de Smalen (sdesmalen-arm)
<details>
<summary>Changes</summary>
SVE2 adds the constructive splice instruction, which takes a tuple.
Even though the register allocator must ensure that the tuple uses
consecutive registers, it's likely to be more efficient
than using the destructive splice instruction when the first operand
is reused.
---
Patch is 125.51 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/113912.diff
9 Files Affected:
- (modified) llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td (+1-1)
- (modified) llvm/lib/Target/AArch64/SVEInstrFormats.td (+23-1)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-concat.ll (+50-34)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fcopysign.ll (+15-17)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-div.ll (+102-568)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-rem.ll (+91-91)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ptest.ll (+80-80)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-trunc-stores.ll (+5-5)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-trunc.ll (+470-470)
``````````diff
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index dc96b249c4e40c..65a5c2157ec498 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -3846,7 +3846,7 @@ let Predicates = [HasSVE2] in {
let Predicates = [HasSVE2orSME] in {
// SVE2 vector splice (constructive)
- defm SPLICE_ZPZZ : sve2_int_perm_splice_cons<"splice">;
+ defm SPLICE_ZPZZ : sve2_int_perm_splice_cons<"splice", AArch64splice>;
} // End HasSVE2orSME
let Predicates = [HasSVE2] in {
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index 02ee0fe9244572..ea6c826382871e 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -7245,11 +7245,33 @@ class sve2_int_perm_splice_cons<bits<2> sz8_64, string asm,
let hasSideEffects = 0;
}
-multiclass sve2_int_perm_splice_cons<string asm> {
+multiclass sve2_int_perm_splice_cons<string asm, SDPatternOperator op> {
def _B : sve2_int_perm_splice_cons<0b00, asm, ZPR8, ZZ_b>;
def _H : sve2_int_perm_splice_cons<0b01, asm, ZPR16, ZZ_h>;
def _S : sve2_int_perm_splice_cons<0b10, asm, ZPR32, ZZ_s>;
def _D : sve2_int_perm_splice_cons<0b11, asm, ZPR64, ZZ_d>;
+
+ let AddedComplexity = 2 in {
+ foreach VT = [nxv16i8] in
+ def : Pat<(VT (op nxv16i1:$pred, VT:$zn1, VT:$zn2)),
+ (!cast<Instruction>(NAME # _B)
+ nxv16i1:$pred, (REG_SEQUENCE ZPR2, VT:$zn1, zsub0, VT:$zn2, zsub1))>;
+
+ foreach VT = [nxv8i16, nxv8f16, nxv8bf16] in
+ def : Pat<(VT (op nxv8i1:$pred, VT:$zn1, VT:$zn2)),
+ (!cast<Instruction>(NAME # _H)
+ nxv8i1:$pred, (REG_SEQUENCE ZPR2, VT:$zn1, zsub0, VT:$zn2, zsub1))>;
+
+ foreach VT = [nxv4i32, nxv4f16, nxv4f32, nxv4bf16] in
+ def : Pat<(VT (op nxv4i1:$pred, VT:$zn1, VT:$zn2)),
+ (!cast<Instruction>(NAME # _S)
+ nxv4i1:$pred, (REG_SEQUENCE ZPR2, VT:$zn1, zsub0, VT:$zn2, zsub1))>;
+
+ foreach VT = [nxv2i64, nxv2f16, nxv2f32, nxv2f64, nxv2bf16] in
+ def : Pat<(VT (op nxv2i1:$pred, VT:$zn1, VT:$zn2)),
+ (!cast<Instruction>(NAME # _D)
+ nxv2i1:$pred, (REG_SEQUENCE ZPR2, VT:$zn1, zsub0, VT:$zn2, zsub1))>;
+ }
}
class sve_int_perm_rev<bits<2> sz8_64, bits<2> opc, string asm,
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-concat.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-concat.ll
index c1810c678ea522..6e2ecfca9e963e 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-concat.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-concat.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
-; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s
+; RUN: llc -mattr=+sve2 -force-streaming-compatible < %s | FileCheck %s --check-prefixes=CHECK,SVE2
+; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s --check-prefixes=CHECK,SME
; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
target triple = "aarch64-unknown-linux-gnu"
@@ -61,10 +61,10 @@ define <8 x i8> @concat_v8i8(<4 x i8> %op1, <4 x i8> %op2) {
define <16 x i8> @concat_v16i8(<8 x i8> %op1, <8 x i8> %op2) {
; CHECK-LABEL: concat_v16i8:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d1 killed $d1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: ptrue p0.b, vl8
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
-; CHECK-NEXT: splice z0.b, p0, z0.b, z1.b
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: splice z0.b, p0, { z0.b, z1.b }
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
;
@@ -172,10 +172,10 @@ define <4 x i16> @concat_v4i16(<2 x i16> %op1, <2 x i16> %op2) {
define <8 x i16> @concat_v8i16(<4 x i16> %op1, <4 x i16> %op2) {
; CHECK-LABEL: concat_v8i16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d1 killed $d1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: ptrue p0.h, vl4
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
-; CHECK-NEXT: splice z0.h, p0, z0.h, z1.h
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: splice z0.h, p0, { z0.h, z1.h }
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
;
@@ -270,10 +270,10 @@ define <2 x i32> @concat_v2i32(<1 x i32> %op1, <1 x i32> %op2) {
define <4 x i32> @concat_v4i32(<2 x i32> %op1, <2 x i32> %op2) {
; CHECK-LABEL: concat_v4i32:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d1 killed $d1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: ptrue p0.s, vl2
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
-; CHECK-NEXT: splice z0.s, p0, z0.s, z1.s
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: splice z0.s, p0, { z0.s, z1.s }
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
;
@@ -340,10 +340,10 @@ define void @concat_v16i32(ptr %a, ptr %b, ptr %c) {
define <2 x i64> @concat_v2i64(<1 x i64> %op1, <1 x i64> %op2) {
; CHECK-LABEL: concat_v2i64:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d1 killed $d1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: ptrue p0.d, vl1
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
-; CHECK-NEXT: splice z0.d, p0, z0.d, z1.d
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: splice z0.d, p0, { z0.d, z1.d }
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
;
@@ -406,17 +406,33 @@ define void @concat_v8i64(ptr %a, ptr %b, ptr %c) {
;
define <4 x half> @concat_v4f16(<2 x half> %op1, <2 x half> %op2) {
-; CHECK-LABEL: concat_v4f16:
-; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT: mov z2.h, z1.h[1]
-; CHECK-NEXT: mov z3.h, z0.h[1]
-; CHECK-NEXT: zip1 z1.h, z1.h, z2.h
-; CHECK-NEXT: zip1 z0.h, z0.h, z3.h
-; CHECK-NEXT: zip1 z0.s, z0.s, z1.s
-; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
-; CHECK-NEXT: ret
+; SVE2-LABEL: concat_v4f16:
+; SVE2: // %bb.0:
+; SVE2-NEXT: cnth x8
+; SVE2-NEXT: adrp x9, .LCPI15_0
+; SVE2-NEXT: adrp x10, .LCPI15_1
+; SVE2-NEXT: mov z2.h, w8
+; SVE2-NEXT: ldr q3, [x9, :lo12:.LCPI15_0]
+; SVE2-NEXT: ldr q4, [x10, :lo12:.LCPI15_1]
+; SVE2-NEXT: ptrue p0.h, vl8
+; SVE2-NEXT: // kill: def $d1 killed $d1 killed $z0_z1 def $z0_z1
+; SVE2-NEXT: // kill: def $d0 killed $d0 killed $z0_z1 def $z0_z1
+; SVE2-NEXT: mad z2.h, p0/m, z3.h, z4.h
+; SVE2-NEXT: tbl z0.h, { z0.h, z1.h }, z2.h
+; SVE2-NEXT: // kill: def $d0 killed $d0 killed $z0
+; SVE2-NEXT: ret
+;
+; SME-LABEL: concat_v4f16:
+; SME: // %bb.0:
+; SME-NEXT: // kill: def $d1 killed $d1 def $z1
+; SME-NEXT: // kill: def $d0 killed $d0 def $z0
+; SME-NEXT: mov z2.h, z1.h[1]
+; SME-NEXT: mov z3.h, z0.h[1]
+; SME-NEXT: zip1 z1.h, z1.h, z2.h
+; SME-NEXT: zip1 z0.h, z0.h, z3.h
+; SME-NEXT: zip1 z0.s, z0.s, z1.s
+; SME-NEXT: // kill: def $d0 killed $d0 killed $z0
+; SME-NEXT: ret
;
; NONEON-NOSVE-LABEL: concat_v4f16:
; NONEON-NOSVE: // %bb.0:
@@ -436,10 +452,10 @@ define <4 x half> @concat_v4f16(<2 x half> %op1, <2 x half> %op2) {
define <8 x half> @concat_v8f16(<4 x half> %op1, <4 x half> %op2) {
; CHECK-LABEL: concat_v8f16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d1 killed $d1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: ptrue p0.h, vl4
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
-; CHECK-NEXT: splice z0.h, p0, z0.h, z1.h
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: splice z0.h, p0, { z0.h, z1.h }
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
;
@@ -534,10 +550,10 @@ define <2 x float> @concat_v2f32(<1 x float> %op1, <1 x float> %op2) {
define <4 x float> @concat_v4f32(<2 x float> %op1, <2 x float> %op2) {
; CHECK-LABEL: concat_v4f32:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d1 killed $d1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: ptrue p0.s, vl2
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
-; CHECK-NEXT: splice z0.s, p0, z0.s, z1.s
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: splice z0.s, p0, { z0.s, z1.s }
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
;
@@ -604,10 +620,10 @@ define void @concat_v16f32(ptr %a, ptr %b, ptr %c) {
define <2 x double> @concat_v2f64(<1 x double> %op1, <1 x double> %op2) {
; CHECK-LABEL: concat_v2f64:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d1 killed $d1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: ptrue p0.d, vl1
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
-; CHECK-NEXT: splice z0.d, p0, z0.d, z1.d
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: splice z0.d, p0, { z0.d, z1.d }
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
;
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fcopysign.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fcopysign.ll
index f1771a753826cc..2282e74af5d006 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fcopysign.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fcopysign.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s --check-prefixes=CHECK,SVE
-; RUN: llc -mattr=+sve2 -force-streaming-compatible < %s | FileCheck %s --check-prefixes=CHECK,SVE2
-; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s --check-prefixes=CHECK,SVE2
+; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s --check-prefixes=SVE
+; RUN: llc -mattr=+sve2 -force-streaming-compatible < %s | FileCheck %s --check-prefixes=SVE2
+; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s --check-prefixes=SVE2
; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
@@ -842,16 +842,16 @@ define void @test_copysign_v4f32_v4f64(ptr %ap, ptr %bp) {
;
; SVE2-LABEL: test_copysign_v4f32_v4f64:
; SVE2: // %bb.0:
-; SVE2-NEXT: ldp q0, q1, [x1]
+; SVE2-NEXT: ldp q1, q0, [x1]
; SVE2-NEXT: ptrue p0.d
-; SVE2-NEXT: ldr q2, [x0]
-; SVE2-NEXT: fcvt z1.s, p0/m, z1.d
; SVE2-NEXT: fcvt z0.s, p0/m, z0.d
+; SVE2-NEXT: fcvt z1.s, p0/m, z1.d
; SVE2-NEXT: ptrue p0.s, vl2
-; SVE2-NEXT: uzp1 z1.s, z1.s, z1.s
-; SVE2-NEXT: uzp1 z0.s, z0.s, z0.s
-; SVE2-NEXT: splice z0.s, p0, z0.s, z1.s
+; SVE2-NEXT: uzp1 z3.s, z0.s, z0.s
+; SVE2-NEXT: uzp1 z2.s, z1.s, z1.s
; SVE2-NEXT: mov z1.s, #0x7fffffff
+; SVE2-NEXT: splice z0.s, p0, { z2.s, z3.s }
+; SVE2-NEXT: ldr q2, [x0]
; SVE2-NEXT: bsl z2.d, z2.d, z0.d, z1.d
; SVE2-NEXT: str q2, [x0]
; SVE2-NEXT: ret
@@ -1237,16 +1237,16 @@ define void @test_copysign_v8f16_v8f32(ptr %ap, ptr %bp) {
;
; SVE2-LABEL: test_copysign_v8f16_v8f32:
; SVE2: // %bb.0:
-; SVE2-NEXT: ldp q0, q1, [x1]
+; SVE2-NEXT: ldp q1, q0, [x1]
; SVE2-NEXT: ptrue p0.s
-; SVE2-NEXT: ldr q2, [x0]
-; SVE2-NEXT: fcvt z1.h, p0/m, z1.s
; SVE2-NEXT: fcvt z0.h, p0/m, z0.s
+; SVE2-NEXT: fcvt z1.h, p0/m, z1.s
; SVE2-NEXT: ptrue p0.h, vl4
-; SVE2-NEXT: uzp1 z1.h, z1.h, z1.h
-; SVE2-NEXT: uzp1 z0.h, z0.h, z0.h
-; SVE2-NEXT: splice z0.h, p0, z0.h, z1.h
+; SVE2-NEXT: uzp1 z3.h, z0.h, z0.h
+; SVE2-NEXT: uzp1 z2.h, z1.h, z1.h
; SVE2-NEXT: mov z1.h, #32767 // =0x7fff
+; SVE2-NEXT: splice z0.h, p0, { z2.h, z3.h }
+; SVE2-NEXT: ldr q2, [x0]
; SVE2-NEXT: bsl z2.d, z2.d, z0.d, z1.d
; SVE2-NEXT: str q2, [x0]
; SVE2-NEXT: ret
@@ -1349,5 +1349,3 @@ declare <8 x float> @llvm.copysign.v8f32(<8 x float> %a, <8 x float> %b) #0
declare <2 x double> @llvm.copysign.v2f64(<2 x double> %a, <2 x double> %b) #0
declare <4 x double> @llvm.copysign.v4f64(<4 x double> %a, <4 x double> %b) #0
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; CHECK: {{.*}}
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-div.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-div.ll
index 516772b8ca6640..1fdcd4f8268708 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-div.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-div.ll
@@ -1,7 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s --check-prefixes=CHECK,SVE
-; RUN: llc -mattr=+sve2 -force-streaming-compatible < %s | FileCheck %s --check-prefixes=CHECK,SVE2
-; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s --check-prefixes=CHECK,SVE2
+; RUN: llc -mattr=+sve2 -force-streaming-compatible < %s | FileCheck %s --check-prefixes=CHECK
+; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s --check-prefixes=CHECK
; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
target triple = "aarch64-unknown-linux-gnu"
@@ -26,19 +25,6 @@ define <4 x i8> @sdiv_v4i8(<4 x i8> %op1, <4 x i8> %op2) {
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
-; NEON-NOSVE-LABEL: sdiv_v4i8:
-; NEON-NOSVE: // %bb.0:
-; NEON-NOSVE-NEXT: shl v0.4h, v0.4h, #8
-; NEON-NOSVE-NEXT: shl v1.4h, v1.4h, #8
-; NEON-NOSVE-NEXT: ptrue p0.s, vl4
-; NEON-NOSVE-NEXT: sshr v0.4h, v0.4h, #8
-; NEON-NOSVE-NEXT: sshr v1.4h, v1.4h, #8
-; NEON-NOSVE-NEXT: sshll v1.4s, v1.4h, #0
-; NEON-NOSVE-NEXT: sshll v0.4s, v0.4h, #0
-; NEON-NOSVE-NEXT: sdiv z0.s, p0/m, z0.s, z1.s
-; NEON-NOSVE-NEXT: xtn v0.4h, v0.4s
-; NEON-NOSVE-NEXT: ret
-;
; NONEON-NOSVE-LABEL: sdiv_v4i8:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: sub sp, sp, #32
@@ -85,27 +71,12 @@ define <8 x i8> @sdiv_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
; CHECK-NEXT: sdiv z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: ptrue p0.h, vl4
; CHECK-NEXT: uzp1 z1.h, z2.h, z2.h
-; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
-; CHECK-NEXT: splice z1.h, p0, z1.h, z0.h
-; CHECK-NEXT: uzp1 z0.b, z1.b, z1.b
+; CHECK-NEXT: uzp1 z2.h, z0.h, z0.h
+; CHECK-NEXT: splice z0.h, p0, { z1.h, z2.h }
+; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
-; NEON-NOSVE-LABEL: sdiv_v8i8:
-; NEON-NOSVE: // %bb.0:
-; NEON-NOSVE-NEXT: sshll v1.8h, v1.8b, #0
-; NEON-NOSVE-NEXT: sshll v0.8h, v0.8b, #0
-; NEON-NOSVE-NEXT: ptrue p0.s, vl4
-; NEON-NOSVE-NEXT: sshll2 v2.4s, v1.8h, #0
-; NEON-NOSVE-NEXT: sshll2 v3.4s, v0.8h, #0
-; NEON-NOSVE-NEXT: sshll v1.4s, v1.4h, #0
-; NEON-NOSVE-NEXT: sshll v0.4s, v0.4h, #0
-; NEON-NOSVE-NEXT: sdivr z2.s, p0/m, z2.s, z3.s
-; NEON-NOSVE-NEXT: sdiv z0.s, p0/m, z0.s, z1.s
-; NEON-NOSVE-NEXT: uzp1 v0.8h, v0.8h, v2.8h
-; NEON-NOSVE-NEXT: xtn v0.8b, v0.8h
-; NEON-NOSVE-NEXT: ret
-;
; NONEON-NOSVE-LABEL: sdiv_v8i8:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: sub sp, sp, #32
@@ -177,45 +148,21 @@ define <16 x i8> @sdiv_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8
; CHECK-NEXT: sunpklo z1.s, z1.h
; CHECK-NEXT: sdivr z3.s, p0/m, z3.s, z5.s
-; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h
+; CHECK-NEXT: uzp1 z4.h, z4.h, z4.h
+; CHECK-NEXT: uzp1 z5.h, z2.h, z2.h
; CHECK-NEXT: sdiv z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: uzp1 z1.h, z4.h, z4.h
; CHECK-NEXT: ptrue p0.h, vl4
-; CHECK-NEXT: uzp1 z3.h, z3.h, z3.h
-; CHECK-NEXT: splice z1.h, p0, z1.h, z2.h
-; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
-; CHECK-NEXT: splice z3.h, p0, z3.h, z0.h
-; CHECK-NEXT: uzp1 z0.b, z1.b, z1.b
+; CHECK-NEXT: uzp1 z1.h, z3.h, z3.h
+; CHECK-NEXT: uzp1 z2.h, z0.h, z0.h
+; CHECK-NEXT: splice z0.h, p0, { z4.h, z5.h }
+; CHECK-NEXT: splice z1.h, p0, { z1.h, z2.h }
; CHECK-NEXT: ptrue p0.b, vl8
-; CHECK-NEXT: uzp1 z1.b, z3.b, z3.b
-; CHECK-NEXT: splice z0.b, p0, z0.b, z1.b
+; CHECK-NEXT: uzp1 z2.b, z0.b, z0.b
+; CHECK-NEXT: uzp1 z3.b, z1.b, z1.b
+; CHECK-NEXT: splice z0.b, p0, { z2.b, z3.b }
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
;
-; NEON-NOSVE-LABEL: sdiv_v16i8:
-; NEON-NOSVE: // %bb.0:
-; NEON-NOSVE-NEXT: sshll2 v2.8h, v1.16b, #0
-; NEON-NOSVE-NEXT: sshll2 v3.8h, v0.16b, #0
-; NEON-NOSVE-NEXT: sshll v1.8h, v1.8b, #0
-; NEON-NOSVE-NEXT: sshll v0.8h, v0.8b, #0
-; NEON-NOSVE-NEXT: ptrue p0.s, vl4
-; NEON-NOSVE-NEXT: sshll2 v4.4s, v2.8h, #0
-; NEON-NOSVE-NEXT: sshll2 v5.4s, v3.8h, #0
-; NEON-NOSVE-NEXT: sshll v2.4s, v2.4h, #0
-; NEON-NOSVE-NEXT: sshll v3.4s, v3.4h, #0
-; NEON-NOSVE-NEXT: sdivr z4.s, p0/m, z4.s, z5.s
-; NEON-NOSVE-NEXT: sshll2 v5.4s, v0.8h, #0
-; NEON-NOSVE-NEXT: sshll v0.4s, v0.4h, #0
-; NEON-NOSVE-NEXT: sdivr z2.s, p0/m, z2.s, z3.s
-; NEON-NOSVE-NEXT: sshll2 v3.4s, v1.8h, #0
-; NEON-NOSVE-NEXT: sshll v1.4s, v1.4h, #0
-; NEON-NOSVE-NEXT: sdivr z3.s, p0/m, z3.s, z5.s
-; NEON-NOSVE-NEXT: sdiv z0.s, p0/m, z0.s, z1.s
-; NEON-NOSVE-NEXT: uzp1 v1.8h, v2.8h, v4.8h
-; NEON-NOSVE-NEXT: uzp1 v0.8h, v0.8h, v3.8h
-; NEON-NOSVE-NEXT: uzp1 v0.16b, v0.16b, v1.16b
-; NEON-NOSVE-NEXT: ret
-;
; NONEON-NOSVE-LABEL: sdiv_v16i8:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]!
@@ -319,7 +266,6 @@ define void @sdiv_v32i8(ptr %a, ptr %b) {
; CHECK-NEXT: sunpklo z4.h, z2.b
; CHECK-NEXT: sunpklo z2.s, z3.h
; CHECK-NEXT: ext z3.b, z3.b, z3.b, #8
-; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT: sunpklo z5.s, z4.h
; CHECK-NEXT: ext z4.b, z4.b, z4.b, #8
; CHECK-NEXT: sunpklo z3.s, z3.h
@@ -328,7 +274,6 @@ define void @sdiv_v32i8(ptr %a, ptr %b) {
; CHECK-NEXT: ldr q5, [x0]
; CHECK-NEXT: sunpklo z16.h, z5.b
; CHECK-NEXT: ext z5.b, z5.b, z5.b, #8
-; CHECK-NEXT: uzp1 z1.h, z1.h, z1.h
; CHECK-NEXT: sunpklo z5.h, z5.b
; CHECK-NEXT: sunpklo z18.s, z16.h
; CHECK-NEXT: ext z16.b, z16.b, z16.b, #8
@@ -337,81 +282,36 @@ define void @sdiv_v32i8(ptr %a, ptr %b) {
; CHECK-NEXT: sunpklo z18.s, z5.h
; CHECK-NEXT: ext z5.b, z5.b, z5.b, #8
; CHECK-NEXT: sunpklo z5.s, z5.h
-; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h
; CHECK-NEXT: sdivr z7.s, p0/m, z7.s, z16.s
; CHECK-NEXT: sunpklo z16.s, z6.h
; CHECK-NEXT: ext z6.b, z6.b, z6.b, #8
; CHECK-NEXT: sunpklo z6.s, z6.h
+; CHECK-NEXT: uzp1 z20.h, z17.h, z17.h
; CHECK-NEXT: sdivr z16.s, p0/m, z16.s, z18.s
+; CHECK-NEXT: uzp1 z18.h, z0.h, z0.h
+; CHECK-NEXT: uzp1 z19.h, z1.h, z1.h
+; CHECK-NEXT: uzp1 z21.h, z7.h, z7.h
; CHECK-NEXT: sdiv z5.s, p0/m, z5.s, z6.s
-; CHECK-NEXT: uzp1 z6.h, z7.h, z7.h
-; CHECK-NEXT: uzp1 z7.h, z16.h, z16.h
+; CHECK-NEXT: uzp1 z0.h, z16.h, z16.h
; CHECK-NEXT: sdivr z3.s, p0/m, z3.s, z4.s
-; CHECK-NEXT: uzp1 z4.h, z17.h, z17.h
; CHECK-NEXT: ptrue p0.h, vl4
-; CHECK-NEXT: splice z0.h, p0, z0.h, z1.h
-; CHECK-NEXT: uzp1 z5.h, z5.h, z5.h
-; CHECK-NEXT: splice z4.h, p0, z4.h, z6.h
-; CHECK-NEXT: splice z7.h, p0, z7.h, z5.h
-; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b
-; CHECK-NEXT: uzp1 z1.b, z4.b, z4.b
-; CHECK-NEXT: uzp1 z3.h, z3.h, z3.h
-; CHECK-NEXT: splice z2.h, p0, z2.h, z3.h
-; CHECK-NEXT: uzp1 z3.b, z7.b, z7.b
+; CHECK-NEXT: uzp1 z1.h, z5.h, z5.h
+; CHECK-NEXT: uzp1 z4.h, z2.h, z2.h
+; CHECK-NEXT: splice z2.h, p0, { z20.h, z21.h }
+; CHECK-NEXT: splice z0.h, p0, { z0.h, z1.h }
+; CHECK-NEXT: uzp1 z5.h, z3...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/113912
More information about the llvm-commits mailing list