[llvm] [NFC][AArch64] test for sdiv with fixed-width vectors, pow2-divisor and SVE enabled (PR #130252)
Sushant Gokhale via llvm-commits
llvm-commits at lists.llvm.org
Thu Mar 6 22:12:24 PST 2025
https://github.com/sushgokh created https://github.com/llvm/llvm-project/pull/130252
With SVE enabled, this should generate an asrd instruction. A subsequent patch will address this.
From cd35e67d07efaddaa05f561cc481c2840d367e4b Mon Sep 17 00:00:00 2001
From: sgokhale <sgokhale at nvidia.com>
Date: Thu, 6 Mar 2025 22:07:19 -0800
Subject: [PATCH] [NFC][AArch64] test for sdiv with fixed-width vectors,
pow2-divisor and SVE enabled
With SVE enabled, this should generate asrd instruction. Subsequent
patch will address this.
---
.../AArch64/sve-fixed-length-sdiv-pow2.ll | 103 ++++++++++++++++++
1 file changed, 103 insertions(+)
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-sdiv-pow2.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-sdiv-pow2.ll
index 21a5abdeaa4d5..e6ee64861c76b 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-sdiv-pow2.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-sdiv-pow2.ll
@@ -1,10 +1,33 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -aarch64-sve-vector-bits-min=128 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_128
; RUN: llc -aarch64-sve-vector-bits-min=256 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256
; RUN: llc -aarch64-sve-vector-bits-min=512 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
; RUN: llc -aarch64-sve-vector-bits-min=2048 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
target triple = "aarch64-unknown-linux-gnu"
+define <4 x i32> @sdiv_v4i32_packed(<4 x i32> %op1) vscale_range(1,0) #0 {
+; CHECK-LABEL: sdiv_v4i32_packed:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmlt v1.4s, v0.4s, #0
+; CHECK-NEXT: usra v0.4s, v1.4s, #29
+; CHECK-NEXT: sshr v0.4s, v0.4s, #3
+; CHECK-NEXT: ret
+ %res = sdiv <4 x i32> %op1, splat (i32 8)
+ ret <4 x i32> %res
+}
+
+define <2 x i32> @sdiv_v2i32_unpacked(<2 x i32> %op1) vscale_range(1,0) #0 {
+; CHECK-LABEL: sdiv_v2i32_unpacked:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmlt v1.2s, v0.2s, #0
+; CHECK-NEXT: usra v0.2s, v1.2s, #29
+; CHECK-NEXT: sshr v0.2s, v0.2s, #3
+; CHECK-NEXT: ret
+ %res = sdiv <2 x i32> %op1, splat (i32 8)
+ ret <2 x i32> %res
+}
+
define <8 x i8> @sdiv_v8i8(<8 x i8> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: sdiv_v8i8:
; CHECK: // %bb.0:
@@ -45,6 +68,26 @@ define void @sdiv_v32i8(ptr %a) vscale_range(2,0) #0 {
}
define void @sdiv_v64i8(ptr %a) #0 {
+; VBITS_GE_128-LABEL: sdiv_v64i8:
+; VBITS_GE_128: // %bb.0:
+; VBITS_GE_128-NEXT: ldp q0, q1, [x0, #32]
+; VBITS_GE_128-NEXT: ldp q3, q4, [x0]
+; VBITS_GE_128-NEXT: cmlt v2.16b, v0.16b, #0
+; VBITS_GE_128-NEXT: cmlt v5.16b, v1.16b, #0
+; VBITS_GE_128-NEXT: cmlt v6.16b, v3.16b, #0
+; VBITS_GE_128-NEXT: usra v0.16b, v2.16b, #3
+; VBITS_GE_128-NEXT: cmlt v2.16b, v4.16b, #0
+; VBITS_GE_128-NEXT: usra v1.16b, v5.16b, #3
+; VBITS_GE_128-NEXT: usra v3.16b, v6.16b, #3
+; VBITS_GE_128-NEXT: usra v4.16b, v2.16b, #3
+; VBITS_GE_128-NEXT: sshr v0.16b, v0.16b, #5
+; VBITS_GE_128-NEXT: sshr v1.16b, v1.16b, #5
+; VBITS_GE_128-NEXT: sshr v2.16b, v3.16b, #5
+; VBITS_GE_128-NEXT: sshr v3.16b, v4.16b, #5
+; VBITS_GE_128-NEXT: stp q0, q1, [x0, #32]
+; VBITS_GE_128-NEXT: stp q2, q3, [x0]
+; VBITS_GE_128-NEXT: ret
+;
; VBITS_GE_256-LABEL: sdiv_v64i8:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ptrue p0.b, vl32
@@ -139,6 +182,26 @@ define void @sdiv_v16i16(ptr %a) vscale_range(2,0) #0 {
}
define void @sdiv_v32i16(ptr %a) #0 {
+; VBITS_GE_128-LABEL: sdiv_v32i16:
+; VBITS_GE_128: // %bb.0:
+; VBITS_GE_128-NEXT: ldp q0, q1, [x0, #32]
+; VBITS_GE_128-NEXT: ldp q3, q4, [x0]
+; VBITS_GE_128-NEXT: cmlt v2.8h, v0.8h, #0
+; VBITS_GE_128-NEXT: cmlt v5.8h, v1.8h, #0
+; VBITS_GE_128-NEXT: cmlt v6.8h, v3.8h, #0
+; VBITS_GE_128-NEXT: usra v0.8h, v2.8h, #11
+; VBITS_GE_128-NEXT: cmlt v2.8h, v4.8h, #0
+; VBITS_GE_128-NEXT: usra v1.8h, v5.8h, #11
+; VBITS_GE_128-NEXT: usra v3.8h, v6.8h, #11
+; VBITS_GE_128-NEXT: usra v4.8h, v2.8h, #11
+; VBITS_GE_128-NEXT: sshr v0.8h, v0.8h, #5
+; VBITS_GE_128-NEXT: sshr v1.8h, v1.8h, #5
+; VBITS_GE_128-NEXT: sshr v2.8h, v3.8h, #5
+; VBITS_GE_128-NEXT: sshr v3.8h, v4.8h, #5
+; VBITS_GE_128-NEXT: stp q0, q1, [x0, #32]
+; VBITS_GE_128-NEXT: stp q2, q3, [x0]
+; VBITS_GE_128-NEXT: ret
+;
; VBITS_GE_256-LABEL: sdiv_v32i16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ptrue p0.h, vl16
@@ -234,6 +297,26 @@ define void @sdiv_v8i32(ptr %a) vscale_range(2,0) #0 {
}
define void @sdiv_v16i32(ptr %a) #0 {
+; VBITS_GE_128-LABEL: sdiv_v16i32:
+; VBITS_GE_128: // %bb.0:
+; VBITS_GE_128-NEXT: ldp q0, q1, [x0, #32]
+; VBITS_GE_128-NEXT: ldp q3, q4, [x0]
+; VBITS_GE_128-NEXT: cmlt v2.4s, v0.4s, #0
+; VBITS_GE_128-NEXT: cmlt v5.4s, v1.4s, #0
+; VBITS_GE_128-NEXT: cmlt v6.4s, v3.4s, #0
+; VBITS_GE_128-NEXT: usra v0.4s, v2.4s, #27
+; VBITS_GE_128-NEXT: cmlt v2.4s, v4.4s, #0
+; VBITS_GE_128-NEXT: usra v1.4s, v5.4s, #27
+; VBITS_GE_128-NEXT: usra v3.4s, v6.4s, #27
+; VBITS_GE_128-NEXT: usra v4.4s, v2.4s, #27
+; VBITS_GE_128-NEXT: sshr v0.4s, v0.4s, #5
+; VBITS_GE_128-NEXT: sshr v1.4s, v1.4s, #5
+; VBITS_GE_128-NEXT: sshr v2.4s, v3.4s, #5
+; VBITS_GE_128-NEXT: sshr v3.4s, v4.4s, #5
+; VBITS_GE_128-NEXT: stp q0, q1, [x0, #32]
+; VBITS_GE_128-NEXT: stp q2, q3, [x0]
+; VBITS_GE_128-NEXT: ret
+;
; VBITS_GE_256-LABEL: sdiv_v16i32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
@@ -329,6 +412,26 @@ define void @sdiv_v4i64(ptr %a) vscale_range(2,0) #0 {
}
define void @sdiv_v8i64(ptr %a) #0 {
+; VBITS_GE_128-LABEL: sdiv_v8i64:
+; VBITS_GE_128: // %bb.0:
+; VBITS_GE_128-NEXT: ldp q0, q1, [x0, #32]
+; VBITS_GE_128-NEXT: ldp q3, q4, [x0]
+; VBITS_GE_128-NEXT: cmlt v2.2d, v0.2d, #0
+; VBITS_GE_128-NEXT: cmlt v5.2d, v1.2d, #0
+; VBITS_GE_128-NEXT: cmlt v6.2d, v3.2d, #0
+; VBITS_GE_128-NEXT: usra v0.2d, v2.2d, #59
+; VBITS_GE_128-NEXT: cmlt v2.2d, v4.2d, #0
+; VBITS_GE_128-NEXT: usra v1.2d, v5.2d, #59
+; VBITS_GE_128-NEXT: usra v3.2d, v6.2d, #59
+; VBITS_GE_128-NEXT: usra v4.2d, v2.2d, #59
+; VBITS_GE_128-NEXT: sshr v0.2d, v0.2d, #5
+; VBITS_GE_128-NEXT: sshr v1.2d, v1.2d, #5
+; VBITS_GE_128-NEXT: sshr v2.2d, v3.2d, #5
+; VBITS_GE_128-NEXT: sshr v3.2d, v4.2d, #5
+; VBITS_GE_128-NEXT: stp q0, q1, [x0, #32]
+; VBITS_GE_128-NEXT: stp q2, q3, [x0]
+; VBITS_GE_128-NEXT: ret
+;
; VBITS_GE_256-LABEL: sdiv_v8i64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
More information about the llvm-commits
mailing list