[llvm] [NFC][AArch64] test for sdiv with fixed-width vectors, pow2-divisor and SVE enabled (PR #130252)

Sushant Gokhale via llvm-commits llvm-commits at lists.llvm.org
Thu Mar 6 22:12:24 PST 2025


https://github.com/sushgokh created https://github.com/llvm/llvm-project/pull/130252

With SVE enabled, this should generate the asrd instruction instead of the cmlt/usra/sshr sequences checked below. A subsequent patch will address this.
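For context, the cmlt/usra/sshr sequences in the new CHECK lines implement the usual round-toward-zero correction for signed division by a power of two: negative lanes are biased by divisor-1 before the arithmetic shift, while SVE's asrd performs the same rounding shift in a single instruction. Below is a minimal scalar model in C of what the checked sequence computes, for divisor 2^k; the function name is mine, and it assumes >> on a negative int32_t is an arithmetic shift (true for AArch64 compilers), so treat it as a sketch rather than part of the patch:

    #include <assert.h>
    #include <stdint.h>

    /* Scalar model of the lowering checked below, for divisor 2^k:
       cmlt: mask = all-ones if the lane is negative, else zero
       usra: accumulate (uint32_t)mask >> (32 - k), i.e. add the
             round-toward-zero bias 2^k - 1 for negative lanes
       sshr: arithmetic shift right by k */
    static int32_t sdiv_pow2_model(int32_t x, unsigned k) {
      int32_t mask = (x < 0) ? -1 : 0;                      /* cmlt */
      int32_t bias = (int32_t)((uint32_t)mask >> (32 - k)); /* usra */
      return (x + bias) >> k;                               /* sshr */
    }

    int main(void) {
      assert(sdiv_pow2_model(-9, 3) == -1); /* sdiv: -9 / 8 == -1 */
      assert(sdiv_pow2_model(9, 3) == 1);   /* sdiv:  9 / 8 ==  1 */
      assert(sdiv_pow2_model(-8, 3) == -1); /* exact negative case */
      return 0;
    }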

From cd35e67d07efaddaa05f561cc481c2840d367e4b Mon Sep 17 00:00:00 2001
From: sgokhale <sgokhale at nvidia.com>
Date: Thu, 6 Mar 2025 22:07:19 -0800
Subject: [PATCH] [NFC][AArch64] test for sdiv with fixed-width vectors,
 pow2-divisor and SVE enabled

With SVE enabled, this should generate the asrd instruction. A
subsequent patch will address this.
---
 .../AArch64/sve-fixed-length-sdiv-pow2.ll     | 103 ++++++++++++++++++
 1 file changed, 103 insertions(+)

diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-sdiv-pow2.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-sdiv-pow2.ll
index 21a5abdeaa4d5..e6ee64861c76b 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-sdiv-pow2.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-sdiv-pow2.ll
@@ -1,10 +1,33 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -aarch64-sve-vector-bits-min=128  < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_128
 ; RUN: llc -aarch64-sve-vector-bits-min=256  < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256
 ; RUN: llc -aarch64-sve-vector-bits-min=512  < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
 ; RUN: llc -aarch64-sve-vector-bits-min=2048 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
 
 target triple = "aarch64-unknown-linux-gnu"
 
+define <4 x i32> @sdiv_v4i32_packed(<4 x i32> %op1) vscale_range(1,0) #0 {
+; CHECK-LABEL: sdiv_v4i32_packed:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmlt v1.4s, v0.4s, #0
+; CHECK-NEXT:    usra v0.4s, v1.4s, #29
+; CHECK-NEXT:    sshr v0.4s, v0.4s, #3
+; CHECK-NEXT:    ret
+  %res = sdiv <4 x i32> %op1, splat (i32 8)
+  ret <4 x i32> %res
+}
+
+define <2 x i32> @sdiv_v2i32_unpacked(<2 x i32> %op1) vscale_range(1,0) #0 {
+; CHECK-LABEL: sdiv_v2i32_unpacked:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmlt v1.2s, v0.2s, #0
+; CHECK-NEXT:    usra v0.2s, v1.2s, #29
+; CHECK-NEXT:    sshr v0.2s, v0.2s, #3
+; CHECK-NEXT:    ret
+  %res = sdiv <2 x i32> %op1, splat (i32 8)
+  ret <2 x i32> %res
+}
+
 define <8 x i8> @sdiv_v8i8(<8 x i8> %op1) vscale_range(2,0) #0 {
 ; CHECK-LABEL: sdiv_v8i8:
 ; CHECK:       // %bb.0:
@@ -45,6 +68,26 @@ define void @sdiv_v32i8(ptr %a) vscale_range(2,0) #0 {
 }
 
 define void @sdiv_v64i8(ptr %a) #0 {
+; VBITS_GE_128-LABEL: sdiv_v64i8:
+; VBITS_GE_128:       // %bb.0:
+; VBITS_GE_128-NEXT:    ldp q0, q1, [x0, #32]
+; VBITS_GE_128-NEXT:    ldp q3, q4, [x0]
+; VBITS_GE_128-NEXT:    cmlt v2.16b, v0.16b, #0
+; VBITS_GE_128-NEXT:    cmlt v5.16b, v1.16b, #0
+; VBITS_GE_128-NEXT:    cmlt v6.16b, v3.16b, #0
+; VBITS_GE_128-NEXT:    usra v0.16b, v2.16b, #3
+; VBITS_GE_128-NEXT:    cmlt v2.16b, v4.16b, #0
+; VBITS_GE_128-NEXT:    usra v1.16b, v5.16b, #3
+; VBITS_GE_128-NEXT:    usra v3.16b, v6.16b, #3
+; VBITS_GE_128-NEXT:    usra v4.16b, v2.16b, #3
+; VBITS_GE_128-NEXT:    sshr v0.16b, v0.16b, #5
+; VBITS_GE_128-NEXT:    sshr v1.16b, v1.16b, #5
+; VBITS_GE_128-NEXT:    sshr v2.16b, v3.16b, #5
+; VBITS_GE_128-NEXT:    sshr v3.16b, v4.16b, #5
+; VBITS_GE_128-NEXT:    stp q0, q1, [x0, #32]
+; VBITS_GE_128-NEXT:    stp q2, q3, [x0]
+; VBITS_GE_128-NEXT:    ret
+;
 ; VBITS_GE_256-LABEL: sdiv_v64i8:
 ; VBITS_GE_256:       // %bb.0:
 ; VBITS_GE_256-NEXT:    ptrue p0.b, vl32
@@ -139,6 +182,26 @@ define void @sdiv_v16i16(ptr %a) vscale_range(2,0) #0 {
 }
 
 define void @sdiv_v32i16(ptr %a) #0 {
+; VBITS_GE_128-LABEL: sdiv_v32i16:
+; VBITS_GE_128:       // %bb.0:
+; VBITS_GE_128-NEXT:    ldp q0, q1, [x0, #32]
+; VBITS_GE_128-NEXT:    ldp q3, q4, [x0]
+; VBITS_GE_128-NEXT:    cmlt v2.8h, v0.8h, #0
+; VBITS_GE_128-NEXT:    cmlt v5.8h, v1.8h, #0
+; VBITS_GE_128-NEXT:    cmlt v6.8h, v3.8h, #0
+; VBITS_GE_128-NEXT:    usra v0.8h, v2.8h, #11
+; VBITS_GE_128-NEXT:    cmlt v2.8h, v4.8h, #0
+; VBITS_GE_128-NEXT:    usra v1.8h, v5.8h, #11
+; VBITS_GE_128-NEXT:    usra v3.8h, v6.8h, #11
+; VBITS_GE_128-NEXT:    usra v4.8h, v2.8h, #11
+; VBITS_GE_128-NEXT:    sshr v0.8h, v0.8h, #5
+; VBITS_GE_128-NEXT:    sshr v1.8h, v1.8h, #5
+; VBITS_GE_128-NEXT:    sshr v2.8h, v3.8h, #5
+; VBITS_GE_128-NEXT:    sshr v3.8h, v4.8h, #5
+; VBITS_GE_128-NEXT:    stp q0, q1, [x0, #32]
+; VBITS_GE_128-NEXT:    stp q2, q3, [x0]
+; VBITS_GE_128-NEXT:    ret
+;
 ; VBITS_GE_256-LABEL: sdiv_v32i16:
 ; VBITS_GE_256:       // %bb.0:
 ; VBITS_GE_256-NEXT:    ptrue p0.h, vl16
@@ -234,6 +297,26 @@ define void @sdiv_v8i32(ptr %a) vscale_range(2,0) #0 {
 }
 
 define void @sdiv_v16i32(ptr %a) #0 {
+; VBITS_GE_128-LABEL: sdiv_v16i32:
+; VBITS_GE_128:       // %bb.0:
+; VBITS_GE_128-NEXT:    ldp q0, q1, [x0, #32]
+; VBITS_GE_128-NEXT:    ldp q3, q4, [x0]
+; VBITS_GE_128-NEXT:    cmlt v2.4s, v0.4s, #0
+; VBITS_GE_128-NEXT:    cmlt v5.4s, v1.4s, #0
+; VBITS_GE_128-NEXT:    cmlt v6.4s, v3.4s, #0
+; VBITS_GE_128-NEXT:    usra v0.4s, v2.4s, #27
+; VBITS_GE_128-NEXT:    cmlt v2.4s, v4.4s, #0
+; VBITS_GE_128-NEXT:    usra v1.4s, v5.4s, #27
+; VBITS_GE_128-NEXT:    usra v3.4s, v6.4s, #27
+; VBITS_GE_128-NEXT:    usra v4.4s, v2.4s, #27
+; VBITS_GE_128-NEXT:    sshr v0.4s, v0.4s, #5
+; VBITS_GE_128-NEXT:    sshr v1.4s, v1.4s, #5
+; VBITS_GE_128-NEXT:    sshr v2.4s, v3.4s, #5
+; VBITS_GE_128-NEXT:    sshr v3.4s, v4.4s, #5
+; VBITS_GE_128-NEXT:    stp q0, q1, [x0, #32]
+; VBITS_GE_128-NEXT:    stp q2, q3, [x0]
+; VBITS_GE_128-NEXT:    ret
+;
 ; VBITS_GE_256-LABEL: sdiv_v16i32:
 ; VBITS_GE_256:       // %bb.0:
 ; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
@@ -329,6 +412,26 @@ define void @sdiv_v4i64(ptr %a) vscale_range(2,0) #0 {
 }
 
 define void @sdiv_v8i64(ptr %a) #0 {
+; VBITS_GE_128-LABEL: sdiv_v8i64:
+; VBITS_GE_128:       // %bb.0:
+; VBITS_GE_128-NEXT:    ldp q0, q1, [x0, #32]
+; VBITS_GE_128-NEXT:    ldp q3, q4, [x0]
+; VBITS_GE_128-NEXT:    cmlt v2.2d, v0.2d, #0
+; VBITS_GE_128-NEXT:    cmlt v5.2d, v1.2d, #0
+; VBITS_GE_128-NEXT:    cmlt v6.2d, v3.2d, #0
+; VBITS_GE_128-NEXT:    usra v0.2d, v2.2d, #59
+; VBITS_GE_128-NEXT:    cmlt v2.2d, v4.2d, #0
+; VBITS_GE_128-NEXT:    usra v1.2d, v5.2d, #59
+; VBITS_GE_128-NEXT:    usra v3.2d, v6.2d, #59
+; VBITS_GE_128-NEXT:    usra v4.2d, v2.2d, #59
+; VBITS_GE_128-NEXT:    sshr v0.2d, v0.2d, #5
+; VBITS_GE_128-NEXT:    sshr v1.2d, v1.2d, #5
+; VBITS_GE_128-NEXT:    sshr v2.2d, v3.2d, #5
+; VBITS_GE_128-NEXT:    sshr v3.2d, v4.2d, #5
+; VBITS_GE_128-NEXT:    stp q0, q1, [x0, #32]
+; VBITS_GE_128-NEXT:    stp q2, q3, [x0]
+; VBITS_GE_128-NEXT:    ret
+;
 ; VBITS_GE_256-LABEL: sdiv_v8i64:
 ; VBITS_GE_256:       // %bb.0:
 ; VBITS_GE_256-NEXT:    ptrue p0.d, vl4
