[llvm] [AArch64][NFC] Add test as a representative of scalarizing a vector i… (PR #114107)

Sushant Gokhale via llvm-commits llvm-commits at lists.llvm.org
Sun Nov 3 21:35:21 PST 2024


https://github.com/sushgokh updated https://github.com/llvm/llvm-project/pull/114107

>From d38f624894cf37dc2664062b93325c177f900bd5 Mon Sep 17 00:00:00 2001
From: sgokhale <sgokhale at nvidia.com>
Date: Tue, 29 Oct 2024 23:16:13 +0530
Subject: [PATCH] [AArch64][NFC] Add test as a representative of scalarizing a
 vector integer division

Scalarization is considered the last resort when vectorizing a bundle of integer divisions.
Currently, the cost estimate for scalarizing a vector division can be considerably overestimated,
as this motivating test case shows: here the vector cost should not deviate much from
the scalar cost.

A future patch will try to improve the scalarization cost estimate.
---
 .../Transforms/SLPVectorizer/AArch64/div.ll   | 54 +++++++++++++++++++
 1 file changed, 54 insertions(+)

diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/div.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/div.ll
index e972955e26cb47..9f8b99af59740d 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/div.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/div.ll
@@ -551,3 +551,57 @@ define <4 x i32> @slp_v4i32_Op1_unknown_Op2_const_pow2(<4 x i32> %a)
   %r3 = insertelement <4 x i32> %r2, i32 %4, i32 3
   ret <4 x i32> %r3
 }
+
+; Computes (a / const + x - y) * z per lane, where const is 2 for lane 0 and 4 for lane 1.
+define <2 x i32> @vectorize_sdiv_v2i32(<2 x i32> %a, <2 x i32> %x, <2 x i32> %y, <2 x i32> %z)
+; NO-SVE-LABEL: define <2 x i32> @vectorize_sdiv_v2i32(
+; NO-SVE-SAME: <2 x i32> [[A:%.*]], <2 x i32> [[X:%.*]], <2 x i32> [[Y:%.*]], <2 x i32> [[Z:%.*]]) #[[ATTR0]] {
+; NO-SVE-NEXT:    [[A0:%.*]] = extractelement <2 x i32> [[A]], i64 0
+; NO-SVE-NEXT:    [[A1:%.*]] = extractelement <2 x i32> [[A]], i64 1
+; NO-SVE-NEXT:    [[TMP1:%.*]] = sdiv i32 [[A0]], 2
+; NO-SVE-NEXT:    [[TMP2:%.*]] = sdiv i32 [[A1]], 4
+; NO-SVE-NEXT:    [[X0:%.*]] = extractelement <2 x i32> [[X]], i64 0
+; NO-SVE-NEXT:    [[X1:%.*]] = extractelement <2 x i32> [[X]], i64 1
+; NO-SVE-NEXT:    [[TMP3:%.*]] = add i32 [[TMP1]], [[X0]]
+; NO-SVE-NEXT:    [[TMP4:%.*]] = add i32 [[TMP2]], [[X1]]
+; NO-SVE-NEXT:    [[Y0:%.*]] = extractelement <2 x i32> [[Y]], i64 0
+; NO-SVE-NEXT:    [[Y1:%.*]] = extractelement <2 x i32> [[Y]], i64 1
+; NO-SVE-NEXT:    [[TMP5:%.*]] = sub i32 [[TMP3]], [[Y0]]
+; NO-SVE-NEXT:    [[TMP6:%.*]] = sub i32 [[TMP4]], [[Y1]]
+; NO-SVE-NEXT:    [[Z0:%.*]] = extractelement <2 x i32> [[Z]], i64 0
+; NO-SVE-NEXT:    [[Z1:%.*]] = extractelement <2 x i32> [[Z]], i64 1
+; NO-SVE-NEXT:    [[TMP7:%.*]] = mul i32 [[TMP5]], [[Z0]]
+; NO-SVE-NEXT:    [[TMP8:%.*]] = mul i32 [[TMP6]], [[Z1]]
+; NO-SVE-NEXT:    [[RES0:%.*]] = insertelement <2 x i32> poison, i32 [[TMP7]], i32 0
+; NO-SVE-NEXT:    [[RES1:%.*]] = insertelement <2 x i32> [[RES0]], i32 [[TMP8]], i32 1
+; NO-SVE-NEXT:    ret <2 x i32> [[RES1]]
+;
+; SVE-LABEL: define <2 x i32> @vectorize_sdiv_v2i32(
+; SVE-SAME: <2 x i32> [[A:%.*]], <2 x i32> [[X:%.*]], <2 x i32> [[Y:%.*]], <2 x i32> [[Z:%.*]]) #[[ATTR0]] {
+; SVE-NEXT:    [[TMP1:%.*]] = sdiv <2 x i32> [[A]], <i32 2, i32 4>
+; SVE-NEXT:    [[TMP2:%.*]] = add <2 x i32> [[TMP1]], [[X]]
+; SVE-NEXT:    [[TMP3:%.*]] = sub <2 x i32> [[TMP2]], [[Y]]
+; SVE-NEXT:    [[TMP4:%.*]] = mul <2 x i32> [[TMP3]], [[Z]]
+; SVE-NEXT:    ret <2 x i32> [[TMP4]]
+;
+{
+  %a0 = extractelement <2 x i32> %a, i64 0
+  %a1 = extractelement <2 x i32> %a, i64 1
+  %1 = sdiv i32 %a0, 2
+  %2 = sdiv i32 %a1, 4
+  %x0 = extractelement <2 x i32> %x, i64 0
+  %x1 = extractelement <2 x i32> %x, i64 1
+  %3 = add i32 %1, %x0
+  %4 = add i32 %2, %x1
+  %y0 = extractelement <2 x i32> %y, i64 0
+  %y1 = extractelement <2 x i32> %y, i64 1
+  %5 = sub i32 %3, %y0
+  %6 = sub i32 %4, %y1
+  %z0 = extractelement <2 x i32> %z, i64 0
+  %z1 = extractelement <2 x i32> %z, i64 1
+  %7 = mul i32 %5, %z0
+  %8 = mul i32 %6, %z1
+  %res0 = insertelement <2 x i32> poison, i32 %7, i32 0
+  %res1 = insertelement <2 x i32> %res0, i32 %8, i32 1
+  ret <2 x i32> %res1
+}



More information about the llvm-commits mailing list