[PATCH] D141439: [AARCH64][SVE] Do not optimize vector conversions
Sjoerd Meijer via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Thu Jan 19 08:50:53 PST 2023
This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rG68f45796edbd: [AARCH64][SVE] Do not optimize vector conversions (authored by bzinodev, committed by SjoerdMeijer).
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D141439/new/
https://reviews.llvm.org/D141439
Files:
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/test/CodeGen/AArch64/sve-fixed-vector-zext.ll
Index: llvm/test/CodeGen/AArch64/sve-fixed-vector-zext.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve-fixed-vector-zext.ll
@@ -0,0 +1,59 @@
+
+; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mcpu=neoverse-v1 -O3 -opaque-pointers -aarch64-sve-vector-bits-min=256 -verify-machineinstrs | FileCheck %s --check-prefixes=SVE256
+; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mcpu=neoverse-v1 -O3 -opaque-pointers -aarch64-sve-vector-bits-min=128 -verify-machineinstrs | FileCheck %s --check-prefixes=NEON
+; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mcpu=neoverse-n1 -O3 -opaque-pointers -verify-machineinstrs | FileCheck %s --check-prefixes=NEON
+; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mcpu=neoverse-v2 -O3 -opaque-pointers -verify-machineinstrs | FileCheck %s --check-prefixes=NEON
+
+define internal i32 @test(ptr nocapture readonly %p1, i32 %i1, ptr nocapture readonly %p2, i32 %i2) {
+; SVE256-LABEL: test:
+; SVE256: ld1b { z0.h }, p0/z,
+; SVE256: ld1b { z1.h }, p0/z,
+; SVE256: sub z0.h, z0.h, z1.h
+; SVE256-NEXT: sunpklo z1.s, z0.h
+; SVE256-NEXT: ext z0.b, z0.b, z0.b, #16
+; SVE256-NEXT: sunpklo z0.s, z0.h
+; SVE256-NEXT: add z0.s, z1.s, z0.s
+; SVE256-NEXT: uaddv d0, p1, z0.s
+
+; NEON-LABEL: test:
+; NEON: tbl
+; NEON-NEXT: tbl
+; NEON-NEXT: tbl
+; NEON-NEXT: tbl
+; NEON-NEXT: tbl
+; NEON-NEXT: tbl
+; NEON-NEXT: tbl
+; NEON-NEXT: tbl
+; NEON: addv
+
+
+L.entry:
+ br label %L1
+
+L1: ; preds = %L1, %L.entry
+ %a = phi i32 [ 16, %L.entry ], [ %14, %L1 ]
+ %b = phi i32 [ 0, %L.entry ], [ %13, %L1 ]
+ %i = phi i32 [ 0, %L.entry ], [ %12, %L1 ]
+ %0 = mul i32 %b, %i1
+ %1 = sext i32 %0 to i64
+ %2 = getelementptr i8, ptr %p1, i64 %1
+ %3 = mul i32 %b, %i2
+ %4 = sext i32 %3 to i64
+ %5 = getelementptr i8, ptr %p2, i64 %4
+ %6 = load <16 x i8>, ptr %2, align 1
+ %7 = zext <16 x i8> %6 to <16 x i32>
+ %8 = load <16 x i8>, ptr %5, align 1
+ %9 = zext <16 x i8> %8 to <16 x i32>
+ %10 = sub nsw <16 x i32> %7, %9
+ %11 = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %10)
+ %12 = add i32 %11, %i
+ %13 = add nuw nsw i32 %b, 1
+ %14 = add nsw i32 %a, -1
+ %.not = icmp eq i32 %14, 0
+ br i1 %.not, label %L2, label %L1
+
+L2: ; preds = %L1
+ ret i32 %12
+}
+
+declare i32 @llvm.vector.reduce.add.v16i32(<16 x i32>)
Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -14177,6 +14177,11 @@
bool AArch64TargetLowering::optimizeExtendOrTruncateConversion(Instruction *I,
Loop *L) const {
+ // shuffle_vector instructions are serialized when targeting SVE,
+ // see LowerSPLAT_VECTOR. This peephole is not beneficial.
+ if (Subtarget->useSVEForFixedLengthVectors())
+ return false;
+
// Try to optimize conversions using tbl. This requires materializing constant
// index vectors, which can increase code size and add loads. Skip the
// transform unless the conversion is in a loop block guaranteed to execute
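
For context (not part of the committed patch): optimizeExtendOrTruncateConversion is the peephole that rewrites fixed-width extend/truncate conversions into tbl lookups, which is what the NEON check lines above expect. With useSVEForFixedLengthVectors() the change makes the hook bail out, so a widening conversion like the one sketched below is left to the SVE fixed-length lowering (ld1b + unpacks, per the SVE256 check lines) instead. The snippet is only an illustrative sketch with an invented function name, shown outside a loop for brevity; the real transform additionally requires the conversion to sit in a loop block guaranteed to execute, as in the test above.

; Illustrative sketch only; not part of the patch. A fixed-width widening
; conversion of the kind the tbl peephole targets.
define <16 x i32> @widen_sketch(ptr %p) {
  %v = load <16 x i8>, ptr %p, align 1
  %w = zext <16 x i8> %v to <16 x i32>
  ret <16 x i32> %w
}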