[PATCH] D142456: [AArch64][CostModel]: Add costs for zero/sign extend.

Wed Jan 25 02:13:37 PST 2023

hassnaa-arm updated this revision to Diff 492036.
hassnaa-arm marked 3 inline comments as done.
hassnaa-arm added a comment.

Recalculate costs. In the code-generation test file, use a real variable instead of undef to get accurate costs.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D142456/new/

https://reviews.llvm.org/D142456

Files:
  llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
  llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
  llvm/test/Analysis/CostModel/AArch64/sve-ext.ll


Index: llvm/test/Analysis/CostModel/AArch64/sve-ext.ll
===================================================================

--- /dev/null
+++ llvm/test/Analysis/CostModel/AArch64/sve-ext.ll
@@ -0,0 +1,25 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
+; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple aarch64-linux-gnu -mattr=+sve -S -o - < %s | FileCheck %s
+
+target triple = "aarch64-unknown-linux-gnu"
+
+define void @sve_ext() {
+; CHECK-LABEL: 'sve_ext'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %zext_nxv16_i8_to_i16 = zext <vscale x 16 x i8> undef to <vscale x 16 x i16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %zext_nxv16_i8_to_i32 = zext <vscale x 16 x i8> undef to <vscale x 16 x i32>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %zext_nxv16_i8_to_i64 = zext <vscale x 16 x i8> undef to <vscale x 16 x i64>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %sext_nxv16_i8_to_i16 = sext <vscale x 16 x i8> undef to <vscale x 16 x i16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %sext_nxv16_i8_to_i32 = sext <vscale x 16 x i8> undef to <vscale x 16 x i32>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %sext_nxv16_i8_to_i64 = sext <vscale x 16 x i8> undef to <vscale x 16 x i64>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  %zext_nxv16_i8_to_i16 = zext <vscale x 16 x i8> undef to <vscale x 16 x i16>
+  %zext_nxv16_i8_to_i32 = zext <vscale x 16 x i8> undef to <vscale x 16 x i32>
+  %zext_nxv16_i8_to_i64 = zext <vscale x 16 x i8> undef to <vscale x 16 x i64>
+
+  %sext_nxv16_i8_to_i16 = sext <vscale x 16 x i8> undef to <vscale x 16 x i16>
+  %sext_nxv16_i8_to_i32 = sext <vscale x 16 x i8> undef to <vscale x 16 x i32>
+  %sext_nxv16_i8_to_i64 = sext <vscale x 16 x i8> undef to <vscale x 16 x i64>
+
+  ret void
+}
Index: llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -2059,6 +2059,17 @@
     { ISD::BITCAST, MVT::nxv2i16, MVT::nxv2f16, 0 },
     { ISD::BITCAST, MVT::nxv4i16, MVT::nxv4f16, 0 },
     { ISD::BITCAST, MVT::nxv2i32, MVT::nxv2f32, 0 },
+
+    // Add costs for extending to illegal (too wide) scalable vectors.
+    // Zero/sign extends are implemented by multiple unpack operations,
+    // where each operation has a cost of 2.
+    { ISD::ZERO_EXTEND, MVT::nxv16i16, MVT::nxv16i8, 6},
+    { ISD::ZERO_EXTEND, MVT::nxv16i32, MVT::nxv16i8, 12},
+    { ISD::ZERO_EXTEND, MVT::nxv16i64, MVT::nxv16i8, 28},
+
+    { ISD::SIGN_EXTEND, MVT::nxv16i16, MVT::nxv16i8, 6},
+    { ISD::SIGN_EXTEND, MVT::nxv16i32, MVT::nxv16i8, 12},
+    { ISD::SIGN_EXTEND, MVT::nxv16i64, MVT::nxv16i8, 28},
   };
 
   if (const auto *Entry = ConvertCostTableLookup(ConversionTbl, ISD,
Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -18311,6 +18311,16 @@
   // we can convert that DUP into another extract_high (of a bigger DUP), which
   // helps the backend to decide that an sabdl2 would be useful, saving a real
   // extract_high operation.
+
+  // zext( shl (vscalei32, constant), i64 )
+
+  // if(N->getOpcode() == ISD::ZERO_EXTEND &&
+  //    N->getOperand(0).getOpcode() == ISD::SHL &&
+  //    N->getOperand(0).getOperand(0).getOpcode() == intrinsic::vscale.i32) {
+
+  //     return N->getOperand(0).getNode();
+  // }
+
   if (!DCI.isBeforeLegalizeOps() && N->getOpcode() == ISD::ZERO_EXTEND &&
       (N->getOperand(0).getOpcode() == ISD::ABDU ||
        N->getOperand(0).getOpcode() == ISD::ABDS)) {


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D142456.492036.patch
Type: text/x-patch
Size: 3980 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20230125/0ca344e8/attachment.bin>