[llvm] [AArch64] optimise SVE cmp intrinsics with no active lanes (PR #104779)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Aug 22 06:09:44 PDT 2024
https://github.com/Lukacma updated https://github.com/llvm/llvm-project/pull/104779
>From a6e8a219da3769f2b59887f2ab553927dd7a7c13 Mon Sep 17 00:00:00 2001
From: Marian Lukac <Marian.Lukac at arm.com>
Date: Mon, 19 Aug 2024 12:50:33 +0000
Subject: [PATCH 1/2] [AArch64] optimise SVE cmp intrinsics with no active
lanes
---
.../AArch64/AArch64TargetTransformInfo.cpp | 23 ++
.../sve-intrinsic-comb-no-active-lanes-cmp.ll | 235 ++++++++++++++++++
2 files changed, 258 insertions(+)
create mode 100644 llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-no-active-lanes-cmp.ll
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index a416565392eabe..0254ede383d255 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -1160,6 +1160,10 @@ static std::optional<Instruction *> instCombineSVECmpNE(InstCombiner &IC,
IntrinsicInst &II) {
LLVMContext &Ctx = II.getContext();
+ // Replace by zero constant when all lanes are inactive
+ if (auto II_NA = instCombineSVENoActiveUnaryZero(IC, II))
+ return II_NA;
+
// Check that the predicate is all active
auto *Pg = dyn_cast<IntrinsicInst>(II.getArgOperand(0));
if (!Pg || Pg->getIntrinsicID() != Intrinsic::aarch64_sve_ptrue)
@@ -2131,6 +2135,25 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
case Intrinsic::aarch64_sve_st4:
case Intrinsic::aarch64_sve_st4q:
return instCombineSVENoActiveUnaryErase(IC, II, 4);
+ case Intrinsic::aarch64_sve_cmpeq:
+ case Intrinsic::aarch64_sve_cmpeq_wide:
+ case Intrinsic::aarch64_sve_cmpge:
+ case Intrinsic::aarch64_sve_cmpge_wide:
+ case Intrinsic::aarch64_sve_cmpgt:
+ case Intrinsic::aarch64_sve_cmpgt_wide:
+ case Intrinsic::aarch64_sve_cmphi:
+ case Intrinsic::aarch64_sve_cmphi_wide:
+ case Intrinsic::aarch64_sve_cmphs:
+ case Intrinsic::aarch64_sve_cmphs_wide:
+ case Intrinsic::aarch64_sve_cmple_wide:
+ case Intrinsic::aarch64_sve_cmplo_wide:
+ case Intrinsic::aarch64_sve_cmpls_wide:
+ case Intrinsic::aarch64_sve_cmplt_wide:
+ case Intrinsic::aarch64_sve_fcmpeq:
+ case Intrinsic::aarch64_sve_fcmpge:
+ case Intrinsic::aarch64_sve_fcmpgt:
+ case Intrinsic::aarch64_sve_fcmpne:
+ case Intrinsic::aarch64_sve_fcmpuo:
case Intrinsic::aarch64_sve_ld1_gather:
case Intrinsic::aarch64_sve_ld1_gather_scalar_offset:
case Intrinsic::aarch64_sve_ld1_gather_sxtw:
diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-no-active-lanes-cmp.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-no-active-lanes-cmp.ll
new file mode 100644
index 00000000000000..769cadd3ffa729
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-no-active-lanes-cmp.ll
@@ -0,0 +1,235 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -passes=instcombine < %s | FileCheck %s
+target triple = "aarch64-unknown-linux-gnu"
+
+define <vscale x 16 x i1> @test_cmpeq(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b){
+; CHECK-LABEL: define <vscale x 16 x i1> @test_cmpeq(
+; CHECK-SAME: <vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: ret <vscale x 16 x i1> zeroinitializer
+;
+entry:
+ %0 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpeq.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
+ ret <vscale x 16 x i1> %0
+}
+
+define <vscale x 16 x i1> @test_cmpeq_wide(<vscale x 16 x i8> %a, <vscale x 2 x i64> %b){
+; CHECK-LABEL: define <vscale x 16 x i1> @test_cmpeq_wide(
+; CHECK-SAME: <vscale x 16 x i8> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: ret <vscale x 16 x i1> zeroinitializer
+;
+entry:
+ %0 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpeq.wide.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %a, <vscale x 2 x i64> %b)
+ ret <vscale x 16 x i1> %0
+}
+
+define <vscale x 16 x i1> @test_cmpge(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b){
+; CHECK-LABEL: define <vscale x 16 x i1> @test_cmpge(
+; CHECK-SAME: <vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: ret <vscale x 16 x i1> zeroinitializer
+;
+entry:
+ %0 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
+ ret <vscale x 16 x i1> %0
+}
+
+define <vscale x 16 x i1> @test_cmpge_wide(<vscale x 16 x i8> %a, <vscale x 2 x i64> %b){
+; CHECK-LABEL: define <vscale x 16 x i1> @test_cmpge_wide(
+; CHECK-SAME: <vscale x 16 x i8> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: ret <vscale x 16 x i1> zeroinitializer
+;
+entry:
+ %0 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.wide.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %a, <vscale x 2 x i64> %b)
+ ret <vscale x 16 x i1> %0
+}
+
+define <vscale x 16 x i1> @test_cmpgt(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b){
+; CHECK-LABEL: define <vscale x 16 x i1> @test_cmpgt(
+; CHECK-SAME: <vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: ret <vscale x 16 x i1> zeroinitializer
+;
+entry:
+ %0 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpgt.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
+ ret <vscale x 16 x i1> %0
+}
+
+define <vscale x 16 x i1> @test_cmpgt_wide(<vscale x 16 x i8> %a, <vscale x 2 x i64> %b){
+; CHECK-LABEL: define <vscale x 16 x i1> @test_cmpgt_wide(
+; CHECK-SAME: <vscale x 16 x i8> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: ret <vscale x 16 x i1> zeroinitializer
+;
+entry:
+ %0 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpgt.wide.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %a, <vscale x 2 x i64> %b)
+ ret <vscale x 16 x i1> %0
+}
+
+define <vscale x 16 x i1> @test_cmphi(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b){
+; CHECK-LABEL: define <vscale x 16 x i1> @test_cmphi(
+; CHECK-SAME: <vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: ret <vscale x 16 x i1> zeroinitializer
+;
+entry:
+ %0 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmphi.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
+ ret <vscale x 16 x i1> %0
+}
+
+define <vscale x 16 x i1> @test_cmphi_wide(<vscale x 16 x i8> %a, <vscale x 2 x i64> %b){
+; CHECK-LABEL: define <vscale x 16 x i1> @test_cmphi_wide(
+; CHECK-SAME: <vscale x 16 x i8> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: ret <vscale x 16 x i1> zeroinitializer
+;
+entry:
+ %0 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmphi.wide.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %a, <vscale x 2 x i64> %b)
+ ret <vscale x 16 x i1> %0
+}
+
+define <vscale x 16 x i1> @test_cmphs(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b){
+; CHECK-LABEL: define <vscale x 16 x i1> @test_cmphs(
+; CHECK-SAME: <vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: ret <vscale x 16 x i1> zeroinitializer
+;
+entry:
+ %0 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmphs.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
+ ret <vscale x 16 x i1> %0
+}
+
+define <vscale x 16 x i1> @test_cmphs_wide(<vscale x 16 x i8> %a, <vscale x 2 x i64> %b){
+; CHECK-LABEL: define <vscale x 16 x i1> @test_cmphs_wide(
+; CHECK-SAME: <vscale x 16 x i8> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: ret <vscale x 16 x i1> zeroinitializer
+;
+entry:
+ %0 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmphs.wide.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %a, <vscale x 2 x i64> %b)
+ ret <vscale x 16 x i1> %0
+}
+
+define <vscale x 16 x i1> @test_cmple_wide(<vscale x 16 x i8> %a, <vscale x 2 x i64> %b){
+; CHECK-LABEL: define <vscale x 16 x i1> @test_cmple_wide(
+; CHECK-SAME: <vscale x 16 x i8> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: ret <vscale x 16 x i1> zeroinitializer
+;
+entry:
+ %0 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmple.wide.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %a, <vscale x 2 x i64> %b)
+ ret <vscale x 16 x i1> %0
+}
+
+define <vscale x 16 x i1> @test_cmplo_wide(<vscale x 16 x i8> %a, <vscale x 2 x i64> %b){
+; CHECK-LABEL: define <vscale x 16 x i1> @test_cmplo_wide(
+; CHECK-SAME: <vscale x 16 x i8> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: ret <vscale x 16 x i1> zeroinitializer
+;
+entry:
+ %0 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmplo.wide.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %a, <vscale x 2 x i64> %b)
+ ret <vscale x 16 x i1> %0
+}
+
+define <vscale x 16 x i1> @test_cmpls_wide(<vscale x 16 x i8> %a, <vscale x 2 x i64> %b){
+; CHECK-LABEL: define <vscale x 16 x i1> @test_cmpls_wide(
+; CHECK-SAME: <vscale x 16 x i8> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: ret <vscale x 16 x i1> zeroinitializer
+;
+entry:
+ %0 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpls.wide.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %a, <vscale x 2 x i64> %b)
+ ret <vscale x 16 x i1> %0
+}
+
+define <vscale x 16 x i1> @test_cmplt_wide(<vscale x 16 x i8> %a, <vscale x 2 x i64> %b){
+; CHECK-LABEL: define <vscale x 16 x i1> @test_cmplt_wide(
+; CHECK-SAME: <vscale x 16 x i8> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: ret <vscale x 16 x i1> zeroinitializer
+;
+entry:
+ %0 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmplt.wide.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %a, <vscale x 2 x i64> %b)
+ ret <vscale x 16 x i1> %0
+}
+
+define <vscale x 16 x i1> @test_cmpne(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b){
+; CHECK-LABEL: define <vscale x 16 x i1> @test_cmpne(
+; CHECK-SAME: <vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: ret <vscale x 16 x i1> zeroinitializer
+;
+entry:
+ %0 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpne.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
+ ret <vscale x 16 x i1> %0
+}
+
+define <vscale x 16 x i1> @test_cmpne_wide(<vscale x 16 x i8> %a, <vscale x 2 x i64> %b){
+; CHECK-LABEL: define <vscale x 16 x i1> @test_cmpne_wide(
+; CHECK-SAME: <vscale x 16 x i8> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: ret <vscale x 16 x i1> zeroinitializer
+;
+entry:
+ %0 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpne.wide.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %a, <vscale x 2 x i64> %b)
+ ret <vscale x 16 x i1> %0
+}
+
+define <vscale x 8 x i1> @test_fcmpeq(<vscale x 8 x half> %a, <vscale x 8 x half> %b){
+; CHECK-LABEL: define <vscale x 8 x i1> @test_fcmpeq(
+; CHECK-SAME: <vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: ret <vscale x 8 x i1> zeroinitializer
+;
+entry:
+ %0 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.fcmpeq.nxv16i8(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
+ ret <vscale x 8 x i1> %0
+}
+
+define <vscale x 8 x i1> @test_fcmpge(<vscale x 8 x half> %a, <vscale x 8 x half> %b){
+; CHECK-LABEL: define <vscale x 8 x i1> @test_fcmpge(
+; CHECK-SAME: <vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: ret <vscale x 8 x i1> zeroinitializer
+;
+entry:
+ %0 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.fcmpge.nxv16i8(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
+ ret <vscale x 8 x i1> %0
+}
+
+define <vscale x 8 x i1> @test_fcmpgt(<vscale x 8 x half> %a, <vscale x 8 x half> %b){
+; CHECK-LABEL: define <vscale x 8 x i1> @test_fcmpgt(
+; CHECK-SAME: <vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: ret <vscale x 8 x i1> zeroinitializer
+;
+entry:
+ %0 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.fcmpgt.nxv16i8(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
+ ret <vscale x 8 x i1> %0
+}
+
+define <vscale x 8 x i1> @test_fcmpne(<vscale x 8 x half> %a, <vscale x 8 x half> %b){
+; CHECK-LABEL: define <vscale x 8 x i1> @test_fcmpne(
+; CHECK-SAME: <vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: ret <vscale x 8 x i1> zeroinitializer
+;
+entry:
+ %0 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.fcmpne.nxv16i8(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
+ ret <vscale x 8 x i1> %0
+}
+
+define <vscale x 8 x i1> @test_fcmpuo(<vscale x 8 x half> %a, <vscale x 8 x half> %b){
+; CHECK-LABEL: define <vscale x 8 x i1> @test_fcmpuo(
+; CHECK-SAME: <vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: ret <vscale x 8 x i1> zeroinitializer
+;
+entry:
+ %0 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.fcmpuo.nxv16i8(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
+ ret <vscale x 8 x i1> %0
+}
+
>From acce4c21614923da2c4630a206560ac74242d952 Mon Sep 17 00:00:00 2001
From: Marian Lukac <Marian.Lukac at arm.com>
Date: Wed, 21 Aug 2024 11:15:55 +0000
Subject: [PATCH 2/2] added missing intrinsics
---
llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp | 2 ++
.../AArch64/sve-intrinsic-comb-no-active-lanes-cmp.ll | 10 ++++++++++
2 files changed, 12 insertions(+)
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 0254ede383d255..253b36f1fea272 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -2149,6 +2149,8 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
case Intrinsic::aarch64_sve_cmplo_wide:
case Intrinsic::aarch64_sve_cmpls_wide:
case Intrinsic::aarch64_sve_cmplt_wide:
+ case Intrinsic::aarch64_sve_facge:
+ case Intrinsic::aarch64_sve_facgt:
case Intrinsic::aarch64_sve_fcmpeq:
case Intrinsic::aarch64_sve_fcmpge:
case Intrinsic::aarch64_sve_fcmpgt:
diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-no-active-lanes-cmp.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-no-active-lanes-cmp.ll
index 769cadd3ffa729..1833bb6db248d9 100644
--- a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-no-active-lanes-cmp.ll
+++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-no-active-lanes-cmp.ll
@@ -178,6 +178,16 @@ entry:
ret <vscale x 16 x i1> %0
}
+define <vscale x 8 x i1> @test_facge(<vscale x 8 x half> %a, <vscale x 8 x half> %b){
+entry:
+ %0 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.facge.nxv8f16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
+ ret <vscale x 8 x i1> %0
+}
+
+define <vscale x 8 x i1> @test_facgt(<vscale x 8 x half> %a, <vscale x 8 x half> %b){
+entry:
+ %0 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.facgt.nxv8f16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
+ ret <vscale x 8 x i1> %0
+}
+
define <vscale x 8 x i1> @test_fcmpeq(<vscale x 8 x half> %a, <vscale x 8 x half> %b){
; CHECK-LABEL: define <vscale x 8 x i1> @test_fcmpeq(
; CHECK-SAME: <vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) {
More information about the llvm-commits mailing list