[llvm-branch-commits] [clang] 1786075 - Revert "[SME] Add intrinsics for FCVT(wid.) and FCVTL (#90215)"
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Thu May 23 07:13:05 PDT 2024
Author: Lukacma
Date: 2024-05-23T15:13:01+01:00
New Revision: 1786075d2a347465e518cfaa04a40cb75eb75828
URL: https://github.com/llvm/llvm-project/commit/1786075d2a347465e518cfaa04a40cb75eb75828
DIFF: https://github.com/llvm/llvm-project/commit/1786075d2a347465e518cfaa04a40cb75eb75828.diff
LOG: Revert "[SME] Add intrinsics for FCVT(wid.) and FCVTL (#90215)"
This reverts commit 05c154f2bcba34f002b1f0c22c7a9e9614e9d83c.
Added:
Modified:
clang/include/clang/Basic/arm_sve.td
clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_cvt.c
llvm/include/llvm/IR/IntrinsicsAArch64.td
llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
llvm/test/CodeGen/AArch64/sme2-intrinsics-cvt.ll
Removed:
clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_cvtl.c
llvm/test/CodeGen/AArch64/sme2-intrinsics-cvtl.ll
################################################################################
diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td
index 4f28547998550..03570f94de666 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -2265,10 +2265,6 @@ let TargetGuard = "sme2" in {
def SVCVT_S32_F32_X4 : SInst<"svcvt_{d}[_f32_x4]", "4.d4.M", "i", MergeNone, "aarch64_sve_fcvtzs_x4", [IsStreaming, IsOverloadWhileOrMultiVecCvt], []>;
}
-let TargetGuard = "sme-f16f16" in {
- def SVCVT_F32_X2 : SInst<"svcvt_{d}[_f16_x2]", "2h", "f", MergeNone, "aarch64_sve_fcvt_widen_x2", [ IsStreaming],[]>;
-}
-
//
// Multi-vector floating-point convert from single-precision to interleaved half-precision/BFloat16
//
@@ -2277,13 +2273,6 @@ let TargetGuard = "sme2" in {
def SVCVTN_BF16_X2 : SInst<"svcvtn_bf16[_f32_x2]", "$2", "f", MergeNone, "aarch64_sve_bfcvtn_x2", [IsOverloadNone, IsStreaming],[]>;
}
-//
-//Multi-vector floating-point convert from half-precision to deinterleaved single-precision.
-//
-let TargetGuard = "sme-f16f16" in {
- def SVCVTL_F32_X2 : SInst<"svcvtl_f32[_f16_x2]", "2h", "f", MergeNone, "aarch64_sve_fcvtl_widen_x2", [ IsStreaming],[]>;
-}
-
//
// Multi-vector saturating extract narrow
//
diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_cvt.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_cvt.c
index e26499d3a63cc..4a5ee7e021f74 100644
--- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_cvt.c
+++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_cvt.c
@@ -497,25 +497,3 @@ svuint8_t test_qcvt_u8_s32_x4(svint32x4_t zn) __arm_streaming {
svuint16_t test_qcvt_u16_s64_x4(svint64x4_t zn) __arm_streaming {
return SVE_ACLE_FUNC(svqcvt_u16,_s64_x4,,)(zn);
}
-
-// CHECK-LABEL: @test_cvt_f32_x2(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.fcvt.widen.x2.nxv4f32(<vscale x 8 x half> [[ZN:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> poison, <vscale x 4 x float> [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> [[TMP2]], <vscale x 4 x float> [[TMP3]], i64 4)
-// CHECK-NEXT: ret <vscale x 8 x float> [[TMP4]]
-//
-// CPP-CHECK-LABEL: @_Z15test_cvt_f32_x2u13__SVFloat16_t(
-// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.fcvt.widen.x2.nxv4f32(<vscale x 8 x half> [[ZN:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> poison, <vscale x 4 x float> [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> [[TMP2]], <vscale x 4 x float> [[TMP3]], i64 4)
-// CPP-CHECK-NEXT: ret <vscale x 8 x float> [[TMP4]]
-//
-__attribute__((target("sme-f16f16"))) svfloat32x2_t test_cvt_f32_x2(svfloat16_t zn) __arm_streaming {
- return SVE_ACLE_FUNC(svcvt_f32,_f16_x2,,)(zn);
-}
diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_cvtl.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_cvtl.c
deleted file mode 100644
index 1142065614b8f..0000000000000
--- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_cvtl.c
+++ /dev/null
@@ -1,40 +0,0 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
-
-// REQUIRES: aarch64-registered-target
-
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme-f16f16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme-f16f16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-// RUN: %clang_cc1 -D__SVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme-f16f16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -D__SVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme-f16f16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme-f16f16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
-
-#include <arm_sme.h>
-
-#ifdef SVE_OVERLOADED_FORMS
-// A simple used,unused... macro, long enough to represent any SVE builtin.
-#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
-#else
-#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
-#endif
-
-// CHECK-LABEL: @test_cvtl_f32_x2(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.fcvtl.widen.x2.nxv4f32(<vscale x 8 x half> [[ZN:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> poison, <vscale x 4 x float> [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> [[TMP2]], <vscale x 4 x float> [[TMP3]], i64 4)
-// CHECK-NEXT: ret <vscale x 8 x float> [[TMP4]]
-//
-// CPP-CHECK-LABEL: @_Z16test_cvtl_f32_x2u13__SVFloat16_t(
-// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.fcvtl.widen.x2.nxv4f32(<vscale x 8 x half> [[ZN:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> poison, <vscale x 4 x float> [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> [[TMP2]], <vscale x 4 x float> [[TMP3]], i64 4)
-// CPP-CHECK-NEXT: ret <vscale x 8 x float> [[TMP4]]
-//
-svfloat32x2_t test_cvtl_f32_x2(svfloat16_t zn) __arm_streaming {
- return SVE_ACLE_FUNC(svcvtl_f32,_f16_x2,,)(zn);
-}
diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index aad83823881f8..4544cf35fb7b3 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -3121,11 +3121,6 @@ let TargetPrefix = "aarch64" in {
: DefaultAttrsIntrinsic<[llvm_nxv8bf16_ty],
[llvm_nxv4f32_ty, llvm_nxv4f32_ty],
[IntrNoMem]>;
-
- class SME2_CVT_WIDENING_VG2_Intrinsic
- : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
- [LLVMSubdivide2VectorType<0>], [IntrNoMem]>;
-
class SME2_CVT_VG4_SINGLE_Intrinsic
: DefaultAttrsIntrinsic<[LLVMSubdivide4VectorType<0>],
@@ -3417,13 +3412,6 @@ let TargetPrefix = "aarch64" in {
def int_aarch64_sme_suvdot_lane_za32_vg1x4 : SME2_Matrix_ArrayVector_VG4_Multi_Index_Intrinsic;
def int_aarch64_sme_usvdot_lane_za32_vg1x4 : SME2_Matrix_ArrayVector_VG4_Multi_Index_Intrinsic;
-
- //
- //Multi-vector floating-point convert from half-precision to deinterleaved single-precision.
- //
-
- def int_aarch64_sve_fcvtl_widen_x2 : SME2_CVT_WIDENING_VG2_Intrinsic;
-
//
// Multi-vector floating-point CVT from single-precision to interleaved half-precision/BFloat16
//
@@ -3443,7 +3431,7 @@ let TargetPrefix = "aarch64" in {
def int_aarch64_sve_fcvtzu_x4 : SME2_CVT_X4_Intrinsic;
def int_aarch64_sve_scvtf_x4 : SME2_CVT_X4_Intrinsic;
def int_aarch64_sve_ucvtf_x4 : SME2_CVT_X4_Intrinsic;
- def int_aarch64_sve_fcvt_widen_x2 : SME2_CVT_WIDENING_VG2_Intrinsic;
+
//
// Multi-vector saturating extract narrow
//
diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index 8983be140f1d5..25f2e4d7c4de6 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -5717,12 +5717,6 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
case Intrinsic::aarch64_sve_ucvtf_x4:
SelectCVTIntrinsic(Node, 4, AArch64::UCVTF_4Z4Z_StoS);
return;
- case Intrinsic::aarch64_sve_fcvt_widen_x2:
- SelectUnaryMultiIntrinsic(Node, 2, false, AArch64::FCVT_2ZZ_H_S);
- return;
- case Intrinsic::aarch64_sve_fcvtl_widen_x2:
- SelectUnaryMultiIntrinsic(Node, 2, false, AArch64::FCVTL_2ZZ_H_S);
- return;
case Intrinsic::aarch64_sve_sclamp_single_x2:
if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
Node->getValueType(0),
diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-cvt.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-cvt.ll
index 611cdcda157e2..bc1db878cbd31 100644
--- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-cvt.ll
+++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-cvt.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme-f16f16 -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2 -verify-machineinstrs < %s | FileCheck %s
;
; FCVT
@@ -139,15 +139,6 @@ define {<vscale x 4 x float>, <vscale x 4 x float>,<vscale x 4 x float>, <vscale
ret {<vscale x 4 x float>, <vscale x 4 x float>,<vscale x 4 x float>, <vscale x 4 x float>} %res
}
-define {<vscale x 4 x float>, <vscale x 4 x float>} @multi_vector_cvt_widen_x2_f16(<vscale x 8 x half> %zn0) {
-; CHECK-LABEL: multi_vector_cvt_widen_x2_f16:
-; CHECK: // %bb.0:
-; CHECK-NEXT: fcvt { z0.s, z1.s }, z0.h
-; CHECK-NEXT: ret
- %res = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.fcvt.widen.x2.nxv4f32(<vscale x 8 x half> %zn0)
- ret {<vscale x 4 x float>, <vscale x 4 x float>} %res
-}
-
declare <vscale x 8 x half> @llvm.aarch64.sve.fcvt.x2.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 8 x bfloat> @llvm.aarch64.sve.bfcvt.x2(<vscale x 4 x float>, <vscale x 4 x float>)
declare {<vscale x 4 x i32>, <vscale x 4 x i32>} @llvm.aarch64.sve.fcvtzs.x2.nxv4i32.nxv4f32(<vscale x 4 x float>,<vscale x 4 x float>)
diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-cvtl.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-cvtl.ll
deleted file mode 100644
index 30dc7cbfaea6c..0000000000000
--- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-cvtl.ll
+++ /dev/null
@@ -1,11 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme-f16f16 -verify-machineinstrs < %s | FileCheck %s
-
-define {<vscale x 4 x float>, <vscale x 4 x float>} @multi_vector_cvtl_widen_x2_f16(<vscale x 8 x half> %zn0) {
-; CHECK-LABEL: multi_vector_cvtl_widen_x2_f16:
-; CHECK: // %bb.0:
-; CHECK-NEXT: fcvtl { z0.s, z1.s }, z0.h
-; CHECK-NEXT: ret
- %res = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.fcvtl.widen.x2.nxv4f32(<vscale x 8 x half> %zn0)
- ret {<vscale x 4 x float>, <vscale x 4 x float>} %res
-}
More information about the llvm-branch-commits mailing list