[llvm] f598b61 - [AArch64][SME] Non-streaming compatible SCVTF emitted with --force-streaming-compatible-sve
Dinar Temirbulatov via llvm-commits
llvm-commits at lists.llvm.org
Mon Aug 14 06:50:54 PDT 2023
Author: Dinar Temirbulatov
Date: 2023-08-14T13:49:57Z
New Revision: f598b616e0112690900af56202a670a10d767402
URL: https://github.com/llvm/llvm-project/commit/f598b616e0112690900af56202a670a10d767402
DIFF: https://github.com/llvm/llvm-project/commit/f598b616e0112690900af56202a670a10d767402.diff
LOG: [AArch64][SME] Non-streaming compatible SCVTF emitted with --force-streaming-compatible-sve
For scalar integer-to-float conversions in streaming-compatible SVE mode, use
the non-NEON version of the convert instruction.
Differential Revision: https://reviews.llvm.org/D157698
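
The distinction the patch relies on is that hasNEON() only reflects the target
feature, while streaming and streaming-compatible functions cannot assume NEON
is usable at run time. A minimal sketch of the predicate, assuming the rough
shape of AArch64Subtarget at the time of this commit (the helper names and the
FEAT_SME_FA64 carve-out are paraphrased, not quoted from the tree):

  // Paraphrased sketch of AArch64Subtarget::isNeonAvailable(); the helper
  // predicates below are assumptions, not quoted from the repository.
  bool AArch64Subtarget::isNeonAvailable() const {
    // The feature must be present at all.
    if (!hasNEON())
      return false;
    // FEAT_SME_FA64 makes the full A64 ISA, including NEON, legal even in
    // streaming mode.
    if (hasSMEFA64())
      return true;
    // Otherwise NEON is only usable when the function is known to be
    // non-streaming; streaming-compatible code must also stay NEON-free.
    return !isStreaming() && !isStreamingCompatible();
  }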
Added:
Modified:
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-to-fp.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index afba3f5411ad9c..8ed30bbbf49bef 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -16555,7 +16555,8 @@ static SDValue performIntToFpCombine(SDNode *N, SelectionDAG &DAG,
// conversion, use an FP load and an AdvSIMD scalar {S|U}CVTF instead.
// This eliminates an "integer-to-vector-move" UOP and improves throughput.
SDValue N0 = N->getOperand(0);
- if (Subtarget->hasNEON() && ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
+ if (Subtarget->isNeonAvailable() && ISD::isNormalLoad(N0.getNode()) &&
+ N0.hasOneUse() &&
// Do not change the width of a volatile load.
!cast<LoadSDNode>(N0)->isVolatile()) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
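
For context, the (unchanged) body of this combine re-issues the integer load
as a floating-point load of the same width, so the value is produced directly
in a SIMD/FP register. A condensed sketch, paraphrased from
performIntToFpCombine rather than quoted:

  // Before (GPR path):  ldr w8, [x0]      After (FPR path):  ldr s0, [x0]
  //                     scvtf s0, w8                         scvtf s0, s0
  // The FPR path avoids the implicit GPR->FPR move uop hidden inside the
  // GPR-source SCVTF, which is why the combine only fires when NEON is
  // actually available.
  EVT VT = N->getValueType(0);
  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
  SDValue Load = DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
                             LN0->getPointerInfo(), LN0->getAlign(),
                             LN0->getMemOperand()->getFlags());
  // Keep users of the original load ordered after the new load via the chain.
  DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), Load.getValue(1));
  unsigned Opcode = N->getOpcode() == ISD::SINT_TO_FP ? AArch64ISD::SITOF
                                                      : AArch64ISD::UITOF;
  return DAG.getNode(Opcode, SDLoc(N), VT, Load);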
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-to-fp.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-to-fp.ll
index 0bd767cd436557..4ae4e6538703ca 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-to-fp.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-to-fp.ll
@@ -1122,3 +1122,201 @@ define void @scvtf_v4i64_v4f64(ptr %a, ptr %b) {
store <4 x double> %res, ptr %b
ret void
}
+
+define half @scvtf_i16_f16(ptr %0) {
+; CHECK-LABEL: scvtf_i16_f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldrsh w8, [x0]
+; CHECK-NEXT: scvtf h0, w8
+; CHECK-NEXT: ret
+ %2 = load i16, ptr %0, align 64
+ %3 = sitofp i16 %2 to half
+ ret half %3
+}
+
+define float @scvtf_i16_f32(ptr %0) {
+; CHECK-LABEL: scvtf_i16_f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldrsh w8, [x0]
+; CHECK-NEXT: scvtf s0, w8
+; CHECK-NEXT: ret
+ %2 = load i16, ptr %0, align 64
+ %3 = sitofp i16 %2 to float
+ ret float %3
+}
+
+define double @scvtf_i16_f64(ptr %0) {
+; CHECK-LABEL: scvtf_i16_f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldrsh w8, [x0]
+; CHECK-NEXT: scvtf d0, w8
+; CHECK-NEXT: ret
+ %2 = load i16, ptr %0, align 64
+ %3 = sitofp i16 %2 to double
+ ret double %3
+}
+
+define half @scvtf_i32_f16(ptr %0) {
+; CHECK-LABEL: scvtf_i32_f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr w8, [x0]
+; CHECK-NEXT: scvtf h0, w8
+; CHECK-NEXT: ret
+ %2 = load i32, ptr %0, align 64
+ %3 = sitofp i32 %2 to half
+ ret half %3
+}
+
+define float @scvtf_i32_f32(ptr %0) {
+; CHECK-LABEL: scvtf_i32_f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr w8, [x0]
+; CHECK-NEXT: scvtf s0, w8
+; CHECK-NEXT: ret
+ %2 = load i32, ptr %0, align 64
+ %3 = sitofp i32 %2 to float
+ ret float %3
+}
+
+define double @scvtf_i32_f64(ptr %0) {
+; CHECK-LABEL: scvtf_i32_f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr w8, [x0]
+; CHECK-NEXT: scvtf d0, w8
+; CHECK-NEXT: ret
+ %2 = load i32, ptr %0, align 64
+ %3 = sitofp i32 %2 to double
+ ret double %3
+}
+
+define half @scvtf_i64_f16(ptr %0) {
+; CHECK-LABEL: scvtf_i64_f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr x8, [x0]
+; CHECK-NEXT: scvtf h0, x8
+; CHECK-NEXT: ret
+ %2 = load i64, ptr %0, align 64
+ %3 = sitofp i64 %2 to half
+ ret half %3
+}
+
+define float @scvtf_i64_f32(ptr %0) {
+; CHECK-LABEL: scvtf_i64_f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr x8, [x0]
+; CHECK-NEXT: scvtf s0, x8
+; CHECK-NEXT: ret
+ %2 = load i64, ptr %0, align 64
+ %3 = sitofp i64 %2 to float
+ ret float %3
+}
+
+define double @scvtf_i64_f64(ptr %0) {
+; CHECK-LABEL: scvtf_i64_f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr x8, [x0]
+; CHECK-NEXT: scvtf d0, x8
+; CHECK-NEXT: ret
+ %2 = load i64, ptr %0, align 64
+ %3 = sitofp i64 %2 to double
+ ret double %3
+}
+
+define half @ucvtf_i16_f16(ptr %0) {
+; CHECK-LABEL: ucvtf_i16_f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldrh w8, [x0]
+; CHECK-NEXT: ucvtf h0, w8
+; CHECK-NEXT: ret
+ %2 = load i16, ptr %0, align 64
+ %3 = uitofp i16 %2 to half
+ ret half %3
+}
+
+define float @ucvtf_i16_f32(ptr %0) {
+; CHECK-LABEL: ucvtf_i16_f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr h0, [x0]
+; CHECK-NEXT: ucvtf s0, s0
+; CHECK-NEXT: ret
+ %2 = load i16, ptr %0, align 64
+ %3 = uitofp i16 %2 to float
+ ret float %3
+}
+
+define double @ucvtf_i16_f64(ptr %0) {
+; CHECK-LABEL: ucvtf_i16_f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr h0, [x0]
+; CHECK-NEXT: ucvtf d0, d0
+; CHECK-NEXT: ret
+ %2 = load i16, ptr %0, align 64
+ %3 = uitofp i16 %2 to double
+ ret double %3
+}
+
+define half @ucvtf_i32_f16(ptr %0) {
+; CHECK-LABEL: ucvtf_i32_f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr w8, [x0]
+; CHECK-NEXT: ucvtf h0, w8
+; CHECK-NEXT: ret
+ %2 = load i32, ptr %0, align 64
+ %3 = uitofp i32 %2 to half
+ ret half %3
+}
+
+define float @ucvtf_i32_f32(ptr %0) {
+; CHECK-LABEL: ucvtf_i32_f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr w8, [x0]
+; CHECK-NEXT: ucvtf s0, w8
+; CHECK-NEXT: ret
+ %2 = load i32, ptr %0, align 64
+ %3 = uitofp i32 %2 to float
+ ret float %3
+}
+
+define double @ucvtf_i32_f64(ptr %0) {
+; CHECK-LABEL: ucvtf_i32_f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr s0, [x0]
+; CHECK-NEXT: ucvtf d0, d0
+; CHECK-NEXT: ret
+ %2 = load i32, ptr %0, align 64
+ %3 = uitofp i32 %2 to double
+ ret double %3
+}
+
+define half @ucvtf_i64_f16(ptr %0) {
+; CHECK-LABEL: ucvtf_i64_f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr x8, [x0]
+; CHECK-NEXT: ucvtf h0, x8
+; CHECK-NEXT: ret
+ %2 = load i64, ptr %0, align 64
+ %3 = uitofp i64 %2 to half
+ ret half %3
+}
+
+define float @ucvtf_i64_f32(ptr %0) {
+; CHECK-LABEL: ucvtf_i64_f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr x8, [x0]
+; CHECK-NEXT: ucvtf s0, x8
+; CHECK-NEXT: ret
+ %2 = load i64, ptr %0, align 64
+ %3 = uitofp i64 %2 to float
+ ret float %3
+}
+
+define double @ucvtf_i64_f64(ptr %0) {
+; CHECK-LABEL: ucvtf_i64_f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr x8, [x0]
+; CHECK-NEXT: ucvtf d0, x8
+; CHECK-NEXT: ret
+ %2 = load i64, ptr %0, align 64
+ %3 = uitofp i64 %2 to double
+ ret double %3
+}