[llvm] AMDGPU: Add f64 to f32 support for llvm.fptrunc.round (PR #107481)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Sep 5 15:46:06 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Changpeng Fang (changpeng)
<details>
<summary>Changes</summary>
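Adds a `FPTRUNC_ROUND_F32_F64_PSEUDO` pseudo-instruction and a matching selection pattern so that `llvm.fptrunc.round` from f64 to f32 can be selected on AMDGPU for the supported rounding modes. `SIModeRegister` rewrites the pseudo to `V_CVT_F32_F64_e32` and requests the rounding mode carried by the pseudo's immediate operand. The error test now expects f64-to-f32 to fail only for `round.tonearestaway`, and new codegen tests cover `round.tonearest`, `round.upward`, `round.downward` and `round.towardzero`.

---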
Full diff: https://github.com/llvm/llvm-project/pull/107481.diff
4 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/SIInstructions.td (+6)
- (modified) llvm/lib/Target/AMDGPU/SIModeRegister.cpp (+9-3)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.fptrunc.round.err.ll (+14-14)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.fptrunc.round.ll (+39)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 69e1b9a38324f2..c0154645b391df 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -230,11 +230,17 @@ def S_INVERSE_BALLOT_U64 : SPseudoInstSI<
let Uses = [MODE, EXEC] in {
def FPTRUNC_ROUND_F16_F32_PSEUDO : VPseudoInstSI <(outs VGPR_32:$vdst),
(ins VGPR_32:$src0, i32imm:$round)>;
+
+def FPTRUNC_ROUND_F32_F64_PSEUDO : VPseudoInstSI <(outs VGPR_32:$vdst),
+ (ins VReg_64:$src0, i32imm:$round)>;
} // End Uses = [MODE, EXEC]
def : GCNPat <(f16 (fptrunc_round f32:$src0, (i32 SupportedRoundMode:$round))),
(FPTRUNC_ROUND_F16_F32_PSEUDO $src0, (as_hw_round_mode $round))>;
+def : GCNPat <(f32 (fptrunc_round f64:$src0, (i32 SupportedRoundMode:$round))),
+ (FPTRUNC_ROUND_F32_F64_PSEUDO $src0, (as_hw_round_mode $round))>;
+
// Invert the exec mask and overwrite the inactive lanes of dst with inactive,
// restoring it after we're done.
let Defs = [SCC], isConvergent = 1 in {
diff --git a/llvm/lib/Target/AMDGPU/SIModeRegister.cpp b/llvm/lib/Target/AMDGPU/SIModeRegister.cpp
index a590c6560942cf..6bcf9757d29457 100644
--- a/llvm/lib/Target/AMDGPU/SIModeRegister.cpp
+++ b/llvm/lib/Target/AMDGPU/SIModeRegister.cpp
@@ -165,7 +165,8 @@ Status SIModeRegister::getInstructionMode(MachineInstr &MI,
const SIInstrInfo *TII) {
unsigned Opcode = MI.getOpcode();
if (TII->usesFPDPRounding(MI) ||
- Opcode == AMDGPU::FPTRUNC_ROUND_F16_F32_PSEUDO) {
+ Opcode == AMDGPU::FPTRUNC_ROUND_F16_F32_PSEUDO ||
+ Opcode == AMDGPU::FPTRUNC_ROUND_F32_F64_PSEUDO) {
switch (Opcode) {
case AMDGPU::V_INTERP_P1LL_F16:
case AMDGPU::V_INTERP_P1LV_F16:
@@ -189,8 +190,13 @@ Status SIModeRegister::getInstructionMode(MachineInstr &MI,
B.addImm(0); // omod
} else
MI.setDesc(TII->get(AMDGPU::V_CVT_F16_F32_e32));
- return Status(FP_ROUND_MODE_DP(3),
- FP_ROUND_MODE_DP(Mode));
+ return Status(FP_ROUND_MODE_DP(3), FP_ROUND_MODE_DP(Mode));
+ }
+ case AMDGPU::FPTRUNC_ROUND_F32_F64_PSEUDO: {
+ unsigned Mode = MI.getOperand(2).getImm();
+ MI.removeOperand(2);
+ MI.setDesc(TII->get(AMDGPU::V_CVT_F32_F64_e32));
+ return Status(FP_ROUND_MODE_DP(3), FP_ROUND_MODE_DP(Mode));
}
default:
return DefaultStatus;
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.fptrunc.round.err.ll b/llvm/test/CodeGen/AMDGPU/llvm.fptrunc.round.err.ll
index 291fe00a6177bd..21fe1ce4dc1d6f 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.fptrunc.round.err.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.fptrunc.round.err.ll
@@ -3,15 +3,15 @@
; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1030 -filetype=null %t/f16-f64-err.ll 2>&1 | FileCheck --ignore-case --check-prefix=F16-F64-FAIL %s
; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1030 -filetype=null %t/f16-f64-err.ll 2>&1 | FileCheck --ignore-case --check-prefix=F16-F64-FAIL %s
-; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1030 -filetype=null %t/f32-f64-err.ll 2>&1 | FileCheck --ignore-case --check-prefix=F32-F64-FAIL %s
-; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1030 -filetype=null %t/f32-f64-err.ll 2>&1 | FileCheck --ignore-case --check-prefix=F32-F64-FAIL %s
-
; TODO: check for GISEL when bfloat is supported.
; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1030 -filetype=null %t/bf16-f32-err.ll 2>&1 | FileCheck --ignore-case --check-prefix=BF16-F32-FAIL %s
; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1030 -filetype=null %t/bf16-f64-err.ll 2>&1 | FileCheck --ignore-case --check-prefix=BF16-F64-FAIL %s
-; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1030 -filetype=null %t/f16-f32-tonearestaway-err.ll 2>&1 | FileCheck --ignore-case --check-prefix=TONEARESTAWAY-FAIL %s
-; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1030 -filetype=null %t/f16-f32-tonearestaway-err.ll 2>&1 | FileCheck --ignore-case --check-prefix=TONEARESTAWAY-FAIL %s
+; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1030 -filetype=null %t/f16-f32-tonearestaway-err.ll 2>&1 | FileCheck --ignore-case --check-prefix=F16-F32-TONEARESTAWAY-FAIL %s
+; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1030 -filetype=null %t/f16-f32-tonearestaway-err.ll 2>&1 | FileCheck --ignore-case --check-prefix=F16-F32-TONEARESTAWAY-FAIL %s
+
+; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1030 -filetype=null %t/f32-f64-tonearestaway-err.ll 2>&1 | FileCheck --ignore-case --check-prefix=F32-F64-TONEARESTAWAY-FAIL %s
+; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1030 -filetype=null %t/f32-f64-tonearestaway-err.ll 2>&1 | FileCheck --ignore-case --check-prefix=F32-F64-TONEARESTAWAY-FAIL %s
;--- f16-f64-err.ll
define amdgpu_gs void @test_fptrunc_round_f16_f64(double %a, ptr addrspace(1) %out) {
@@ -21,14 +21,6 @@ define amdgpu_gs void @test_fptrunc_round_f16_f64(double %a, ptr addrspace(1) %o
ret void
}
-;--- f32-f64-err.ll
-define amdgpu_gs void @test_fptrunc_round_f32_f64(double %a, ptr addrspace(1) %out) {
-; F32-F64-FAIL: LLVM ERROR: Cannot select
- %res = call float @llvm.fptrunc.round.f32.f64(double %a, metadata !"round.downward")
- store float %res, ptr addrspace(1) %out, align 4
- ret void
-}
-
;--- bf16-f32-err.ll
define amdgpu_gs void @test_fptrunc_round_bf16_f32(float %a, ptr addrspace(1) %out) {
; BF16-F32-FAIL: LLVM ERROR: Cannot select
@@ -47,8 +39,16 @@ define amdgpu_gs void @test_fptrunc_round_bf16_f64(double %a, ptr addrspace(1) %
;--- f16-f32-tonearestaway-err.ll
define amdgpu_gs void @test_fptrunc_round_f16_f32_tonearestaway(float %a, ptr addrspace(1) %out) {
-; TONEARESTAWAY-FAIL: LLVM ERROR: Cannot select
+; F16-F32-TONEARESTAWAY-FAIL: LLVM ERROR: Cannot select
%res = call half @llvm.fptrunc.round.f16.f32(float %a, metadata !"round.tonearestaway")
store half %res, ptr addrspace(1) %out, align 2
ret void
}
+
+;--- f32-f64-tonearestaway-err.ll
+define amdgpu_gs void @test_fptrunc_round_f32_f64_tonearestaway(double %a, ptr addrspace(1) %out) {
+; F32-F64-TONEARESTAWAY-FAIL: LLVM ERROR: Cannot select
+ %res = call float @llvm.fptrunc.round.f32.f64(double %a, metadata !"round.tonearestaway")
+ store float %res, ptr addrspace(1) %out, align 4
+ ret void
+}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.fptrunc.round.ll b/llvm/test/CodeGen/AMDGPU/llvm.fptrunc.round.ll
index 54ed6f1eb42820..3d9ce6e79d9d28 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.fptrunc.round.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.fptrunc.round.ll
@@ -516,3 +516,42 @@ define amdgpu_gs <8 x half> @v_fptrunc_round_v8f32_to_v8f16_downward(<8 x float>
%res = call <8 x half> @llvm.fptrunc.round.v8f16.v8f32(<8 x float> %a, metadata !"round.downward")
ret <8 x half> %res
}
+
+define amdgpu_gs float @v_fptrunc_round_f64_to_f32_tonearest(double %a) {
+; CHECK-LABEL: v_fptrunc_round_f64_to_f32_tonearest:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: v_cvt_f32_f64_e32 v0, v[0:1]
+; CHECK-NEXT: ; return to shader part epilog
+ %res = call float @llvm.fptrunc.round.f32.f64(double %a, metadata !"round.tonearest")
+ ret float %res
+}
+
+define amdgpu_gs float @v_fptrunc_round_f64_to_f32_upward(double %a) {
+; CHECK-LABEL: v_fptrunc_round_f64_to_f32_upward:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1
+; CHECK-NEXT: v_cvt_f32_f64_e32 v0, v[0:1]
+; CHECK-NEXT: ; return to shader part epilog
+ %res = call float @llvm.fptrunc.round.f32.f64(double %a, metadata !"round.upward")
+ ret float %res
+}
+
+define amdgpu_gs float @v_fptrunc_round_f64_to_f32_downward(double %a) {
+; CHECK-LABEL: v_fptrunc_round_f64_to_f32_downward:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 3, 1), 1
+; CHECK-NEXT: v_cvt_f32_f64_e32 v0, v[0:1]
+; CHECK-NEXT: ; return to shader part epilog
+ %res = call float @llvm.fptrunc.round.f32.f64(double %a, metadata !"round.downward")
+ ret float %res
+}
+
+define amdgpu_gs float @v_fptrunc_round_f64_to_f32_towardzero(double %a) {
+; CHECK-LABEL: v_fptrunc_round_f64_to_f32_towardzero:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 3
+; CHECK-NEXT: v_cvt_f32_f64_e32 v0, v[0:1]
+; CHECK-NEXT: ; return to shader part epilog
+ %res = call float @llvm.fptrunc.round.f32.f64(double %a, metadata !"round.towardzero")
+ ret float %res
+}
``````````
</details>
https://github.com/llvm/llvm-project/pull/107481
More information about the llvm-commits mailing list