[llvm] [AMDGPU] Fix canonicalization of truncated values. (PR #83054)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Feb 26 12:47:24 PST 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Harald van Dijk (hvdijk)
<details>
<summary>Changes</summary>
We were relying on roundings to implicitly canonicalize, which is generally safe, except with roundings that may be optimized away.
Fixes #<!-- -->82937.
---
Full diff: https://github.com/llvm/llvm-project/pull/83054.diff
3 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/SIISelLowering.cpp (+14)
- (modified) llvm/test/CodeGen/AMDGPU/bf16.ll (+4)
- (modified) llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll (+29)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 84ef9679ab9563..f7f64fdc9aae8a 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -12831,6 +12831,20 @@ SDValue SITargetLowering::performFCanonicalizeCombine(
}
}
+ // The TRUNC parameter of FP_ROUND specifies whether the operation may be
+ // optimized away because the operation is known to be exact. Even if the
+ // operation would be considered exact in normal circumstances where we do not
+ // care about SNaN, we do care about SNaN here and must preserve the operation
+ // unless its input is known to be canonicalized.
+ if (SrcOpc == ISD::FP_ROUND || SrcOpc == ISD::STRICT_FP_ROUND) {
+ if (N0.getConstantOperandVal(1) == 0 ||
+ isCanonicalized(DAG, N0.getOperand(0)))
+ return N0;
+ SDLoc SL(N0);
+ return DAG.getNode(SrcOpc, SL, VT, N0.getOperand(0),
+ DAG.getTargetConstant(0, SL, MVT::i32));
+ }
+
return isCanonicalized(DAG, N0) ? N0 : SDValue();
}
diff --git a/llvm/test/CodeGen/AMDGPU/bf16.ll b/llvm/test/CodeGen/AMDGPU/bf16.ll
index c773742a459507..14c5a8c46f7ba5 100644
--- a/llvm/test/CodeGen/AMDGPU/bf16.ll
+++ b/llvm/test/CodeGen/AMDGPU/bf16.ll
@@ -26818,11 +26818,15 @@ define bfloat @v_canonicalize_bf16(bfloat %a) {
; GCN-LABEL: v_canonicalize_bf16:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GCN-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: v_canonicalize_bf16:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_canonicalize_bf16:
diff --git a/llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll b/llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll
index 274621307f540d..f53adf88454bc6 100644
--- a/llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll
@@ -170,6 +170,35 @@ define amdgpu_kernel void @s_test_canonicalize_var_f16(ptr addrspace(1) %out, i1
ret void
}
+define half @s_test_canonicalize_arg(half %x) #1 {
+; VI-LABEL: s_test_canonicalize_arg:
+; VI: ; %bb.0:
+; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-NEXT: v_max_f16_e32 v0, v0, v0
+; VI-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: s_test_canonicalize_arg:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; CI-LABEL: s_test_canonicalize_arg:
+; CI: ; %bb.0:
+; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CI-NEXT: v_cvt_f16_f32_e32 v0, v0
+; CI-NEXT: v_cvt_f32_f16_e32 v0, v0
+; CI-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: s_test_canonicalize_arg:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+ %canonicalized = call half @llvm.canonicalize.f16(half %x)
+ ret half %canonicalized
+}
+
define <2 x half> @v_test_canonicalize_build_vector_v2f16(half %lo, half %hi) #1 {
; VI-LABEL: v_test_canonicalize_build_vector_v2f16:
; VI: ; %bb.0:
``````````
</details>
https://github.com/llvm/llvm-project/pull/83054
More information about the llvm-commits
mailing list