[llvm] AMDGPU: Fix broken check lines in test (PR #154690)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Aug 21 00:59:05 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Matt Arsenault (arsenm)
<details>
<summary>Changes</summary>
The SelectionDAG checks were dropped; regenerate the test to
restore them.
---
Patch is 240.92 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/154690.diff
1 Files Affected:
- (modified) llvm/test/CodeGen/AMDGPU/fsqrt.f64.ll (+3407-1159)
``````````diff
diff --git a/llvm/test/CodeGen/AMDGPU/fsqrt.f64.ll b/llvm/test/CodeGen/AMDGPU/fsqrt.f64.ll
index 8f3b9a5006fc4..cffa287dd91f5 100644
--- a/llvm/test/CodeGen/AMDGPU/fsqrt.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/fsqrt.f64.ll
@@ -1,136 +1,472 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=pitcairn < %s | FileCheck -check-prefixes=SDAG %s
-; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -check-prefixes=SDAG %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=pitcairn < %s | FileCheck -check-prefixes=GFX6,GFX6-SDAG %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -check-prefixes=GFX8,GFX8-SDAG %s
-; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=pitcairn < %s | FileCheck -check-prefixes=GISEL %s
-; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -check-prefixes=GISEL %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=pitcairn < %s | FileCheck -check-prefixes=GFX6,GFX6-GISEL %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -check-prefixes=GFX8,GFX8-GISEL %s
define double @v_sqrt_f64(double %x) {
-; GISEL-LABEL: v_sqrt_f64:
-; GISEL: ; %bb.0:
-; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-NEXT: v_mov_b32_e32 v2, 0
-; GISEL-NEXT: v_bfrev_b32_e32 v3, 8
-; GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
-; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
-; GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
-; GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
-; GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
-; GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
-; GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
-; GISEL-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
-; GISEL-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; GISEL-NEXT: v_mov_b32_e32 v4, 0xffffff80
-; GISEL-NEXT: v_mov_b32_e32 v5, 0x260
-; GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
-; GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
-; GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
-; GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX6-SDAG-LABEL: v_sqrt_f64:
+; GFX6-SDAG: ; %bb.0:
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-SDAG-NEXT: s_mov_b32 s4, 0
+; GFX6-SDAG-NEXT: s_brev_b32 s5, 8
+; GFX6-SDAG-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
+; GFX6-SDAG-NEXT: v_mov_b32_e32 v2, 0x100
+; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; GFX6-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
+; GFX6-SDAG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; GFX6-SDAG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
+; GFX6-SDAG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
+; GFX6-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
+; GFX6-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; GFX6-SDAG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; GFX6-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; GFX6-SDAG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
+; GFX6-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; GFX6-SDAG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
+; GFX6-SDAG-NEXT: v_mov_b32_e32 v4, 0xffffff80
+; GFX6-SDAG-NEXT: v_mov_b32_e32 v5, 0x260
+; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
+; GFX6-SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
+; GFX6-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
+; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
+; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-SDAG-LABEL: v_sqrt_f64:
+; GFX8-SDAG: ; %bb.0:
+; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT: s_mov_b32 s4, 0
+; GFX8-SDAG-NEXT: s_brev_b32 s5, 8
+; GFX8-SDAG-NEXT: v_cmp_gt_f64_e32 vcc, s[4:5], v[0:1]
+; GFX8-SDAG-NEXT: v_mov_b32_e32 v2, 0x100
+; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; GFX8-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
+; GFX8-SDAG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; GFX8-SDAG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
+; GFX8-SDAG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
+; GFX8-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
+; GFX8-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; GFX8-SDAG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; GFX8-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; GFX8-SDAG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
+; GFX8-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; GFX8-SDAG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
+; GFX8-SDAG-NEXT: v_mov_b32_e32 v4, 0xffffff80
+; GFX8-SDAG-NEXT: v_mov_b32_e32 v5, 0x260
+; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
+; GFX8-SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
+; GFX8-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
+; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
+; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX6-GISEL-LABEL: v_sqrt_f64:
+; GFX6-GISEL: ; %bb.0:
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-GISEL-NEXT: v_mov_b32_e32 v2, 0
+; GFX6-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
+; GFX6-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
+; GFX6-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
+; GFX6-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
+; GFX6-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; GFX6-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
+; GFX6-GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
+; GFX6-GISEL-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
+; GFX6-GISEL-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; GFX6-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; GFX6-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; GFX6-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; GFX6-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; GFX6-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; GFX6-GISEL-NEXT: v_mov_b32_e32 v4, 0xffffff80
+; GFX6-GISEL-NEXT: v_mov_b32_e32 v5, 0x260
+; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
+; GFX6-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
+; GFX6-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
+; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
+; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: v_sqrt_f64:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: v_mov_b32_e32 v2, 0
+; GFX8-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
+; GFX8-GISEL-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3]
+; GFX8-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; GFX8-GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
+; GFX8-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
+; GFX8-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; GFX8-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
+; GFX8-GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
+; GFX8-GISEL-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
+; GFX8-GISEL-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; GFX8-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; GFX8-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; GFX8-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; GFX8-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; GFX8-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; GFX8-GISEL-NEXT: v_mov_b32_e32 v4, 0xffffff80
+; GFX8-GISEL-NEXT: v_mov_b32_e32 v5, 0x260
+; GFX8-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
+; GFX8-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
+; GFX8-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
+; GFX8-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; GFX8-GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
%result = call double @llvm.sqrt.f64(double %x)
ret double %result
}
define double @v_sqrt_f64_fneg(double %x) {
-; GISEL-LABEL: v_sqrt_f64_fneg:
-; GISEL: ; %bb.0:
-; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-NEXT: v_mov_b32_e32 v2, 0
-; GISEL-NEXT: v_bfrev_b32_e32 v3, 8
-; GISEL-NEXT: v_cmp_lt_f64_e64 vcc, -v[0:1], v[2:3]
-; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
-; GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
-; GISEL-NEXT: v_ldexp_f64 v[0:1], -v[0:1], v2
-; GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
-; GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
-; GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
-; GISEL-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
-; GISEL-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; GISEL-NEXT: v_mov_b32_e32 v4, 0xffffff80
-; GISEL-NEXT: v_mov_b32_e32 v5, 0x260
-; GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
-; GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
-; GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
-; GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX6-SDAG-LABEL: v_sqrt_f64_fneg:
+; GFX6-SDAG: ; %bb.0:
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-SDAG-NEXT: s_mov_b32 s4, 0
+; GFX6-SDAG-NEXT: s_brev_b32 s5, 9
+; GFX6-SDAG-NEXT: v_cmp_lt_f64_e32 vcc, s[4:5], v[0:1]
+; GFX6-SDAG-NEXT: v_mov_b32_e32 v2, 0x100
+; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; GFX6-SDAG-NEXT: v_ldexp_f64 v[0:1], -v[0:1], v2
+; GFX6-SDAG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; GFX6-SDAG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
+; GFX6-SDAG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
+; GFX6-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
+; GFX6-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; GFX6-SDAG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; GFX6-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; GFX6-SDAG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
+; GFX6-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; GFX6-SDAG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
+; GFX6-SDAG-NEXT: v_mov_b32_e32 v4, 0xffffff80
+; GFX6-SDAG-NEXT: v_mov_b32_e32 v5, 0x260
+; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
+; GFX6-SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
+; GFX6-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
+; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
+; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-SDAG-LABEL: v_sqrt_f64_fneg:
+; GFX8-SDAG: ; %bb.0:
+; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT: s_mov_b32 s4, 0
+; GFX8-SDAG-NEXT: s_brev_b32 s5, 9
+; GFX8-SDAG-NEXT: v_cmp_lt_f64_e32 vcc, s[4:5], v[0:1]
+; GFX8-SDAG-NEXT: v_mov_b32_e32 v2, 0x100
+; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; GFX8-SDAG-NEXT: v_ldexp_f64 v[0:1], -v[0:1], v2
+; GFX8-SDAG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; GFX8-SDAG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
+; GFX8-SDAG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
+; GFX8-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
+; GFX8-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; GFX8-SDAG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; GFX8-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; GFX8-SDAG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
+; GFX8-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; GFX8-SDAG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
+; GFX8-SDAG-NEXT: v_mov_b32_e32 v4, 0xffffff80
+; GFX8-SDAG-NEXT: v_mov_b32_e32 v5, 0x260
+; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
+; GFX8-SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
+; GFX8-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
+; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
+; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX6-GISEL-LABEL: v_sqrt_f64_fneg:
+; GFX6-GISEL: ; %bb.0:
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-GISEL-NEXT: v_mov_b32_e32 v2, 0
+; GFX6-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
+; GFX6-GISEL-NEXT: v_cmp_lt_f64_e64 vcc, -v[0:1], v[2:3]
+; GFX6-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
+; GFX6-GISEL-NEXT: v_ldexp_f64 v[0:1], -v[0:1], v2
+; GFX6-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; GFX6-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
+; GFX6-GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
+; GFX6-GISEL-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
+; GFX6-GISEL-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; GFX6-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; GFX6-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; GFX6-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; GFX6-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; GFX6-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; GFX6-GISEL-NEXT: v_mov_b32_e32 v4, 0xffffff80
+; GFX6-GISEL-NEXT: v_mov_b32_e32 v5, 0x260
+; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
+; GFX6-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
+; GFX6-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
+; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
+; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: v_sqrt_f64_fneg:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: v_mov_b32_e32 v2, 0
+; GFX8-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
+; GFX8-GISEL-NEXT: v_cmp_lt_f64_e64 vcc, -v[0:1], v[2:3]
+; GFX8-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; GFX8-GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
+; GFX8-GISEL-NEXT: v_ldexp_f64 v[0:1], -v[0:1], v2
+; GFX8-GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; GFX8-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
+; GFX8-GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
+; GFX8-GISEL-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
+; GFX8-GISEL-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; GFX8-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; GFX8-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; GFX8-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; GFX8-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
+; GFX8-GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
+; GFX8-GISEL-NEXT: v_mov_b32_e32 v4, 0xffffff80
+; GFX8-GISEL-NEXT: v_mov_b32_e32 v5, 0x260
+; GFX8-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
+; GFX8-GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
+; GFX8-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
+; GFX8-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; GFX8-GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
%x.neg = fneg double %x
%result = call double @llvm.sqrt.f64(double %x.neg)
ret double %result
}
define double @v_sqrt_f64_fabs(double %x) {
-; GISEL-LABEL: v_sqrt_f64_fabs:
-; GISEL: ; %bb.0:
-; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-NEXT: v_mov_b32_e32 v2, 0
-; GISEL-NEXT: v_bfrev_b32_e32 v3, 8
-; GISEL-NEXT: v_cmp_lt_f64_e64 vcc, |v[0:1]|, v[2:3]
-; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
-; GISEL-NEXT: v_lshlrev_b32_e32 v2, 8, v2
-; GISEL-NEXT: v_ldexp_f64 v[0:1], |v[0:1]|, v2
-; GISEL-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
-; GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 0.5
-; GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
-; GISEL-NEXT: v_fma_f64 v[6:7], -v[4:5], v[2:3], 0.5
-; GISEL-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
-; GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[2:3], v[0:1]
-; GISEL-NEXT: v_fma_f64 v[2:3], v[6:7], v[4:5], v[2:3]
-; GISEL-NEXT: v_mov_b32_e32 v4, 0xffffff80
-; GISEL-NEXT: v_mov_b32_e32 v5, 0x260
-; GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
-; GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
-; GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
-; GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
-; GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX6-SDAG-LABEL: v_sqrt_f64_fabs:
+; GFX6-SDAG: ; %bb.0:
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-SDAG-NEXT: s_mov_b32 s4, 0
+; GFX6-SDAG-NEXT: s_brev_b32 s5, 8
+; GFX6-SDAG-NEXT: v_cmp_lt_f64_e64 vcc, |v[0:1]|, s[4:5]
+; GFX6-SDAG-NEXT: v_mov_b32_e32 v2, 0x100
+; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; GFX6-SDAG-NEXT: v_ldexp_f64 v[0:1], |v[0:1]|, v2
+; GFX6-SDAG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; GFX6-SDAG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
+; GFX6-SDAG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
+; GFX6-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
+; GFX6-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; GFX6-SDAG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; GFX6-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; GFX6-SDAG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
+; GFX6-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; GFX6-SDAG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
+; GFX6-SDAG-NEXT: v_mov_b32_e32 v4, 0xffffff80
+; GFX6-SDAG-NEXT: v_mov_b32_e32 v5, 0x260
+; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
+; GFX6-SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
+; GFX6-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
+; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
+; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-SDAG-LABEL: v_sqrt_f64_fabs:
+; GFX8-SDAG: ; %bb.0:
+; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT: s_mov_b32 s4, 0
+; GFX8-SDAG-NEXT: s_brev_b32 s5, 8
+; GFX8-SDAG-NEXT: v_cmp_lt_f64_e64 vcc, |v[0:1]|, s[4:5]
+; GFX8-SDAG-NEXT: v_mov_b32_e32 v2, 0x100
+; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; GFX8-SDAG-NEXT: v_ldexp_f64 v[0:1], |v[0:1]|, v2
+; GFX8-SDAG-NEXT: v_rsq_f64_e32 v[2:3], v[0:1]
+; GFX8-SDAG-NEXT: v_mul_f64 v[4:5], v[0:1], v[2:3]
+; GFX8-SDAG-NEXT: v_mul_f64 v[2:3], v[2:3], 0.5
+; GFX8-SDAG-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 0.5
+; GFX8-SDAG-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
+; GFX8-SDAG-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; GFX8-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; GFX8-SDAG-NEXT: v_fma_f64 v[4:5], v[6:7], v[2:3], v[4:5]
+; GFX8-SDAG-NEXT: v_fma_f64 v[6:7], -v[4:5], v[4:5], v[0:1]
+; GFX8-SDAG-NEXT: v_fma_f64 v[2:3], v[6:7], v[2:3], v[4:5]
+; GFX8-SDAG-NEXT: v_mov_b32_e32 v4, 0xffffff80
+; GFX8-SDAG-NEXT: v_mov_b32_e32 v5, 0x260
+; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc
+; GFX8-SDAG-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v5
+; GFX8-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4
+; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; GFX8-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
+; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX6-GISEL-LABEL: v_sqrt_f64_fabs:
+; GFX6-GISEL: ; %bb.0:
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-GISEL-NEXT: v_mov_b32_e32 v2, 0
+; GFX6-GISEL-NEXT: v_bfrev_b32_e32 v3, 8
+; GFX6-GISEL-NEXT: v_cmp_lt_f64_e64 vcc, |v[0:1]|, v[2:3]
+; GFX6-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; GFX6-GISEL-NEXT: v_lshlrev_b32_e32...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/154690
More information about the llvm-commits mailing list