[llvm] [AMDGPU] Swap select operands to allow later v_cndmask shrinking into vop2 (PR #142354)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Jun 2 03:10:35 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Ana Mihajlovic (mihajlovicana)
<details>
<summary>Changes</summary>
The goal of this patch is to swap the operands of v_cndmask x, y, where y is a constant, so that the vop2 format can be used instead of vop3. This also requires inverting the comparison (the instruction generating the vcc that will be used by v_cndmask). Doing so allows these instructions to later be merged into v_dual_cndmask.
---
Patch is 487.18 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/142354.diff
15 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp (+29-1)
- (modified) llvm/test/CodeGen/AMDGPU/extract_vector_dynelt.ll (+47-46)
- (modified) llvm/test/CodeGen/AMDGPU/fmaximum3.ll (+102-102)
- (modified) llvm/test/CodeGen/AMDGPU/fminimum3.ll (+102-102)
- (modified) llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll (+12-12)
- (modified) llvm/test/CodeGen/AMDGPU/insert_vector_dynelt.ll (+24-24)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.maximum.f64.ll (+828-828)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.minimum.f64.ll (+828-828)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.round.f64.ll (+3-2)
- (added) llvm/test/CodeGen/AMDGPU/shrink-cndmask.ll (+724)
- (modified) llvm/test/CodeGen/AMDGPU/uaddsat.ll (+14-15)
- (modified) llvm/test/CodeGen/AMDGPU/usubsat.ll (+14-15)
- (modified) llvm/test/CodeGen/AMDGPU/v_cndmask.ll (+15-15)
- (modified) llvm/test/CodeGen/AMDGPU/vector-reduce-fmaximum.ll (+420-420)
- (modified) llvm/test/CodeGen/AMDGPU/vector-reduce-fminimum.ll (+420-420)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 5f41bd7d8a617..1a5d2232213ec 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -4710,6 +4710,11 @@ AMDGPUTargetLowering::foldFreeOpFromSelect(TargetLowering::DAGCombinerInfo &DCI,
return SDValue();
}
+bool isFnegOrFabs(SDValue &V) {
+ unsigned Opcode = V.getOpcode();
+ return Opcode == ISD::FNEG || Opcode == ISD::FABS;
+}
+
SDValue AMDGPUTargetLowering::performSelectCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
if (SDValue Folded = foldFreeOpFromSelect(DCI, SDValue(N, 0)))
@@ -4727,7 +4732,30 @@ SDValue AMDGPUTargetLowering::performSelectCombine(SDNode *N,
SDValue True = N->getOperand(1);
SDValue False = N->getOperand(2);
- if (Cond.hasOneUse()) { // TODO: Look for multiple select uses.
+ int ShouldSwap = 0;
+ for (auto it = Cond->use_begin(); it != Cond->use_end(); it++) {
+ auto User = it->getUser();
+
+ if (User->getOpcode() != ISD::SELECT) {
+ ShouldSwap = 0;
+ break;
+ }
+
+ auto Op1 = User->getOperand(1);
+ auto Op2 = User->getOperand(2);
+
+ // If the operand is defined by fneg or fabs, the instruction will have
+ // source modifiers and therefore can't be shrunk to vop2.
+ if (isFnegOrFabs(Op1) || isFnegOrFabs(Op2))
+ continue;
+
+ if (!Op1->isDivergent() && Op2->isDivergent())
+ ShouldSwap++;
+ else if (Op1->isDivergent() && !Op2->isDivergent())
+ ShouldSwap--;
+ }
+
+ if (Cond->hasOneUse() || ShouldSwap > 0) {
SelectionDAG &DAG = DCI.DAG;
if (DAG.isConstantValueOfAnyType(True) &&
!DAG.isConstantValueOfAnyType(False)) {
diff --git a/llvm/test/CodeGen/AMDGPU/extract_vector_dynelt.ll b/llvm/test/CodeGen/AMDGPU/extract_vector_dynelt.ll
index c69b0cce3d208..9ddf3e9340435 100644
--- a/llvm/test/CodeGen/AMDGPU/extract_vector_dynelt.ll
+++ b/llvm/test/CodeGen/AMDGPU/extract_vector_dynelt.ll
@@ -1097,71 +1097,72 @@ define double @double16_extelt_vec(i32 %sel) {
; GCN-LABEL: double16_extelt_vec:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: v_mov_b32_e32 v3, 0x3ff19999
-; GCN-NEXT: v_mov_b32_e32 v4, 0x4000cccc
-; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GCN-NEXT: v_cmp_eq_u32_e64 s[4:5], 2, v0
+; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0
+; GCN-NEXT: v_cmp_eq_u32_e64 s[4:5], 1, v0
; GCN-NEXT: v_mov_b32_e32 v1, 0x9999999a
; GCN-NEXT: v_mov_b32_e32 v2, 0xcccccccd
-; GCN-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc
-; GCN-NEXT: v_mov_b32_e32 v4, 0x4008cccc
-; GCN-NEXT: s_or_b64 vcc, s[4:5], vcc
+; GCN-NEXT: v_mov_b32_e32 v3, 0x3ff19999
+; GCN-NEXT: v_mov_b32_e32 v4, 0x4000cccc
+; GCN-NEXT: s_or_b64 vcc, vcc, s[4:5]
; GCN-NEXT: v_cndmask_b32_e64 v3, v3, v4, s[4:5]
; GCN-NEXT: v_cndmask_b32_e32 v2, v1, v2, vcc
+; GCN-NEXT: v_mov_b32_e32 v4, 0x4008cccc
+; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 2, v0
+; GCN-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc
; GCN-NEXT: v_mov_b32_e32 v4, 0x40106666
-; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v0
-; GCN-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc
+; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 3, v0
+; GCN-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc
; GCN-NEXT: v_mov_b32_e32 v4, 0x40146666
-; GCN-NEXT: v_cmp_eq_u32_e64 s[4:5], 4, v0
-; GCN-NEXT: v_cndmask_b32_e64 v3, v3, v4, s[4:5]
-; GCN-NEXT: s_or_b64 s[4:5], s[4:5], vcc
+; GCN-NEXT: v_cmp_ne_u32_e64 s[4:5], 4, v0
+; GCN-NEXT: v_cndmask_b32_e64 v3, v4, v3, s[4:5]
+; GCN-NEXT: s_and_b64 s[4:5], s[4:5], vcc
; GCN-NEXT: v_mov_b32_e32 v4, 0x40186666
-; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v0
-; GCN-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc
-; GCN-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
+; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 5, v0
+; GCN-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc
+; GCN-NEXT: s_and_b64 s[4:5], vcc, s[4:5]
; GCN-NEXT: v_mov_b32_e32 v5, 0x401c6666
-; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 6, v0
+; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 6, v0
; GCN-NEXT: v_mov_b32_e32 v4, 0x66666666
-; GCN-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc
-; GCN-NEXT: s_or_b64 vcc, vcc, s[4:5]
-; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
+; GCN-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc
+; GCN-NEXT: s_and_b64 vcc, vcc, s[4:5]
+; GCN-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
; GCN-NEXT: v_mov_b32_e32 v4, 0x40203333
-; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 7, v0
-; GCN-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc
+; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 7, v0
+; GCN-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc
; GCN-NEXT: v_mov_b32_e32 v4, 0x40223333
-; GCN-NEXT: v_cmp_eq_u32_e64 s[4:5], 8, v0
-; GCN-NEXT: v_cndmask_b32_e64 v3, v3, v4, s[4:5]
-; GCN-NEXT: s_or_b64 s[4:5], s[4:5], vcc
+; GCN-NEXT: v_cmp_ne_u32_e64 s[4:5], 8, v0
+; GCN-NEXT: v_cndmask_b32_e64 v3, v4, v3, s[4:5]
+; GCN-NEXT: s_and_b64 s[4:5], s[4:5], vcc
; GCN-NEXT: v_mov_b32_e32 v4, 0x40243333
-; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 9, v0
-; GCN-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc
-; GCN-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
+; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 9, v0
+; GCN-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc
+; GCN-NEXT: s_and_b64 s[4:5], vcc, s[4:5]
; GCN-NEXT: v_mov_b32_e32 v4, 0x40263333
-; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 10, v0
-; GCN-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc
-; GCN-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
+; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 10, v0
+; GCN-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc
+; GCN-NEXT: s_and_b64 s[4:5], vcc, s[4:5]
; GCN-NEXT: v_mov_b32_e32 v4, 0x40283333
-; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 11, v0
-; GCN-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc
-; GCN-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
+; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 11, v0
+; GCN-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc
+; GCN-NEXT: s_and_b64 s[4:5], vcc, s[4:5]
; GCN-NEXT: v_mov_b32_e32 v4, 0x402a3333
-; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 12, v0
-; GCN-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc
-; GCN-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
+; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 12, v0
+; GCN-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc
+; GCN-NEXT: s_and_b64 s[4:5], vcc, s[4:5]
; GCN-NEXT: v_mov_b32_e32 v4, 0x402c3333
-; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 13, v0
-; GCN-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc
-; GCN-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
+; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 13, v0
+; GCN-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc
+; GCN-NEXT: s_and_b64 s[4:5], vcc, s[4:5]
; GCN-NEXT: v_mov_b32_e32 v5, 0x402e3333
-; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 14, v0
+; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 14, v0
; GCN-NEXT: v_mov_b32_e32 v4, 0x33333333
-; GCN-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc
-; GCN-NEXT: s_or_b64 vcc, vcc, s[4:5]
-; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
-; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 15, v0
-; GCN-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; GCN-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc
+; GCN-NEXT: s_and_b64 vcc, vcc, s[4:5]
+; GCN-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
+; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 15, v0
+; GCN-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc
; GCN-NEXT: v_mov_b32_e32 v1, 0x40301999
-; GCN-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
+; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
; GCN-NEXT: s_setpc_b64 s[30:31]
entry:
%ext = extractelement <16 x double> <double 1.1, double 2.1, double 3.1, double 4.1, double 5.1, double 6.1, double 7.1, double 8.1, double 9.1, double 10.1, double 11.1, double 12.1, double 13.1, double 14.1, double 15.1, double 16.1>, i32 %sel
diff --git a/llvm/test/CodeGen/AMDGPU/fmaximum3.ll b/llvm/test/CodeGen/AMDGPU/fmaximum3.ll
index 53d940e1e6c1a..8a17a759ac334 100644
--- a/llvm/test/CodeGen/AMDGPU/fmaximum3.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmaximum3.ll
@@ -3167,15 +3167,15 @@ define double @v_fmaximum3_f64(double %a, double %b, double %c) {
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_max_f64 v[6:7], v[0:1], v[2:3]
; GFX9-NEXT: v_mov_b32_e32 v8, 0x7ff80000
-; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX9-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[2:3]
; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v8, v7, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v6, vcc
; GFX9-NEXT: v_max_f64 v[2:3], v[0:1], v[4:5]
-; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX9-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[4:5]
; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v8, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v8, v3, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call double @llvm.maximum.f64(double %a, double %b)
%max1 = call double @llvm.maximum.f64(double %max0, double %c)
@@ -3200,15 +3200,15 @@ define double @v_fmaximum3_f64_commute(double %a, double %b, double %c) {
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_max_f64 v[6:7], v[0:1], v[2:3]
; GFX9-NEXT: v_mov_b32_e32 v8, 0x7ff80000
-; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX9-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[2:3]
; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v8, v7, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v6, vcc
; GFX9-NEXT: v_max_f64 v[2:3], v[4:5], v[0:1]
-; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[0:1]
+; GFX9-NEXT: v_cmp_o_f64_e32 vcc, v[4:5], v[0:1]
; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v8, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v8, v3, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call double @llvm.maximum.f64(double %a, double %b)
%max1 = call double @llvm.maximum.f64(double %c, double %max0)
@@ -3274,15 +3274,15 @@ define double @v_fmaximum3_f64_fabs0(double %a, double %b, double %c) {
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_max_f64 v[6:7], |v[0:1]|, v[2:3]
; GFX9-NEXT: v_mov_b32_e32 v8, 0x7ff80000
-; GFX9-NEXT: v_cmp_u_f64_e64 vcc, |v[0:1]|, v[2:3]
+; GFX9-NEXT: v_cmp_o_f64_e64 vcc, |v[0:1]|, v[2:3]
; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v8, v7, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v6, vcc
; GFX9-NEXT: v_max_f64 v[2:3], v[0:1], v[4:5]
-; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX9-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[4:5]
; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v8, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v8, v3, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%a.fabs = call double @llvm.fabs.f64(double %a)
%max0 = call double @llvm.maximum.f64(double %a.fabs, double %b)
@@ -3308,15 +3308,15 @@ define double @v_fmaximum3_f64_fabs1(double %a, double %b, double %c) {
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_max_f64 v[6:7], v[0:1], |v[2:3]|
; GFX9-NEXT: v_mov_b32_e32 v8, 0x7ff80000
-; GFX9-NEXT: v_cmp_u_f64_e64 vcc, v[0:1], |v[2:3]|
+; GFX9-NEXT: v_cmp_o_f64_e64 vcc, v[0:1], |v[2:3]|
; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v8, v7, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v6, vcc
; GFX9-NEXT: v_max_f64 v[2:3], v[0:1], v[4:5]
-; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX9-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[4:5]
; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v8, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v8, v3, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%b.fabs = call double @llvm.fabs.f64(double %b)
%max0 = call double @llvm.maximum.f64(double %a, double %b.fabs)
@@ -3342,15 +3342,15 @@ define double @v_fmaximum3_f64_fabs2(double %a, double %b, double %c) {
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_max_f64 v[6:7], v[0:1], v[2:3]
; GFX9-NEXT: v_mov_b32_e32 v8, 0x7ff80000
-; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX9-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[2:3]
; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v8, v7, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v6, vcc
; GFX9-NEXT: v_max_f64 v[2:3], v[0:1], |v[4:5]|
-; GFX9-NEXT: v_cmp_u_f64_e64 vcc, v[0:1], |v[4:5]|
+; GFX9-NEXT: v_cmp_o_f64_e64 vcc, v[0:1], |v[4:5]|
; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v8, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v8, v3, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%c.fabs = call double @llvm.fabs.f64(double %c)
%max0 = call double @llvm.maximum.f64(double %a, double %b)
@@ -3376,15 +3376,15 @@ define double @v_fmaximum3_f64_fabs_all(double %a, double %b, double %c) {
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_max_f64 v[6:7], |v[0:1]|, |v[2:3]|
; GFX9-NEXT: v_mov_b32_e32 v8, 0x7ff80000
-; GFX9-NEXT: v_cmp_u_f64_e64 vcc, |v[0:1]|, |v[2:3]|
+; GFX9-NEXT: v_cmp_o_f64_e64 vcc, |v[0:1]|, |v[2:3]|
; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v8, v7, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v6, vcc
; GFX9-NEXT: v_max_f64 v[2:3], v[0:1], |v[4:5]|
-; GFX9-NEXT: v_cmp_u_f64_e64 vcc, v[0:1], |v[4:5]|
+; GFX9-NEXT: v_cmp_o_f64_e64 vcc, v[0:1], |v[4:5]|
; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v8, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v8, v3, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%a.fabs = call double @llvm.fabs.f64(double %a)
%b.fabs = call double @llvm.fabs.f64(double %b)
@@ -3412,15 +3412,15 @@ define double @v_fmaximum3_f64_fneg_all(double %a, double %b, double %c) {
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_max_f64 v[6:7], -v[0:1], -v[2:3]
; GFX9-NEXT: v_mov_b32_e32 v8, 0x7ff80000
-; GFX9-NEXT: v_cmp_u_f64_e64 vcc, -v[0:1], -v[2:3]
+; GFX9-NEXT: v_cmp_o_f64_e64 vcc, -v[0:1], -v[2:3]
; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v8, v7, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v6, vcc
; GFX9-NEXT: v_max_f64 v[2:3], v[0:1], -v[4:5]
-; GFX9-NEXT: v_cmp_u_f64_e64 vcc, v[0:1], -v[4:5]
+; GFX9-NEXT: v_cmp_o_f64_e64 vcc, v[0:1], -v[4:5]
; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v8, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v8, v3, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%a.fneg = fneg double %a
%b.fneg = fneg double %b
@@ -3448,15 +3448,15 @@ define double @v_fmaximum3_f64_fneg_fabs_all(double %a, double %b, double %c) {
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_max_f64 v[6:7], -|v[0:1]|, -|v[2:3]|
; GFX9-NEXT: v_mov_b32_e32 v8, 0x7ff80000
-; GFX9-NEXT: v_cmp_u_f64_e64 vcc, -|v[0:1]|, -|v[2:3]|
+; GFX9-NEXT: v_cmp_o_f64_e64 vcc, -|v[0:1]|, -|v[2:3]|
; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v8, v7, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v6, vcc
; GFX9-NEXT: v_max_f64 v[2:3], v[0:1], -|v[4:5]|
-; GFX9-NEXT: v_cmp_u_f64_e64 vcc, v[0:1], -|v[4:5]|
+; GFX9-NEXT: v_cmp_o_f64_e64 vcc, v[0:1], -|v[4:5]|
; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v8, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v8, v3, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%a.fabs = call double @llvm.fabs.f64(double %a)
%b.fabs = call double @llvm.fabs.f64(double %b)
@@ -3487,15 +3487,15 @@ define double @v_fmaximum3_f64_fneg0(double %a, double %b, double %c) {
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_max_f64 v[6:7], -v[0:1], v[2:3]
; GFX9-NEXT: v_mov_b32_e32 v8, 0x7ff80000
-; GFX9-NEXT: v_cmp_u_f64_e64 vcc, -v[0:1], v[2:3]
+; GFX9-NEXT: v_cmp_o_f64_e64 vcc, -v[0:1], v[2:3]
; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v8, v7, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v6, vcc
; GFX9-NEXT: v_max_f64 v[2:3], v[0:1], v[4:5]
-; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX9-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[4:5]
; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v8, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v8, v3, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%a.fneg = fneg double %a
%max0 = call double @llvm.maximum.f64(double %a.fneg, double %b)
@@ -3521,15 +3521,15 @@ define double @v_fmaximum3_f64_fneg1(double %a, double %b, double %c) {
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_max_f64 v[6:7], v[0:1], -v[2:3]
; GFX9-NEXT: v_mov_b32_e32 v8, 0x7ff80000
-; GFX9-NEXT: v_cmp_u_f64_e64 vcc, v[0:1], -v[2:3]
+; GFX9-NEXT: v_cmp_o_f64_e64 vcc, v[0:1], -v[2:3]
; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v8, v7, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v6, vcc
; GFX9-NEXT: v_max_f64 v[2:3], v[0:1], v[4:5]
-; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX9-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[4:5]
; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v8, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v8, v3, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%b.fneg = fneg double %b
%max0 = call double @llvm.maximum.f64(double %a, double %b.fneg)
@@ -3555,15 +3555,15 @@ define double @v_fmaximum3_f64_fneg2(double %a, double %b, double %c) {
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_max_f64 v[6:7], v[0:1], v[2:3]
; GFX9-NEXT: v_mov_b32_e32 v8, 0x7ff80000
-; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX9-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[2:3]
; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v8, v7, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v6, vcc
; GFX9-NEXT: v_max_f64 v[2:3], v[0:1], -v[4:5]
-; GFX9-NEXT: v_cmp_u_f64_e64 vcc, v[0:1], -v[4:5]
+; GFX9-NEXT: v_cmp_o_f64_e64 vcc, v[0:1], -v[4:5]
; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v8, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v8, v3, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%c.fneg = fneg double %c
%max0 = call double @llvm.maximum.f64(double %a, double %b)
@@ -3591,15 +3591,15 @@ define double @v_fmaximum3_f64_const0(double %b, double %c) {
; GFX9-NEXT: s_mov_b32 s1, 0x40200000
; GFX9-NEXT: v_max_f64 v[4:5], v[0:1], s[0:1]
; GFX9-NEXT: v_mov_b32_e...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/142354
More information about the llvm-commits
mailing list