[llvm] [AMDGPU][GlobalISel] Align `selectVOP3PMadMixModsImpl` with the `SelectionDAG` counterpart (PR #110168)

Mon Sep 30 11:17:37 PDT 2024

================
@@ -0,0 +1,45 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx900 -run-pass=instruction-select,machineverifier -o - %s | FileCheck -check-prefixes=GFX9 %s
+
+---
+name: foo
+legalized: true
+regBankSelected: true
+machineFunctionInfo:
+  mode:
+    fp32-output-denormals: false
+    fp32-input-denormals: false
+body: |
+  bb.0:
+    ; GFX9-LABEL: name: foo
+    ; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
+    ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 16
+    ; GFX9-NEXT: [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def dead $scc
+    ; GFX9-NEXT: [[S_LSHR_B32_1:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[COPY1]], [[S_MOV_B32_]], implicit-def dead $scc
+    ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[S_LSHR_B32_]]
+    ; GFX9-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[S_LSHR_B32_1]]
+    ; GFX9-NEXT: [[V_CVT_F32_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[COPY3]], 0, 0, implicit $mode, implicit $exec
+    ; GFX9-NEXT: [[V_RCP_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_RCP_F32_e64 0, [[V_CVT_F32_F16_e64_]], 0, 0, implicit $mode, implicit $exec
+    ; GFX9-NEXT: [[V_MAD_MIX_F32_:%[0-9]+]]:vgpr_32 = V_MAD_MIX_F32 9, [[COPY3]], 0, [[V_RCP_F32_e64_]], 8, [[COPY2]], 0, 0, 0, implicit $mode, implicit $exec
+    ; GFX9-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[V_MAD_MIX_F32_]], implicit $exec
+    ; GFX9-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+    ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
+    %0:sgpr(s32) = COPY $sgpr0
+    %1:sgpr(s32) = COPY $sgpr1
+    %2:sgpr(s32) = G_CONSTANT i32 16
+    %3:sgpr(s32) = G_LSHR %0:sgpr, %2:sgpr(s32)
+    %4:sgpr(s16) = G_TRUNC %3:sgpr(s32)
+    %5:sgpr(s32) = G_LSHR %1:sgpr, %2:sgpr(s32)
+    %6:sgpr(s16) = G_TRUNC %5:sgpr(s32)
+    %7:vgpr(s16) = COPY %4:sgpr(s16)
+    %8:vgpr(s32) = G_FPEXT %7:vgpr(s16)
+    %9:vgpr(s16) = COPY %6:sgpr(s16)
+    %10:vgpr(s32) = G_FPEXT %9:vgpr(s16)
+    %11:vgpr(s32) = G_FNEG %10:vgpr
+    %12:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), %10:vgpr(s32)
+    %13:vgpr(s32) = G_FMAD %11:vgpr, %12:vgpr, %8:vgpr
----------------
shiltian wrote:

> This also should test the FMA case

Done.

> It's preferable to have IR tests for this issue.

How can I write an IR test that uses `mad`? LLVM has `fma` but no `mad`. This code sequence was reduced from the MIR generated in https://github.com/llvm/llvm-project/pull/109295, but that PR is blocked by this one.

https://github.com/llvm/llvm-project/pull/110168