[llvm] [AMDGPU] Eliminate unnecessary packing in wider f16 vectors for sdwa/opsel-able instruction (PR #137137)

Tue May 13 01:15:37 PDT 2025

================
@@ -0,0 +1,380 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=amdgcn -mcpu=gfx942 -run-pass si-peephole-sdwa,dead-mi-elimination -o - %s | FileCheck -check-prefixes=GFX9 %s
+
+--- |
+  source_filename = "/home/vikashgu/work/upstream/llvm-project/llvm/test/CodeGen/AMDGPU/vector-fp16.ll"
+  target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9"
+  target triple = "amdgcn-amd-amdhsa"
+
+  define <4 x half> @sin_v4f16(<4 x half> %a) #0 {
+    %res = call <4 x half> @llvm.sin.v4f16(<4 x half> %a)
+    ret <4 x half> %res
+  }
+
+  define <4 x half> @cos_v4f16(<4 x half> %a) #0 {
+    %res = call <4 x half> @llvm.cos.v4f16(<4 x half> %a)
+    ret <4 x half> %res
+  }
+
+  define <4 x half> @log_v4f16(<4 x half> %a) #0 {
+    %res = call <4 x half> @llvm.log.v4f16(<4 x half> %a)
+    ret <4 x half> %res
+  }
+
+  define <4 x half> @log2_v4f16(<4 x half> %a) #0 {
----------------
arsenm wrote:

The point of a MIR test is to stress all of the structural edge cases and/or have minimal MIR instruction context. We're not getting that by just testing structurally identical opcodes in different functions.

https://github.com/llvm/llvm-project/pull/137137