[llvm] [AMDGPU] Add all type for bitcast on VReg_512 (PR #131775)

via llvm-commits llvm-commits at lists.llvm.org
Tue Mar 18 05:57:18 PDT 2025


https://github.com/Shoreshen updated https://github.com/llvm/llvm-project/pull/131775

>From 96502df19db2b436c58349bd96bce3a2c8c2671d Mon Sep 17 00:00:00 2001
From: shore <372660931 at qq.com>
Date: Tue, 18 Mar 2025 18:10:20 +0800
Subject: [PATCH 1/2] Add all type for bitcast

---
 llvm/lib/Target/AMDGPU/SIInstructions.td   |  45 ++------
 llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.ll | 127 +++++++++++++++++++++
 2 files changed, 135 insertions(+), 37 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index de77401eb0137..2eee87068a3b8 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -1841,58 +1841,29 @@ def : BitConvert <v12i32, v12f32, VReg_384>;
 def : BitConvert <v12f32, v12i32, VReg_384>;
 
 // 512-bit bitcast
-def : BitConvert <v32f16, v32i16, VReg_512>;
-def : BitConvert <v32i16, v32f16, VReg_512>;
-def : BitConvert <v32f16, v16i32, VReg_512>;
-def : BitConvert <v32f16, v16f32, VReg_512>;
-def : BitConvert <v16f32, v32f16, VReg_512>;
-def : BitConvert <v16i32, v32f16, VReg_512>;
-def : BitConvert <v32i16, v16i32, VReg_512>;
-def : BitConvert <v32i16, v16f32, VReg_512>;
-def : BitConvert <v16f32, v32i16, VReg_512>;
-def : BitConvert <v16i32, v32i16, VReg_512>;
-def : BitConvert <v16i32, v16f32, VReg_512>;
-def : BitConvert <v16f32, v16i32, VReg_512>;
-def : BitConvert <v8i64,  v8f64,  VReg_512>;
-def : BitConvert <v8f64,  v8i64,  VReg_512>;
-def : BitConvert <v8i64,  v16i32, VReg_512>;
-def : BitConvert <v8f64,  v16i32, VReg_512>;
-def : BitConvert <v16i32, v8i64,  VReg_512>;
-def : BitConvert <v16i32, v8f64,  VReg_512>;
-def : BitConvert <v8i64,  v16f32, VReg_512>;
-def : BitConvert <v8f64,  v16f32, VReg_512>;
-def : BitConvert <v16f32, v8i64,  VReg_512>;
-def : BitConvert <v16f32, v8f64,  VReg_512>;
-
-
-
-def : BitConvert <v32bf16, v32i16, VReg_512>;
-def : BitConvert <v32i16, v32bf16, VReg_512>;
+foreach vt = VReg_512.RegTypes in {
+  foreach st = VReg_512.RegTypes in {
+    if !not(!eq (vt, st)) then {
+        def : BitConvert <vt, st, VReg_512>;
+    }
+  }
+}
+
 def : BitConvert <v32bf16, v32i16, SReg_512>;
 def : BitConvert <v32i16, v32bf16, SReg_512>;
 
-def : BitConvert <v32bf16, v32f16, VReg_512>;
-def : BitConvert <v32f16, v32bf16, VReg_512>;
 def : BitConvert <v32bf16, v32f16, SReg_512>;
 def : BitConvert <v32f16, v32bf16, SReg_512>;
 
-def : BitConvert <v32bf16, v16i32, VReg_512>;
-def : BitConvert <v16i32, v32bf16, VReg_512>;
 def : BitConvert <v32bf16, v16i32, SReg_512>;
 def : BitConvert <v16i32, v32bf16, SReg_512>;
 
-def : BitConvert <v32bf16, v16f32, VReg_512>;
-def : BitConvert <v16f32, v32bf16, VReg_512>;
 def : BitConvert <v32bf16, v16f32, SReg_512>;
 def : BitConvert <v16f32, v32bf16, SReg_512>;
 
-def : BitConvert <v32bf16, v8f64, VReg_512>;
-def : BitConvert <v8f64, v32bf16, VReg_512>;
 def : BitConvert <v32bf16, v8f64, SReg_512>;
 def : BitConvert <v8f64, v32bf16, SReg_512>;
 
-def : BitConvert <v32bf16, v8i64, VReg_512>;
-def : BitConvert <v8i64, v32bf16, VReg_512>;
 def : BitConvert <v32bf16, v8i64, SReg_512>;
 def : BitConvert <v8i64, v32bf16, SReg_512>;
 
diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.ll b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.ll
index 5065f57c67dfd..b36ade582c878 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.ll
@@ -1946,6 +1946,133 @@ end:
   ret void
 }
 
+; CHECK-LABEL: {{^}}v_bitcast_v8i64_to_v32f16:
+define <32 x half> @v_bitcast_v8i64_to_v32f16(<8 x i64> %a, i32 %b) {
+  %cmp = icmp eq i32 %b, 0
+  br i1 %cmp, label %cmp.true, label %cmp.false
+cmp.true:
+  %a1 = add <8 x i64> %a, splat (i64 3)
+  %a2 = bitcast <8 x i64> %a1 to <32 x half>
+  br label %end
+cmp.false:
+  %a3 = bitcast <8 x i64> %a to <32 x half>
+  br label %end
+end:
+  %phi = phi <32 x half> [ %a2, %cmp.true ], [ %a3, %cmp.false ]
+  ret <32 x half> %phi
+}
+
+; CHECK-LABEL: {{^}}v_bitcast_v8i64_to_v32i16:
+define <32 x i16> @v_bitcast_v8i64_to_v32i16(<8 x i64> %a, i32 %b) {
+  %cmp = icmp eq i32 %b, 0
+  br i1 %cmp, label %cmp.true, label %cmp.false
+cmp.true:
+  %a1 = add <8 x i64> %a, splat (i64 3)
+  %a2 = bitcast <8 x i64> %a1 to <32 x i16>
+  br label %end
+cmp.false:
+  %a3 = bitcast <8 x i64> %a to <32 x i16>
+  br label %end
+end:
+  %phi = phi <32 x i16> [ %a2, %cmp.true ], [ %a3, %cmp.false ]
+  ret <32 x i16> %phi
+}
+
+; CHECK-LABEL: {{^}}v_bitcast_v8f64_to_v32i16:
+define <32 x i16> @v_bitcast_v8f64_to_v32i16(<8 x double> %a, i32 %b) {
+  %cmp = icmp eq i32 %b, 0
+  br i1 %cmp, label %cmp.true, label %cmp.false
+cmp.true:
+  %a1 = fadd <8 x double> %a, splat (double 1.000000e+00)
+  %a2 = bitcast <8 x double> %a1 to <32 x i16>
+  br label %end
+cmp.false:
+  %a3 = bitcast <8 x double> %a to <32 x i16>
+  br label %end
+end:
+  %phi = phi <32 x i16> [ %a2, %cmp.true ], [ %a3, %cmp.false ]
+  ret <32 x i16> %phi
+}
+
+; CHECK-LABEL: {{^}}v_bitcast_v8f64_to_v32f16:
+define <32 x half> @v_bitcast_v8f64_to_v32f16(<8 x double> %a, i32 %b) {
+  %cmp = icmp eq i32 %b, 0
+  br i1 %cmp, label %cmp.true, label %cmp.false
+cmp.true:
+  %a1 = fadd <8 x double> %a, splat (double 1.000000e+00)
+  %a2 = bitcast <8 x double> %a1 to <32 x half>
+  br label %end
+cmp.false:
+  %a3 = bitcast <8 x double> %a to <32 x half>
+  br label %end
+end:
+  %phi = phi <32 x half> [ %a2, %cmp.true ], [ %a3, %cmp.false ]
+  ret <32 x half> %phi
+}
+
+; CHECK-LABEL: {{^}}v_bitcast_v32f16_to_v8i64:
+define <8 x i64> @v_bitcast_v32f16_to_v8i64(<32 x half> %a, i32 %b) {
+  %cmp = icmp eq i32 %b, 0
+  br i1 %cmp, label %cmp.true, label %cmp.false
+cmp.true:
+  %a1 = fadd <32 x half> %a, splat (half 0xH0200)
+  %a2 = bitcast <32 x half> %a1 to <8 x i64>
+  br label %end
+cmp.false:
+  %a3 = bitcast <32 x half> %a to <8 x i64>
+  br label %end
+end:
+  %phi = phi <8 x i64> [ %a2, %cmp.true ], [ %a3, %cmp.false ]
+  ret <8 x i64> %phi
+}
+
+; CHECK-LABEL: {{^}}v_bitcast_v32f16_to_v8f64:
+define <8 x double> @v_bitcast_v32f16_to_v8f64(<32 x half> %a, i32 %b) {
+  %cmp = icmp eq i32 %b, 0
+  br i1 %cmp, label %cmp.true, label %cmp.false
+cmp.true:
+  %a1 = fadd <32 x half> %a, splat (half 0xH0200)
+  %a2 = bitcast <32 x half> %a1 to <8 x double>
+  br label %end
+cmp.false:
+  %a3 = bitcast <32 x half> %a to <8 x double>
+  br label %end
+end:
+  %phi = phi <8 x double> [ %a2, %cmp.true ], [ %a3, %cmp.false ]
+  ret <8 x double> %phi
+}
+
+; CHECK-LABEL: {{^}}v_bitcast_v32i16_to_v8i64:
+define <8 x i64> @v_bitcast_v32i16_to_v8i64(<32 x i16> %a, i32 %b) {
+  %cmp = icmp eq i32 %b, 0
+  br i1 %cmp, label %cmp.true, label %cmp.false
+cmp.true:
+  %a1 = add <32 x i16> %a, splat (i16 3)
+  %a2 = bitcast <32 x i16> %a1 to <8 x i64>
+  br label %end
+cmp.false:
+  %a3 = bitcast <32 x i16> %a to <8 x i64>
+  br label %end
+end:
+  %phi = phi <8 x i64> [ %a2, %cmp.true ], [ %a3, %cmp.false ]
+  ret <8 x i64> %phi
+}
+
+; CHECK-LABEL: {{^}}v_bitcast_v32i16_to_v8f64:
+define <8 x double> @v_bitcast_v32i16_to_v8f64(<32 x i16> %a, i32 %b) {
+  %cmp = icmp eq i32 %b, 0
+  br i1 %cmp, label %cmp.true, label %cmp.false
+cmp.true:
+  %a1 = add <32 x i16> %a, splat (i16 3)
+  %a2 = bitcast <32 x i16> %a1 to <8 x double>
+  br label %end
+cmp.false:
+  %a3 = bitcast <32 x i16> %a to <8 x double>
+  br label %end
+end:
+  %phi = phi <8 x double> [ %a2, %cmp.true ], [ %a3, %cmp.false ]
+  ret <8 x double> %phi
+}
 
 
 

>From 870854892dfe8508aa3b3278f7dc0c0f8d396ede Mon Sep 17 00:00:00 2001
From: shore <372660931 at qq.com>
Date: Tue, 18 Mar 2025 20:57:02 +0800
Subject: [PATCH 2/2] fix comment

---
 llvm/lib/Target/AMDGPU/SIInstructions.td   |  46 +++++++--
 llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.ll | 114 ---------------------
 llvm/test/lit.cfg.py                       |   2 +-
 3 files changed, 39 insertions(+), 123 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 2eee87068a3b8..09d04461da514 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -1841,29 +1841,59 @@ def : BitConvert <v12i32, v12f32, VReg_384>;
 def : BitConvert <v12f32, v12i32, VReg_384>;
 
 // 512-bit bitcast
-foreach vt = VReg_512.RegTypes in {
-  foreach st = VReg_512.RegTypes in {
-    if !not(!eq (vt, st)) then {
-        def : BitConvert <vt, st, VReg_512>;
-    }
-  }
-}
-
+// 512-bit bitcast
+def : BitConvert <v32f16, v32i16, VReg_512>;
+def : BitConvert <v32i16, v32f16, VReg_512>;
+def : BitConvert <v32f16, v16i32, VReg_512>;
+def : BitConvert <v32f16, v16f32, VReg_512>;
+def : BitConvert <v16f32, v32f16, VReg_512>;
+def : BitConvert <v16i32, v32f16, VReg_512>;
+def : BitConvert <v32i16, v16i32, VReg_512>;
+def : BitConvert <v32i16, v16f32, VReg_512>;
+def : BitConvert <v16f32, v32i16, VReg_512>;
+def : BitConvert <v16i32, v32i16, VReg_512>;
+def : BitConvert <v16i32, v16f32, VReg_512>;
+def : BitConvert <v16f32, v16i32, VReg_512>;
+def : BitConvert <v8i64,  v8f64,  VReg_512>;
+def : BitConvert <v8f64,  v8i64,  VReg_512>;
+def : BitConvert <v8i64,  v16i32, VReg_512>;
+def : BitConvert <v8f64,  v16i32, VReg_512>;
+def : BitConvert <v16i32, v8i64,  VReg_512>;
+def : BitConvert <v16i32, v8f64,  VReg_512>;
+def : BitConvert <v8i64,  v16f32, VReg_512>;
+def : BitConvert <v8f64,  v16f32, VReg_512>;
+def : BitConvert <v16f32, v8i64,  VReg_512>;
+def : BitConvert <v16f32, v8f64,  VReg_512>;
+def : BitConvert <v8i64,  v32f16, VReg_512>;
+
+
+def : BitConvert <v32bf16, v32i16, VReg_512>;
+def : BitConvert <v32i16, v32bf16, VReg_512>;
 def : BitConvert <v32bf16, v32i16, SReg_512>;
 def : BitConvert <v32i16, v32bf16, SReg_512>;
 
+def : BitConvert <v32bf16, v32f16, VReg_512>;
+def : BitConvert <v32f16, v32bf16, VReg_512>;
 def : BitConvert <v32bf16, v32f16, SReg_512>;
 def : BitConvert <v32f16, v32bf16, SReg_512>;
 
+def : BitConvert <v32bf16, v16i32, VReg_512>;
+def : BitConvert <v16i32, v32bf16, VReg_512>;
 def : BitConvert <v32bf16, v16i32, SReg_512>;
 def : BitConvert <v16i32, v32bf16, SReg_512>;
 
+def : BitConvert <v32bf16, v16f32, VReg_512>;
+def : BitConvert <v16f32, v32bf16, VReg_512>;
 def : BitConvert <v32bf16, v16f32, SReg_512>;
 def : BitConvert <v16f32, v32bf16, SReg_512>;
 
+def : BitConvert <v32bf16, v8f64, VReg_512>;
+def : BitConvert <v8f64, v32bf16, VReg_512>;
 def : BitConvert <v32bf16, v8f64, SReg_512>;
 def : BitConvert <v8f64, v32bf16, SReg_512>;
 
+def : BitConvert <v32bf16, v8i64, VReg_512>;
+def : BitConvert <v8i64, v32bf16, VReg_512>;
 def : BitConvert <v32bf16, v8i64, SReg_512>;
 def : BitConvert <v8i64, v32bf16, SReg_512>;
 
diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.ll b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.ll
index b36ade582c878..1f94d9b6b3bda 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.ll
@@ -1946,70 +1946,6 @@ end:
   ret void
 }
 
-; CHECK-LABEL: {{^}}v_bitcast_v8i64_to_v32f16:
-define <32 x half> @v_bitcast_v8i64_to_v32f16(<8 x i64> %a, i32 %b) {
-  %cmp = icmp eq i32 %b, 0
-  br i1 %cmp, label %cmp.true, label %cmp.false
-cmp.true:
-  %a1 = add <8 x i64> %a, splat (i64 3)
-  %a2 = bitcast <8 x i64> %a1 to <32 x half>
-  br label %end
-cmp.false:
-  %a3 = bitcast <8 x i64> %a to <32 x half>
-  br label %end
-end:
-  %phi = phi <32 x half> [ %a2, %cmp.true ], [ %a3, %cmp.false ]
-  ret <32 x half> %phi
-}
-
-; CHECK-LABEL: {{^}}v_bitcast_v8i64_to_v32i16:
-define <32 x i16> @v_bitcast_v8i64_to_v32i16(<8 x i64> %a, i32 %b) {
-  %cmp = icmp eq i32 %b, 0
-  br i1 %cmp, label %cmp.true, label %cmp.false
-cmp.true:
-  %a1 = add <8 x i64> %a, splat (i64 3)
-  %a2 = bitcast <8 x i64> %a1 to <32 x i16>
-  br label %end
-cmp.false:
-  %a3 = bitcast <8 x i64> %a to <32 x i16>
-  br label %end
-end:
-  %phi = phi <32 x i16> [ %a2, %cmp.true ], [ %a3, %cmp.false ]
-  ret <32 x i16> %phi
-}
-
-; CHECK-LABEL: {{^}}v_bitcast_v8f64_to_v32i16:
-define <32 x i16> @v_bitcast_v8f64_to_v32i16(<8 x double> %a, i32 %b) {
-  %cmp = icmp eq i32 %b, 0
-  br i1 %cmp, label %cmp.true, label %cmp.false
-cmp.true:
-  %a1 = fadd <8 x double> %a, splat (double 1.000000e+00)
-  %a2 = bitcast <8 x double> %a1 to <32 x i16>
-  br label %end
-cmp.false:
-  %a3 = bitcast <8 x double> %a to <32 x i16>
-  br label %end
-end:
-  %phi = phi <32 x i16> [ %a2, %cmp.true ], [ %a3, %cmp.false ]
-  ret <32 x i16> %phi
-}
-
-; CHECK-LABEL: {{^}}v_bitcast_v8f64_to_v32f16:
-define <32 x half> @v_bitcast_v8f64_to_v32f16(<8 x double> %a, i32 %b) {
-  %cmp = icmp eq i32 %b, 0
-  br i1 %cmp, label %cmp.true, label %cmp.false
-cmp.true:
-  %a1 = fadd <8 x double> %a, splat (double 1.000000e+00)
-  %a2 = bitcast <8 x double> %a1 to <32 x half>
-  br label %end
-cmp.false:
-  %a3 = bitcast <8 x double> %a to <32 x half>
-  br label %end
-end:
-  %phi = phi <32 x half> [ %a2, %cmp.true ], [ %a3, %cmp.false ]
-  ret <32 x half> %phi
-}
-
 ; CHECK-LABEL: {{^}}v_bitcast_v32f16_to_v8i64:
 define <8 x i64> @v_bitcast_v32f16_to_v8i64(<32 x half> %a, i32 %b) {
   %cmp = icmp eq i32 %b, 0
@@ -2026,56 +1962,6 @@ end:
   ret <8 x i64> %phi
 }
 
-; CHECK-LABEL: {{^}}v_bitcast_v32f16_to_v8f64:
-define <8 x double> @v_bitcast_v32f16_to_v8f64(<32 x half> %a, i32 %b) {
-  %cmp = icmp eq i32 %b, 0
-  br i1 %cmp, label %cmp.true, label %cmp.false
-cmp.true:
-  %a1 = fadd <32 x half> %a, splat (half 0xH0200)
-  %a2 = bitcast <32 x half> %a1 to <8 x double>
-  br label %end
-cmp.false:
-  %a3 = bitcast <32 x half> %a to <8 x double>
-  br label %end
-end:
-  %phi = phi <8 x double> [ %a2, %cmp.true ], [ %a3, %cmp.false ]
-  ret <8 x double> %phi
-}
-
-; CHECK-LABEL: {{^}}v_bitcast_v32i16_to_v8i64:
-define <8 x i64> @v_bitcast_v32i16_to_v8i64(<32 x i16> %a, i32 %b) {
-  %cmp = icmp eq i32 %b, 0
-  br i1 %cmp, label %cmp.true, label %cmp.false
-cmp.true:
-  %a1 = add <32 x i16> %a, splat (i16 3)
-  %a2 = bitcast <32 x i16> %a1 to <8 x i64>
-  br label %end
-cmp.false:
-  %a3 = bitcast <32 x i16> %a to <8 x i64>
-  br label %end
-end:
-  %phi = phi <8 x i64> [ %a2, %cmp.true ], [ %a3, %cmp.false ]
-  ret <8 x i64> %phi
-}
-
-; CHECK-LABEL: {{^}}v_bitcast_v32i16_to_v8f64:
-define <8 x double> @v_bitcast_v32i16_to_v8f64(<32 x i16> %a, i32 %b) {
-  %cmp = icmp eq i32 %b, 0
-  br i1 %cmp, label %cmp.true, label %cmp.false
-cmp.true:
-  %a1 = add <32 x i16> %a, splat (i16 3)
-  %a2 = bitcast <32 x i16> %a1 to <8 x double>
-  br label %end
-cmp.false:
-  %a3 = bitcast <32 x i16> %a to <8 x double>
-  br label %end
-end:
-  %phi = phi <8 x double> [ %a2, %cmp.true ], [ %a3, %cmp.false ]
-  ret <8 x double> %phi
-}
-
-
-
 
 
 
diff --git a/llvm/test/lit.cfg.py b/llvm/test/lit.cfg.py
index aad7a088551b2..50921879cd1f2 100644
--- a/llvm/test/lit.cfg.py
+++ b/llvm/test/lit.cfg.py
@@ -466,7 +466,7 @@ def have_cxx_shared_library():
         print("could not exec llvm-readobj")
         return False
 
-    readobj_out = readobj_cmd.stdout.read().decode("ascii")
+    readobj_out = readobj_cmd.stdout.read().decode("utf-8")
     readobj_cmd.wait()
 
     regex = re.compile(r"(libc\+\+|libstdc\+\+|msvcp).*\.(so|dylib|dll)")



More information about the llvm-commits mailing list