[llvm] [AMDGPU] Add all types for bitcast on VReg_512 (PR #131775)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Mar 18 06:00:12 PDT 2025
https://github.com/Shoreshen updated https://github.com/llvm/llvm-project/pull/131775
>From 96502df19db2b436c58349bd96bce3a2c8c2671d Mon Sep 17 00:00:00 2001
From: shore <372660931 at qq.com>
Date: Tue, 18 Mar 2025 18:10:20 +0800
Subject: [PATCH 1/4] Add all type for bitcast
---
llvm/lib/Target/AMDGPU/SIInstructions.td | 45 ++------
llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.ll | 127 +++++++++++++++++++++
2 files changed, 135 insertions(+), 37 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index de77401eb0137..2eee87068a3b8 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -1841,58 +1841,29 @@ def : BitConvert <v12i32, v12f32, VReg_384>;
def : BitConvert <v12f32, v12i32, VReg_384>;
// 512-bit bitcast
-def : BitConvert <v32f16, v32i16, VReg_512>;
-def : BitConvert <v32i16, v32f16, VReg_512>;
-def : BitConvert <v32f16, v16i32, VReg_512>;
-def : BitConvert <v32f16, v16f32, VReg_512>;
-def : BitConvert <v16f32, v32f16, VReg_512>;
-def : BitConvert <v16i32, v32f16, VReg_512>;
-def : BitConvert <v32i16, v16i32, VReg_512>;
-def : BitConvert <v32i16, v16f32, VReg_512>;
-def : BitConvert <v16f32, v32i16, VReg_512>;
-def : BitConvert <v16i32, v32i16, VReg_512>;
-def : BitConvert <v16i32, v16f32, VReg_512>;
-def : BitConvert <v16f32, v16i32, VReg_512>;
-def : BitConvert <v8i64, v8f64, VReg_512>;
-def : BitConvert <v8f64, v8i64, VReg_512>;
-def : BitConvert <v8i64, v16i32, VReg_512>;
-def : BitConvert <v8f64, v16i32, VReg_512>;
-def : BitConvert <v16i32, v8i64, VReg_512>;
-def : BitConvert <v16i32, v8f64, VReg_512>;
-def : BitConvert <v8i64, v16f32, VReg_512>;
-def : BitConvert <v8f64, v16f32, VReg_512>;
-def : BitConvert <v16f32, v8i64, VReg_512>;
-def : BitConvert <v16f32, v8f64, VReg_512>;
-
-
-
-def : BitConvert <v32bf16, v32i16, VReg_512>;
-def : BitConvert <v32i16, v32bf16, VReg_512>;
+foreach vt = VReg_512.RegTypes in {
+ foreach st = VReg_512.RegTypes in {
+ if !not(!eq (vt, st)) then {
+ def : BitConvert <vt, st, VReg_512>;
+ }
+ }
+}
+
def : BitConvert <v32bf16, v32i16, SReg_512>;
def : BitConvert <v32i16, v32bf16, SReg_512>;
-def : BitConvert <v32bf16, v32f16, VReg_512>;
-def : BitConvert <v32f16, v32bf16, VReg_512>;
def : BitConvert <v32bf16, v32f16, SReg_512>;
def : BitConvert <v32f16, v32bf16, SReg_512>;
-def : BitConvert <v32bf16, v16i32, VReg_512>;
-def : BitConvert <v16i32, v32bf16, VReg_512>;
def : BitConvert <v32bf16, v16i32, SReg_512>;
def : BitConvert <v16i32, v32bf16, SReg_512>;
-def : BitConvert <v32bf16, v16f32, VReg_512>;
-def : BitConvert <v16f32, v32bf16, VReg_512>;
def : BitConvert <v32bf16, v16f32, SReg_512>;
def : BitConvert <v16f32, v32bf16, SReg_512>;
-def : BitConvert <v32bf16, v8f64, VReg_512>;
-def : BitConvert <v8f64, v32bf16, VReg_512>;
def : BitConvert <v32bf16, v8f64, SReg_512>;
def : BitConvert <v8f64, v32bf16, SReg_512>;
-def : BitConvert <v32bf16, v8i64, VReg_512>;
-def : BitConvert <v8i64, v32bf16, VReg_512>;
def : BitConvert <v32bf16, v8i64, SReg_512>;
def : BitConvert <v8i64, v32bf16, SReg_512>;
diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.ll b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.ll
index 5065f57c67dfd..b36ade582c878 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.ll
@@ -1946,6 +1946,133 @@ end:
ret void
}
+; CHECK-LABEL: {{^}}v_bitcast_v8i64_to_v32f16:
+define <32 x half> @v_bitcast_v8i64_to_v32f16(<8 x i64> %a, i32 %b) {
+ %cmp = icmp eq i32 %b, 0
+ br i1 %cmp, label %cmp.true, label %cmp.false
+cmp.true:
+ %a1 = add <8 x i64> %a, splat (i64 3)
+ %a2 = bitcast <8 x i64> %a1 to <32 x half>
+ br label %end
+cmp.false:
+ %a3 = bitcast <8 x i64> %a to <32 x half>
+ br label %end
+end:
+ %phi = phi <32 x half> [ %a2, %cmp.true ], [ %a3, %cmp.false ]
+ ret <32 x half> %phi
+}
+
+; CHECK-LABEL: {{^}}v_bitcast_v8i64_to_v32i16:
+define <32 x i16> @v_bitcast_v8i64_to_v32i16(<8 x i64> %a, i32 %b) {
+ %cmp = icmp eq i32 %b, 0
+ br i1 %cmp, label %cmp.true, label %cmp.false
+cmp.true:
+ %a1 = add <8 x i64> %a, splat (i64 3)
+ %a2 = bitcast <8 x i64> %a1 to <32 x i16>
+ br label %end
+cmp.false:
+ %a3 = bitcast <8 x i64> %a to <32 x i16>
+ br label %end
+end:
+ %phi = phi <32 x i16> [ %a2, %cmp.true ], [ %a3, %cmp.false ]
+ ret <32 x i16> %phi
+}
+
+; CHECK-LABEL: {{^}}v_bitcast_v8f64_to_v32i16:
+define <32 x i16> @v_bitcast_v8f64_to_v32i16(<8 x double> %a, i32 %b) {
+ %cmp = icmp eq i32 %b, 0
+ br i1 %cmp, label %cmp.true, label %cmp.false
+cmp.true:
+ %a1 = fadd <8 x double> %a, splat (double 1.000000e+00)
+ %a2 = bitcast <8 x double> %a1 to <32 x i16>
+ br label %end
+cmp.false:
+ %a3 = bitcast <8 x double> %a to <32 x i16>
+ br label %end
+end:
+ %phi = phi <32 x i16> [ %a2, %cmp.true ], [ %a3, %cmp.false ]
+ ret <32 x i16> %phi
+}
+
+; CHECK-LABEL: {{^}}v_bitcast_v8f64_to_v32f16:
+define <32 x half> @v_bitcast_v8f64_to_v32f16(<8 x double> %a, i32 %b) {
+ %cmp = icmp eq i32 %b, 0
+ br i1 %cmp, label %cmp.true, label %cmp.false
+cmp.true:
+ %a1 = fadd <8 x double> %a, splat (double 1.000000e+00)
+ %a2 = bitcast <8 x double> %a1 to <32 x half>
+ br label %end
+cmp.false:
+ %a3 = bitcast <8 x double> %a to <32 x half>
+ br label %end
+end:
+ %phi = phi <32 x half> [ %a2, %cmp.true ], [ %a3, %cmp.false ]
+ ret <32 x half> %phi
+}
+
+; CHECK-LABEL: {{^}}v_bitcast_v32f16_to_v8i64:
+define <8 x i64> @v_bitcast_v32f16_to_v8i64(<32 x half> %a, i32 %b) {
+ %cmp = icmp eq i32 %b, 0
+ br i1 %cmp, label %cmp.true, label %cmp.false
+cmp.true:
+ %a1 = fadd <32 x half> %a, splat (half 0xH0200)
+ %a2 = bitcast <32 x half> %a1 to <8 x i64>
+ br label %end
+cmp.false:
+ %a3 = bitcast <32 x half> %a to <8 x i64>
+ br label %end
+end:
+ %phi = phi <8 x i64> [ %a2, %cmp.true ], [ %a3, %cmp.false ]
+ ret <8 x i64> %phi
+}
+
+; CHECK-LABEL: {{^}}v_bitcast_v32f16_to_v8f64:
+define <8 x double> @v_bitcast_v32f16_to_v8f64(<32 x half> %a, i32 %b) {
+ %cmp = icmp eq i32 %b, 0
+ br i1 %cmp, label %cmp.true, label %cmp.false
+cmp.true:
+ %a1 = fadd <32 x half> %a, splat (half 0xH0200)
+ %a2 = bitcast <32 x half> %a1 to <8 x double>
+ br label %end
+cmp.false:
+ %a3 = bitcast <32 x half> %a to <8 x double>
+ br label %end
+end:
+ %phi = phi <8 x double> [ %a2, %cmp.true ], [ %a3, %cmp.false ]
+ ret <8 x double> %phi
+}
+
+; CHECK-LABEL: {{^}}v_bitcast_v32i16_to_v8i64:
+define <8 x i64> @v_bitcast_v32i16_to_v8i64(<32 x i16> %a, i32 %b) {
+ %cmp = icmp eq i32 %b, 0
+ br i1 %cmp, label %cmp.true, label %cmp.false
+cmp.true:
+ %a1 = add <32 x i16> %a, splat (i16 3)
+ %a2 = bitcast <32 x i16> %a1 to <8 x i64>
+ br label %end
+cmp.false:
+ %a3 = bitcast <32 x i16> %a to <8 x i64>
+ br label %end
+end:
+ %phi = phi <8 x i64> [ %a2, %cmp.true ], [ %a3, %cmp.false ]
+ ret <8 x i64> %phi
+}
+
+; CHECK-LABEL: {{^}}v_bitcast_v32i16_to_v8f64:
+define <8 x double> @v_bitcast_v32i16_to_v8f64(<32 x i16> %a, i32 %b) {
+ %cmp = icmp eq i32 %b, 0
+ br i1 %cmp, label %cmp.true, label %cmp.false
+cmp.true:
+ %a1 = add <32 x i16> %a, splat (i16 3)
+ %a2 = bitcast <32 x i16> %a1 to <8 x double>
+ br label %end
+cmp.false:
+ %a3 = bitcast <32 x i16> %a to <8 x double>
+ br label %end
+end:
+ %phi = phi <8 x double> [ %a2, %cmp.true ], [ %a3, %cmp.false ]
+ ret <8 x double> %phi
+}
>From 870854892dfe8508aa3b3278f7dc0c0f8d396ede Mon Sep 17 00:00:00 2001
From: shore <372660931 at qq.com>
Date: Tue, 18 Mar 2025 20:57:02 +0800
Subject: [PATCH 2/4] fix comment
---
llvm/lib/Target/AMDGPU/SIInstructions.td | 46 +++++++--
llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.ll | 114 ---------------------
llvm/test/lit.cfg.py | 2 +-
3 files changed, 39 insertions(+), 123 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 2eee87068a3b8..09d04461da514 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -1841,29 +1841,59 @@ def : BitConvert <v12i32, v12f32, VReg_384>;
def : BitConvert <v12f32, v12i32, VReg_384>;
// 512-bit bitcast
-foreach vt = VReg_512.RegTypes in {
- foreach st = VReg_512.RegTypes in {
- if !not(!eq (vt, st)) then {
- def : BitConvert <vt, st, VReg_512>;
- }
- }
-}
-
+// 512-bit bitcast
+def : BitConvert <v32f16, v32i16, VReg_512>;
+def : BitConvert <v32i16, v32f16, VReg_512>;
+def : BitConvert <v32f16, v16i32, VReg_512>;
+def : BitConvert <v32f16, v16f32, VReg_512>;
+def : BitConvert <v16f32, v32f16, VReg_512>;
+def : BitConvert <v16i32, v32f16, VReg_512>;
+def : BitConvert <v32i16, v16i32, VReg_512>;
+def : BitConvert <v32i16, v16f32, VReg_512>;
+def : BitConvert <v16f32, v32i16, VReg_512>;
+def : BitConvert <v16i32, v32i16, VReg_512>;
+def : BitConvert <v16i32, v16f32, VReg_512>;
+def : BitConvert <v16f32, v16i32, VReg_512>;
+def : BitConvert <v8i64, v8f64, VReg_512>;
+def : BitConvert <v8f64, v8i64, VReg_512>;
+def : BitConvert <v8i64, v16i32, VReg_512>;
+def : BitConvert <v8f64, v16i32, VReg_512>;
+def : BitConvert <v16i32, v8i64, VReg_512>;
+def : BitConvert <v16i32, v8f64, VReg_512>;
+def : BitConvert <v8i64, v16f32, VReg_512>;
+def : BitConvert <v8f64, v16f32, VReg_512>;
+def : BitConvert <v16f32, v8i64, VReg_512>;
+def : BitConvert <v16f32, v8f64, VReg_512>;
+def : BitConvert <v8i64, v32f16, VReg_512>;
+
+
+def : BitConvert <v32bf16, v32i16, VReg_512>;
+def : BitConvert <v32i16, v32bf16, VReg_512>;
def : BitConvert <v32bf16, v32i16, SReg_512>;
def : BitConvert <v32i16, v32bf16, SReg_512>;
+def : BitConvert <v32bf16, v32f16, VReg_512>;
+def : BitConvert <v32f16, v32bf16, VReg_512>;
def : BitConvert <v32bf16, v32f16, SReg_512>;
def : BitConvert <v32f16, v32bf16, SReg_512>;
+def : BitConvert <v32bf16, v16i32, VReg_512>;
+def : BitConvert <v16i32, v32bf16, VReg_512>;
def : BitConvert <v32bf16, v16i32, SReg_512>;
def : BitConvert <v16i32, v32bf16, SReg_512>;
+def : BitConvert <v32bf16, v16f32, VReg_512>;
+def : BitConvert <v16f32, v32bf16, VReg_512>;
def : BitConvert <v32bf16, v16f32, SReg_512>;
def : BitConvert <v16f32, v32bf16, SReg_512>;
+def : BitConvert <v32bf16, v8f64, VReg_512>;
+def : BitConvert <v8f64, v32bf16, VReg_512>;
def : BitConvert <v32bf16, v8f64, SReg_512>;
def : BitConvert <v8f64, v32bf16, SReg_512>;
+def : BitConvert <v32bf16, v8i64, VReg_512>;
+def : BitConvert <v8i64, v32bf16, VReg_512>;
def : BitConvert <v32bf16, v8i64, SReg_512>;
def : BitConvert <v8i64, v32bf16, SReg_512>;
diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.ll b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.ll
index b36ade582c878..1f94d9b6b3bda 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.ll
@@ -1946,70 +1946,6 @@ end:
ret void
}
-; CHECK-LABEL: {{^}}v_bitcast_v8i64_to_v32f16:
-define <32 x half> @v_bitcast_v8i64_to_v32f16(<8 x i64> %a, i32 %b) {
- %cmp = icmp eq i32 %b, 0
- br i1 %cmp, label %cmp.true, label %cmp.false
-cmp.true:
- %a1 = add <8 x i64> %a, splat (i64 3)
- %a2 = bitcast <8 x i64> %a1 to <32 x half>
- br label %end
-cmp.false:
- %a3 = bitcast <8 x i64> %a to <32 x half>
- br label %end
-end:
- %phi = phi <32 x half> [ %a2, %cmp.true ], [ %a3, %cmp.false ]
- ret <32 x half> %phi
-}
-
-; CHECK-LABEL: {{^}}v_bitcast_v8i64_to_v32i16:
-define <32 x i16> @v_bitcast_v8i64_to_v32i16(<8 x i64> %a, i32 %b) {
- %cmp = icmp eq i32 %b, 0
- br i1 %cmp, label %cmp.true, label %cmp.false
-cmp.true:
- %a1 = add <8 x i64> %a, splat (i64 3)
- %a2 = bitcast <8 x i64> %a1 to <32 x i16>
- br label %end
-cmp.false:
- %a3 = bitcast <8 x i64> %a to <32 x i16>
- br label %end
-end:
- %phi = phi <32 x i16> [ %a2, %cmp.true ], [ %a3, %cmp.false ]
- ret <32 x i16> %phi
-}
-
-; CHECK-LABEL: {{^}}v_bitcast_v8f64_to_v32i16:
-define <32 x i16> @v_bitcast_v8f64_to_v32i16(<8 x double> %a, i32 %b) {
- %cmp = icmp eq i32 %b, 0
- br i1 %cmp, label %cmp.true, label %cmp.false
-cmp.true:
- %a1 = fadd <8 x double> %a, splat (double 1.000000e+00)
- %a2 = bitcast <8 x double> %a1 to <32 x i16>
- br label %end
-cmp.false:
- %a3 = bitcast <8 x double> %a to <32 x i16>
- br label %end
-end:
- %phi = phi <32 x i16> [ %a2, %cmp.true ], [ %a3, %cmp.false ]
- ret <32 x i16> %phi
-}
-
-; CHECK-LABEL: {{^}}v_bitcast_v8f64_to_v32f16:
-define <32 x half> @v_bitcast_v8f64_to_v32f16(<8 x double> %a, i32 %b) {
- %cmp = icmp eq i32 %b, 0
- br i1 %cmp, label %cmp.true, label %cmp.false
-cmp.true:
- %a1 = fadd <8 x double> %a, splat (double 1.000000e+00)
- %a2 = bitcast <8 x double> %a1 to <32 x half>
- br label %end
-cmp.false:
- %a3 = bitcast <8 x double> %a to <32 x half>
- br label %end
-end:
- %phi = phi <32 x half> [ %a2, %cmp.true ], [ %a3, %cmp.false ]
- ret <32 x half> %phi
-}
-
; CHECK-LABEL: {{^}}v_bitcast_v32f16_to_v8i64:
define <8 x i64> @v_bitcast_v32f16_to_v8i64(<32 x half> %a, i32 %b) {
%cmp = icmp eq i32 %b, 0
@@ -2026,56 +1962,6 @@ end:
ret <8 x i64> %phi
}
-; CHECK-LABEL: {{^}}v_bitcast_v32f16_to_v8f64:
-define <8 x double> @v_bitcast_v32f16_to_v8f64(<32 x half> %a, i32 %b) {
- %cmp = icmp eq i32 %b, 0
- br i1 %cmp, label %cmp.true, label %cmp.false
-cmp.true:
- %a1 = fadd <32 x half> %a, splat (half 0xH0200)
- %a2 = bitcast <32 x half> %a1 to <8 x double>
- br label %end
-cmp.false:
- %a3 = bitcast <32 x half> %a to <8 x double>
- br label %end
-end:
- %phi = phi <8 x double> [ %a2, %cmp.true ], [ %a3, %cmp.false ]
- ret <8 x double> %phi
-}
-
-; CHECK-LABEL: {{^}}v_bitcast_v32i16_to_v8i64:
-define <8 x i64> @v_bitcast_v32i16_to_v8i64(<32 x i16> %a, i32 %b) {
- %cmp = icmp eq i32 %b, 0
- br i1 %cmp, label %cmp.true, label %cmp.false
-cmp.true:
- %a1 = add <32 x i16> %a, splat (i16 3)
- %a2 = bitcast <32 x i16> %a1 to <8 x i64>
- br label %end
-cmp.false:
- %a3 = bitcast <32 x i16> %a to <8 x i64>
- br label %end
-end:
- %phi = phi <8 x i64> [ %a2, %cmp.true ], [ %a3, %cmp.false ]
- ret <8 x i64> %phi
-}
-
-; CHECK-LABEL: {{^}}v_bitcast_v32i16_to_v8f64:
-define <8 x double> @v_bitcast_v32i16_to_v8f64(<32 x i16> %a, i32 %b) {
- %cmp = icmp eq i32 %b, 0
- br i1 %cmp, label %cmp.true, label %cmp.false
-cmp.true:
- %a1 = add <32 x i16> %a, splat (i16 3)
- %a2 = bitcast <32 x i16> %a1 to <8 x double>
- br label %end
-cmp.false:
- %a3 = bitcast <32 x i16> %a to <8 x double>
- br label %end
-end:
- %phi = phi <8 x double> [ %a2, %cmp.true ], [ %a3, %cmp.false ]
- ret <8 x double> %phi
-}
-
-
-
diff --git a/llvm/test/lit.cfg.py b/llvm/test/lit.cfg.py
index aad7a088551b2..50921879cd1f2 100644
--- a/llvm/test/lit.cfg.py
+++ b/llvm/test/lit.cfg.py
@@ -466,7 +466,7 @@ def have_cxx_shared_library():
print("could not exec llvm-readobj")
return False
- readobj_out = readobj_cmd.stdout.read().decode("ascii")
+ readobj_out = readobj_cmd.stdout.read().decode("utf-8")
readobj_cmd.wait()
regex = re.compile(r"(libc\+\+|libstdc\+\+|msvcp).*\.(so|dylib|dll)")
>From 5046309472c1e9a1524142657d30ace18093019a Mon Sep 17 00:00:00 2001
From: shore <372660931 at qq.com>
Date: Tue, 18 Mar 2025 20:58:42 +0800
Subject: [PATCH 3/4] fix lit
---
llvm/test/lit.cfg.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/test/lit.cfg.py b/llvm/test/lit.cfg.py
index 50921879cd1f2..aad7a088551b2 100644
--- a/llvm/test/lit.cfg.py
+++ b/llvm/test/lit.cfg.py
@@ -466,7 +466,7 @@ def have_cxx_shared_library():
print("could not exec llvm-readobj")
return False
- readobj_out = readobj_cmd.stdout.read().decode("utf-8")
+ readobj_out = readobj_cmd.stdout.read().decode("ascii")
readobj_cmd.wait()
regex = re.compile(r"(libc\+\+|libstdc\+\+|msvcp).*\.(so|dylib|dll)")
>From c31f255f25f1720664f7d0f99ed7b211092da46b Mon Sep 17 00:00:00 2001
From: shore <372660931 at qq.com>
Date: Tue, 18 Mar 2025 20:59:55 +0800
Subject: [PATCH 4/4] fix comment
---
llvm/lib/Target/AMDGPU/SIInstructions.td | 1 -
1 file changed, 1 deletion(-)
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 09d04461da514..43ddccd8fb8c5 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -1840,7 +1840,6 @@ def : BitConvert <v12f32, v12i32, SReg_384>;
def : BitConvert <v12i32, v12f32, VReg_384>;
def : BitConvert <v12f32, v12i32, VReg_384>;
-// 512-bit bitcast
// 512-bit bitcast
def : BitConvert <v32f16, v32i16, VReg_512>;
def : BitConvert <v32i16, v32f16, VReg_512>;
More information about the llvm-commits mailing list