[llvm-branch-commits] [llvm] AMDGPU: Use vgpr to implement divergent i32->i64 anyext (PR #168167)
Matt Arsenault via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Fri Nov 14 18:09:12 PST 2025
https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/168167
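Select a divergent i32->i64 anyext as a VGPR (VReg_64) REG_SEQUENCE, keeping the SGPR (SReg_64) form only for the uniform case, for consistency with the zext case.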
From 41140cee8808f0703509a43fd9f8a51b52c6ddfc Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Fri, 14 Nov 2025 16:32:25 -0800
Subject: [PATCH] AMDGPU: Use vgpr to implement divergent i32->i64 anyext
Handle this for consistency with the zext case.
---
llvm/lib/Target/AMDGPU/SIInstructions.td | 7 ++++-
llvm/test/CodeGen/AMDGPU/rem_i128.ll | 30 +++++++------------
.../test/CodeGen/AMDGPU/wwm-reserved-spill.ll | 3 +-
llvm/test/CodeGen/AMDGPU/wwm-reserved.ll | 6 ++--
4 files changed, 19 insertions(+), 27 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index b7256b81ee826..6cc9b3cc67530 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -2951,10 +2951,15 @@ def : GCNPat <
>;
def : GCNPat <
- (i64 (anyext i32:$src)),
+ (i64 (UniformUnaryFrag<anyext> i32:$src)),
(REG_SEQUENCE SReg_64, $src, sub0, (i32 (IMPLICIT_DEF)), sub1)
>;
+def : GCNPat <
+ (i64 (anyext i32:$src)),
+ (REG_SEQUENCE VReg_64, $src, sub0, (i32 (IMPLICIT_DEF)), sub1)
+>;
+
class ZExt_i64_i1_Pat <SDNode ext> : GCNPat <
(i64 (ext i1:$src)),
(REG_SEQUENCE VReg_64,
diff --git a/llvm/test/CodeGen/AMDGPU/rem_i128.ll b/llvm/test/CodeGen/AMDGPU/rem_i128.ll
index d36d95d182ab7..32862f73d2f29 100644
--- a/llvm/test/CodeGen/AMDGPU/rem_i128.ll
+++ b/llvm/test/CodeGen/AMDGPU/rem_i128.ll
@@ -1161,9 +1161,8 @@ define i128 @v_srem_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: v_add3_u32 v8, v0, v2, v8
; GFX9-O0-NEXT: ; implicit-def: $sgpr5
; GFX9-O0-NEXT: ; implicit-def: $sgpr6
-; GFX9-O0-NEXT: v_mov_b32_e32 v0, s5
; GFX9-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v9, v0
+; GFX9-O0-NEXT: v_mov_b32_e32 v9, s5
; GFX9-O0-NEXT: v_lshlrev_b64 v[8:9], s4, v[8:9]
; GFX9-O0-NEXT: v_mov_b32_e32 v14, v9
; GFX9-O0-NEXT: ; kill: def $vgpr17 killed $vgpr17 killed $vgpr17_vgpr18 killed $exec
@@ -1190,9 +1189,8 @@ define i128 @v_srem_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: v_add3_u32 v14, v9, v14, v15
; GFX9-O0-NEXT: ; implicit-def: $sgpr5
; GFX9-O0-NEXT: ; implicit-def: $sgpr6
-; GFX9-O0-NEXT: v_mov_b32_e32 v9, s5
; GFX9-O0-NEXT: ; kill: def $vgpr14 killed $vgpr14 def $vgpr14_vgpr15 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v15, v9
+; GFX9-O0-NEXT: v_mov_b32_e32 v15, s5
; GFX9-O0-NEXT: v_lshlrev_b64 v[14:15], s4, v[14:15]
; GFX9-O0-NEXT: v_mov_b32_e32 v16, v15
; GFX9-O0-NEXT: ; kill: def $vgpr19 killed $vgpr19 killed $vgpr19_vgpr20 killed $exec
@@ -1221,9 +1219,8 @@ define i128 @v_srem_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: v_mov_b32_e32 v14, v15
; GFX9-O0-NEXT: ; implicit-def: $sgpr5
; GFX9-O0-NEXT: ; implicit-def: $sgpr6
-; GFX9-O0-NEXT: v_mov_b32_e32 v16, s5
; GFX9-O0-NEXT: ; kill: def $vgpr14 killed $vgpr14 def $vgpr14_vgpr15 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v15, v16
+; GFX9-O0-NEXT: v_mov_b32_e32 v15, s5
; GFX9-O0-NEXT: v_lshlrev_b64 v[15:16], s4, v[14:15]
; GFX9-O0-NEXT: v_mov_b32_e32 v14, v16
; GFX9-O0-NEXT: v_or_b32_e64 v9, v9, v14
@@ -1240,9 +1237,8 @@ define i128 @v_srem_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: v_mov_b32_e32 v14, v15
; GFX9-O0-NEXT: ; implicit-def: $sgpr5
; GFX9-O0-NEXT: ; implicit-def: $sgpr6
-; GFX9-O0-NEXT: v_mov_b32_e32 v9, s5
; GFX9-O0-NEXT: ; kill: def $vgpr14 killed $vgpr14 def $vgpr14_vgpr15 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v15, v9
+; GFX9-O0-NEXT: v_mov_b32_e32 v15, s5
; GFX9-O0-NEXT: v_lshlrev_b64 v[14:15], s4, v[14:15]
; GFX9-O0-NEXT: v_mov_b32_e32 v9, v15
; GFX9-O0-NEXT: v_or_b32_e64 v8, v8, v9
@@ -1280,9 +1276,8 @@ define i128 @v_srem_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: v_mov_b32_e32 v23, v24
; GFX9-O0-NEXT: ; implicit-def: $sgpr5
; GFX9-O0-NEXT: ; implicit-def: $sgpr6
-; GFX9-O0-NEXT: v_mov_b32_e32 v1, s5
; GFX9-O0-NEXT: ; kill: def $vgpr23 killed $vgpr23 def $vgpr23_vgpr24 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v24, v1
+; GFX9-O0-NEXT: v_mov_b32_e32 v24, s5
; GFX9-O0-NEXT: v_lshlrev_b64 v[23:24], s4, v[23:24]
; GFX9-O0-NEXT: v_mov_b32_e32 v1, v24
; GFX9-O0-NEXT: v_or_b32_e64 v0, v0, v1
@@ -2447,9 +2442,8 @@ define i128 @v_urem_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: v_add3_u32 v2, v2, v3, v10
; GFX9-O0-NEXT: ; implicit-def: $sgpr5
; GFX9-O0-NEXT: ; implicit-def: $sgpr6
-; GFX9-O0-NEXT: v_mov_b32_e32 v10, s5
; GFX9-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v3, v10
+; GFX9-O0-NEXT: v_mov_b32_e32 v3, s5
; GFX9-O0-NEXT: v_lshlrev_b64 v[17:18], s4, v[2:3]
; GFX9-O0-NEXT: v_mov_b32_e32 v3, v18
; GFX9-O0-NEXT: v_mov_b32_e32 v13, v4
@@ -2476,9 +2470,8 @@ define i128 @v_urem_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: v_add3_u32 v2, v2, v3, v11
; GFX9-O0-NEXT: ; implicit-def: $sgpr5
; GFX9-O0-NEXT: ; implicit-def: $sgpr6
-; GFX9-O0-NEXT: v_mov_b32_e32 v11, s5
; GFX9-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v3, v11
+; GFX9-O0-NEXT: v_mov_b32_e32 v3, s5
; GFX9-O0-NEXT: v_lshlrev_b64 v[2:3], s4, v[2:3]
; GFX9-O0-NEXT: v_mov_b32_e32 v12, v3
; GFX9-O0-NEXT: ; kill: def $vgpr15 killed $vgpr15 killed $vgpr15_vgpr16 killed $exec
@@ -2507,9 +2500,8 @@ define i128 @v_urem_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: v_mov_b32_e32 v15, v16
; GFX9-O0-NEXT: ; implicit-def: $sgpr5
; GFX9-O0-NEXT: ; implicit-def: $sgpr6
-; GFX9-O0-NEXT: v_mov_b32_e32 v12, s5
; GFX9-O0-NEXT: ; kill: def $vgpr15 killed $vgpr15 def $vgpr15_vgpr16 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v16, v12
+; GFX9-O0-NEXT: v_mov_b32_e32 v16, s5
; GFX9-O0-NEXT: v_lshlrev_b64 v[15:16], s4, v[15:16]
; GFX9-O0-NEXT: v_mov_b32_e32 v12, v16
; GFX9-O0-NEXT: v_or_b32_e64 v11, v11, v12
@@ -2526,9 +2518,8 @@ define i128 @v_urem_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: v_mov_b32_e32 v15, v16
; GFX9-O0-NEXT: ; implicit-def: $sgpr5
; GFX9-O0-NEXT: ; implicit-def: $sgpr6
-; GFX9-O0-NEXT: v_mov_b32_e32 v17, s5
; GFX9-O0-NEXT: ; kill: def $vgpr15 killed $vgpr15 def $vgpr15_vgpr16 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v16, v17
+; GFX9-O0-NEXT: v_mov_b32_e32 v16, s5
; GFX9-O0-NEXT: v_lshlrev_b64 v[15:16], s4, v[15:16]
; GFX9-O0-NEXT: v_mov_b32_e32 v17, v16
; GFX9-O0-NEXT: v_or_b32_e64 v10, v10, v17
@@ -2566,9 +2557,8 @@ define i128 @v_urem_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: v_mov_b32_e32 v19, v20
; GFX9-O0-NEXT: ; implicit-def: $sgpr5
; GFX9-O0-NEXT: ; implicit-def: $sgpr6
-; GFX9-O0-NEXT: v_mov_b32_e32 v12, s5
; GFX9-O0-NEXT: ; kill: def $vgpr19 killed $vgpr19 def $vgpr19_vgpr20 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v20, v12
+; GFX9-O0-NEXT: v_mov_b32_e32 v20, s5
; GFX9-O0-NEXT: v_lshlrev_b64 v[19:20], s4, v[19:20]
; GFX9-O0-NEXT: v_mov_b32_e32 v12, v20
; GFX9-O0-NEXT: v_or_b32_e64 v5, v5, v12
diff --git a/llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll b/llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll
index fe183287c46c2..81e17400973a4 100644
--- a/llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll
+++ b/llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll
@@ -475,9 +475,8 @@ define amdgpu_gfx i64 @strict_wwm_called_i64(i64 %a) noinline {
; GFX9-O0-NEXT: v_add3_u32 v0, v0, v1, v2
; GFX9-O0-NEXT: ; implicit-def: $sgpr35
; GFX9-O0-NEXT: ; implicit-def: $sgpr36
-; GFX9-O0-NEXT: v_mov_b32_e32 v2, s35
; GFX9-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v1, v2
+; GFX9-O0-NEXT: v_mov_b32_e32 v1, s35
; GFX9-O0-NEXT: v_lshlrev_b64 v[1:2], s34, v[0:1]
; GFX9-O0-NEXT: v_mov_b32_e32 v3, v2
; GFX9-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 killed $vgpr6_vgpr7 killed $exec
diff --git a/llvm/test/CodeGen/AMDGPU/wwm-reserved.ll b/llvm/test/CodeGen/AMDGPU/wwm-reserved.ll
index 2aacb96ca4306..72672c8b6efad 100644
--- a/llvm/test/CodeGen/AMDGPU/wwm-reserved.ll
+++ b/llvm/test/CodeGen/AMDGPU/wwm-reserved.ll
@@ -479,9 +479,8 @@ define i64 @called_i64(i64 %a) noinline {
; GFX9-O0-NEXT: v_add3_u32 v0, v0, v1, v2
; GFX9-O0-NEXT: ; implicit-def: $sgpr5
; GFX9-O0-NEXT: ; implicit-def: $sgpr6
-; GFX9-O0-NEXT: v_mov_b32_e32 v2, s5
; GFX9-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v1, v2
+; GFX9-O0-NEXT: v_mov_b32_e32 v1, s5
; GFX9-O0-NEXT: v_lshlrev_b64 v[1:2], s4, v[0:1]
; GFX9-O0-NEXT: v_mov_b32_e32 v3, v2
; GFX9-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 killed $vgpr6_vgpr7 killed $exec
@@ -1310,9 +1309,8 @@ define i64 @strict_wwm_called_i64(i64 %a) noinline {
; GFX9-O0-NEXT: v_add3_u32 v0, v0, v1, v2
; GFX9-O0-NEXT: ; implicit-def: $sgpr5
; GFX9-O0-NEXT: ; implicit-def: $sgpr6
-; GFX9-O0-NEXT: v_mov_b32_e32 v2, s5
; GFX9-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v1, v2
+; GFX9-O0-NEXT: v_mov_b32_e32 v1, s5
; GFX9-O0-NEXT: v_lshlrev_b64 v[1:2], s4, v[0:1]
; GFX9-O0-NEXT: v_mov_b32_e32 v3, v2
; GFX9-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 killed $vgpr6_vgpr7 killed $exec
More information about the llvm-branch-commits mailing list