[llvm] [AMDGPU] Add regbankselect rules for G_ADD/SUB and variants (PR #159860)
Petar Avramovic via llvm-commits
llvm-commits at lists.llvm.org
Thu Oct 16 02:44:39 PDT 2025
================
@@ -473,40 +473,14 @@ define amdgpu_ps i32 @s_uaddo_i32(i32 inreg %a, i32 inreg %b) {
ret i32 %ret
}
-define amdgpu_ps i64 @s_uaddo_i64(i64 inreg %a, i64 inreg %b) {
-; GFX7-LABEL: s_uaddo_i64:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_add_u32 s0, s0, s2
-; GFX7-NEXT: s_addc_u32 s1, s1, s3
-; GFX7-NEXT: s_cselect_b32 s2, 1, 0
-; GFX7-NEXT: s_add_u32 s0, s0, s2
-; GFX7-NEXT: s_addc_u32 s1, s1, 0
-; GFX7-NEXT: ; return to shader part epilog
-;
-; GFX8-LABEL: s_uaddo_i64:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_add_u32 s0, s0, s2
-; GFX8-NEXT: s_addc_u32 s1, s1, s3
-; GFX8-NEXT: s_cselect_b32 s2, 1, 0
-; GFX8-NEXT: s_add_u32 s0, s0, s2
-; GFX8-NEXT: s_addc_u32 s1, s1, 0
-; GFX8-NEXT: ; return to shader part epilog
-;
-; GFX9-LABEL: s_uaddo_i64:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_add_u32 s0, s0, s2
-; GFX9-NEXT: s_addc_u32 s1, s1, s3
-; GFX9-NEXT: s_cselect_b32 s2, 1, 0
-; GFX9-NEXT: s_add_u32 s0, s0, s2
-; GFX9-NEXT: s_addc_u32 s1, s1, 0
-; GFX9-NEXT: ; return to shader part epilog
- %uaddo = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 %b)
- %add = extractvalue {i64, i1} %uaddo, 0
- %of = extractvalue {i64, i1} %uaddo, 1
- %of.zext = zext i1 %of to i64
- %ret = add i64 %add, %of.zext
- ret i64 %ret
-}
+;define amdgpu_ps i64 @s_uaddo_i64(i64 inreg %a, i64 inreg %b) {
----------------
petar-avramovic wrote:
Why comment this out?
For this patch, the .ll tests of interest would look like this:
```
define void @s_uaddo_uadde(i64 inreg %a, i64 inreg %b, ptr addrspace(1) %res, ptr addrspace(1) %carry) {
%uaddo = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 %b)
%add = extractvalue {i64, i1} %uaddo, 0
%of = extractvalue {i64, i1} %uaddo, 1
%of32 = select i1 %of, i32 1, i32 0
store i64 %add, ptr addrspace(1) %res
store i32 %of32, ptr addrspace(1) %carry
ret void
}
define void @v_uaddo_uadde(i64 %a, i64 %b, ptr addrspace(1) %res, ptr addrspace(1) %carry) {
%uaddo = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 %b)
%add = extractvalue {i64, i1} %uaddo, 0
%of = extractvalue {i64, i1} %uaddo, 1
%of32 = select i1 %of, i32 1, i32 0
store i64 %add, ptr addrspace(1) %res
store i32 %of32, ptr addrspace(1) %carry
ret void
}
declare {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 %b)
```
The legalizer would lower this and generate UADDO and UADDE.
There is no intrinsic version of uadde; it seems to come only from the legalizer's lowering of the i64 version of uaddo. As for the signed variant, sadd is fully lowered to just an add and compares, so AMDGPU does not have SADDO or SADDE after the legalizer.
https://github.com/llvm/llvm-project/pull/159860
More information about the llvm-commits
mailing list