[llvm] [AMDGPU][GlobalISel] Add frexp_mant/fract intrinsic RegBankLegalize rules (PR #177512)

Fri Jan 23 08:40:01 PST 2026

https://github.com/vangthao95 updated https://github.com/llvm/llvm-project/pull/177512

>From d8bf6787fbe3926e3c335e582c9ae726cad360a6 Mon Sep 17 00:00:00 2001
From: Vang Thao <vang.thao at amd.com>
Date: Thu, 22 Jan 2026 17:29:24 -0800
Subject: [PATCH 1/2] [AMDGPU][GlobalISel] Add frexp_mant/fract intrinsic
 RegBankLegalize rules

---
 .../AMDGPU/AMDGPURegBankLegalizeRules.cpp     |  8 ++
 .../AMDGPU/GlobalISel/llvm.amdgcn.fract.ll    | 76 +++++++++++++++++++
 .../GlobalISel/llvm.amdgcn.frexp.mant.ll      | 76 +++++++++++++++++++
 3 files changed, 160 insertions(+)
 create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.fract.ll
 create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.frexp.mant.ll

diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
index 7441846dc3e34..96c94d73ced7e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
@@ -1204,6 +1204,14 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
       .Uni(S32, {{UniInVgprS32}, {IntrId, Vgpr32, Vgpr32}})
       .Div(S32, {{Vgpr32}, {IntrId, Vgpr32, Vgpr32}});
 
+  addRulesForIOpcs({amdgcn_frexp_mant, amdgcn_fract}, Standard)
+      .Uni(S16, {{UniInVgprS16}, {IntrId, Vgpr16}})
+      .Div(S16, {{Vgpr16}, {IntrId, Vgpr16}})
+      .Uni(S32, {{UniInVgprS32}, {IntrId, Vgpr32}})
+      .Div(S32, {{Vgpr32}, {IntrId, Vgpr32}})
+      .Uni(S64, {{UniInVgprS64}, {IntrId, Vgpr64}})
+      .Div(S64, {{Vgpr64}, {IntrId, Vgpr64}});
+
   addRulesForIOpcs({amdgcn_ubfe, amdgcn_sbfe}, Standard)
       .Div(S32, {{Vgpr32}, {IntrId, Vgpr32, Vgpr32, Vgpr32}})
       .Uni(S32, {{Sgpr32}, {IntrId, Sgpr32, Sgpr32, Sgpr32}, S_BFE})
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.fract.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.fract.ll
new file mode 100644
index 0000000000000..ec34086035659
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.fract.ll
@@ -0,0 +1,76 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
+
+declare half @llvm.amdgcn.fract.f16(half)
+declare float @llvm.amdgcn.fract.f32(float)
+declare double @llvm.amdgcn.fract.f64(double)
+
+define amdgpu_ps half @s_fract_f16(half inreg %src) {
+; GFX9-LABEL: s_fract_f16:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    v_fract_f16_e32 v0, s0
+; GFX9-NEXT:    v_add_f16_e32 v0, v0, v0
+; GFX9-NEXT:    ; return to shader part epilog
+  %fract = call half @llvm.amdgcn.fract.f16(half %src)
+  %res = fadd half %fract, %fract
+  ret half %res
+}
+
+define amdgpu_ps half @v_fract_f16(half %src) {
+; GFX9-LABEL: v_fract_f16:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    v_fract_f16_e32 v0, v0
+; GFX9-NEXT:    v_add_f16_e32 v0, v0, v0
+; GFX9-NEXT:    ; return to shader part epilog
+  %fract = call half @llvm.amdgcn.fract.f16(half %src)
+  %res = fadd half %fract, %fract
+  ret half %res
+}
+
+define amdgpu_ps float @s_fract_f32(float inreg %src) {
+; GFX9-LABEL: s_fract_f32:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    v_fract_f32_e32 v0, s0
+; GFX9-NEXT:    v_add_f32_e32 v0, v0, v0
+; GFX9-NEXT:    ; return to shader part epilog
+  %fract = call float @llvm.amdgcn.fract.f32(float %src)
+  %res = fadd float %fract, %fract
+  ret float %res
+}
+
+define amdgpu_ps float @v_fract_f32(float %src) {
+; GFX9-LABEL: v_fract_f32:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    v_fract_f32_e32 v0, v0
+; GFX9-NEXT:    v_add_f32_e32 v0, v0, v0
+; GFX9-NEXT:    ; return to shader part epilog
+  %fract = call float @llvm.amdgcn.fract.f32(float %src)
+  %res = fadd float %fract, %fract
+  ret float %res
+}
+
+define amdgpu_ps double @s_fract_f64(double inreg %src) {
+; GFX9-LABEL: s_fract_f64:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    v_fract_f64_e32 v[0:1], s[0:1]
+; GFX9-NEXT:    v_add_f64 v[0:1], v[0:1], v[0:1]
+; GFX9-NEXT:    v_readfirstlane_b32 s0, v0
+; GFX9-NEXT:    v_readfirstlane_b32 s1, v1
+; GFX9-NEXT:    ; return to shader part epilog
+  %fract = call double @llvm.amdgcn.fract.f64(double %src)
+  %res = fadd double %fract, %fract
+  ret double %res
+}
+
+define amdgpu_ps double @v_fract_f64(double %src) {
+; GFX9-LABEL: v_fract_f64:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    v_fract_f64_e32 v[0:1], v[0:1]
+; GFX9-NEXT:    v_add_f64 v[0:1], v[0:1], v[0:1]
+; GFX9-NEXT:    v_readfirstlane_b32 s0, v0
+; GFX9-NEXT:    v_readfirstlane_b32 s1, v1
+; GFX9-NEXT:    ; return to shader part epilog
+  %fract = call double @llvm.amdgcn.fract.f64(double %src)
+  %res = fadd double %fract, %fract
+  ret double %res
+}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.frexp.mant.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.frexp.mant.ll
new file mode 100644
index 0000000000000..8dfcb85c714a8
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.frexp.mant.ll
@@ -0,0 +1,76 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
+
+declare half @llvm.amdgcn.frexp.mant.f16(half)
+declare float @llvm.amdgcn.frexp.mant.f32(float)
+declare double @llvm.amdgcn.frexp.mant.f64(double)
+
+define amdgpu_ps half @s_frexp_mant_f16(half inreg %src) {
+; GFX9-LABEL: s_frexp_mant_f16:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    v_frexp_mant_f16_e32 v0, s0
+; GFX9-NEXT:    v_add_f16_e32 v0, v0, v0
+; GFX9-NEXT:    ; return to shader part epilog
+  %mant = call half @llvm.amdgcn.frexp.mant.f16(half %src)
+  %res = fadd half %mant, %mant
+  ret half %res
+}
+
+define amdgpu_ps half @v_frexp_mant_f16(half %src) {
+; GFX9-LABEL: v_frexp_mant_f16:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    v_frexp_mant_f16_e32 v0, v0
+; GFX9-NEXT:    v_add_f16_e32 v0, v0, v0
+; GFX9-NEXT:    ; return to shader part epilog
+  %mant = call half @llvm.amdgcn.frexp.mant.f16(half %src)
+  %res = fadd half %mant, %mant
+  ret half %res
+}
+
+define amdgpu_ps float @s_frexp_mant_f32(float inreg %src) {
+; GFX9-LABEL: s_frexp_mant_f32:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    v_frexp_mant_f32_e32 v0, s0
+; GFX9-NEXT:    v_add_f32_e32 v0, v0, v0
+; GFX9-NEXT:    ; return to shader part epilog
+  %mant = call float @llvm.amdgcn.frexp.mant.f32(float %src)
+  %res = fadd float %mant, %mant
+  ret float %res
+}
+
+define amdgpu_ps float @v_frexp_mant_f32(float %src) {
+; GFX9-LABEL: v_frexp_mant_f32:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    v_frexp_mant_f32_e32 v0, v0
+; GFX9-NEXT:    v_add_f32_e32 v0, v0, v0
+; GFX9-NEXT:    ; return to shader part epilog
+  %mant = call float @llvm.amdgcn.frexp.mant.f32(float %src)
+  %res = fadd float %mant, %mant
+  ret float %res
+}
+
+define amdgpu_ps double @s_frexp_mant_f64(double inreg %src) {
+; GFX9-LABEL: s_frexp_mant_f64:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    v_frexp_mant_f64_e32 v[0:1], s[0:1]
+; GFX9-NEXT:    v_add_f64 v[0:1], v[0:1], v[0:1]
+; GFX9-NEXT:    v_readfirstlane_b32 s0, v0
+; GFX9-NEXT:    v_readfirstlane_b32 s1, v1
+; GFX9-NEXT:    ; return to shader part epilog
+  %mant = call double @llvm.amdgcn.frexp.mant.f64(double %src)
+  %res = fadd double %mant, %mant
+  ret double %res
+}
+
+define amdgpu_ps double @v_frexp_mant_f64(double %src) {
+; GFX9-LABEL: v_frexp_mant_f64:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    v_frexp_mant_f64_e32 v[0:1], v[0:1]
+; GFX9-NEXT:    v_add_f64 v[0:1], v[0:1], v[0:1]
+; GFX9-NEXT:    v_readfirstlane_b32 s0, v0
+; GFX9-NEXT:    v_readfirstlane_b32 s1, v1
+; GFX9-NEXT:    ; return to shader part epilog
+  %mant = call double @llvm.amdgcn.frexp.mant.f64(double %src)
+  %res = fadd double %mant, %mant
+  ret double %res
+}

>From 26b7c893557e9fa1a42b614ecdf5c03a5165fcd1 Mon Sep 17 00:00:00 2001
From: Vang Thao <vang.thao at amd.com>
Date: Fri, 23 Jan 2026 08:38:51 -0800
Subject: [PATCH 2/2] Change fract test to gfx12 and change f64 salu fract
 test.

---
 .../AMDGPU/GlobalISel/llvm.amdgcn.fract.ll    | 98 +++++++++++--------
 1 file changed, 59 insertions(+), 39 deletions(-)

diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.fract.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.fract.ll
index ec34086035659..bf23b635c4d22 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.fract.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.fract.ll
@@ -1,75 +1,95 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX12 %s
 
 declare half @llvm.amdgcn.fract.f16(half)
 declare float @llvm.amdgcn.fract.f32(float)
 declare double @llvm.amdgcn.fract.f64(double)
 
 define amdgpu_ps half @s_fract_f16(half inreg %src) {
-; GFX9-LABEL: s_fract_f16:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    v_fract_f16_e32 v0, s0
-; GFX9-NEXT:    v_add_f16_e32 v0, v0, v0
-; GFX9-NEXT:    ; return to shader part epilog
+; GFX12-LABEL: s_fract_f16:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    v_fract_f16_e32 v0, s0
+; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_2)
+; GFX12-NEXT:    v_readfirstlane_b32 s0, v0
+; GFX12-NEXT:    s_add_f16 s0, s0, s0
+; GFX12-NEXT:    s_wait_alu depctr_sa_sdst(0)
+; GFX12-NEXT:    v_mov_b32_e32 v0, s0
+; GFX12-NEXT:    ; return to shader part epilog
   %fract = call half @llvm.amdgcn.fract.f16(half %src)
   %res = fadd half %fract, %fract
   ret half %res
 }
 
 define amdgpu_ps half @v_fract_f16(half %src) {
-; GFX9-LABEL: v_fract_f16:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    v_fract_f16_e32 v0, v0
-; GFX9-NEXT:    v_add_f16_e32 v0, v0, v0
-; GFX9-NEXT:    ; return to shader part epilog
+; GFX12-LABEL: v_fract_f16:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    v_fract_f16_e32 v0, v0
+; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX12-NEXT:    v_add_f16_e32 v0, v0, v0
+; GFX12-NEXT:    ; return to shader part epilog
   %fract = call half @llvm.amdgcn.fract.f16(half %src)
   %res = fadd half %fract, %fract
   ret half %res
 }
 
 define amdgpu_ps float @s_fract_f32(float inreg %src) {
-; GFX9-LABEL: s_fract_f32:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    v_fract_f32_e32 v0, s0
-; GFX9-NEXT:    v_add_f32_e32 v0, v0, v0
-; GFX9-NEXT:    ; return to shader part epilog
+; GFX12-LABEL: s_fract_f32:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    v_fract_f32_e32 v0, s0
+; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_2)
+; GFX12-NEXT:    v_readfirstlane_b32 s0, v0
+; GFX12-NEXT:    s_add_f32 s0, s0, s0
+; GFX12-NEXT:    s_wait_alu depctr_sa_sdst(0)
+; GFX12-NEXT:    v_mov_b32_e32 v0, s0
+; GFX12-NEXT:    ; return to shader part epilog
   %fract = call float @llvm.amdgcn.fract.f32(float %src)
   %res = fadd float %fract, %fract
   ret float %res
 }
 
 define amdgpu_ps float @v_fract_f32(float %src) {
-; GFX9-LABEL: v_fract_f32:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    v_fract_f32_e32 v0, v0
-; GFX9-NEXT:    v_add_f32_e32 v0, v0, v0
-; GFX9-NEXT:    ; return to shader part epilog
+; GFX12-LABEL: v_fract_f32:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    v_fract_f32_e32 v0, v0
+; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX12-NEXT:    v_add_f32_e32 v0, v0, v0
+; GFX12-NEXT:    ; return to shader part epilog
   %fract = call float @llvm.amdgcn.fract.f32(float %src)
   %res = fadd float %fract, %fract
   ret float %res
 }
 
-define amdgpu_ps double @s_fract_f64(double inreg %src) {
-; GFX9-LABEL: s_fract_f64:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    v_fract_f64_e32 v[0:1], s[0:1]
-; GFX9-NEXT:    v_add_f64 v[0:1], v[0:1], v[0:1]
-; GFX9-NEXT:    v_readfirstlane_b32 s0, v0
-; GFX9-NEXT:    v_readfirstlane_b32 s1, v1
-; GFX9-NEXT:    ; return to shader part epilog
-  %fract = call double @llvm.amdgcn.fract.f64(double %src)
-  %res = fadd double %fract, %fract
-  ret double %res
+define amdgpu_ps void @s_fract_f64(double inreg %src, ptr addrspace(1) %out) {
+; GFX12-LABEL: s_fract_f64:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    v_fract_f64_e32 v[2:3], s[0:1]
+; GFX12-NEXT:    s_mov_b32 s2, 0
+; GFX12-NEXT:    s_brev_b32 s3, 1
+; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX12-NEXT:    v_readfirstlane_b32 s0, v2
+; GFX12-NEXT:    v_readfirstlane_b32 s1, v3
+; GFX12-NEXT:    s_xor_b64 s[0:1], s[0:1], s[2:3]
+; GFX12-NEXT:    s_wait_alu depctr_sa_sdst(0)
+; GFX12-NEXT:    v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
+; GFX12-NEXT:    global_store_b64 v[0:1], v[2:3], off
+; GFX12-NEXT:    s_endpgm
+  %fract = call double @llvm.amdgcn.fract.f64(double %src)
+  %fract.i64 = bitcast double %fract to i64
+  %neg = xor i64 %fract.i64, u0x8000000000000000
+  store i64 %neg, ptr addrspace(1) %out
+  ret void
 }
 
 define amdgpu_ps double @v_fract_f64(double %src) {
-; GFX9-LABEL: v_fract_f64:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    v_fract_f64_e32 v[0:1], v[0:1]
-; GFX9-NEXT:    v_add_f64 v[0:1], v[0:1], v[0:1]
-; GFX9-NEXT:    v_readfirstlane_b32 s0, v0
-; GFX9-NEXT:    v_readfirstlane_b32 s1, v1
-; GFX9-NEXT:    ; return to shader part epilog
+; GFX12-LABEL: v_fract_f64:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    v_fract_f64_e32 v[0:1], v[0:1]
+; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX12-NEXT:    v_add_f64_e32 v[0:1], v[0:1], v[0:1]
+; GFX12-NEXT:    v_readfirstlane_b32 s0, v0
+; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX12-NEXT:    v_readfirstlane_b32 s1, v1
+; GFX12-NEXT:    ; return to shader part epilog
   %fract = call double @llvm.amdgcn.fract.f64(double %src)
   %res = fadd double %fract, %fract
   ret double %res