[llvm] 32073b8 - AMDGPU: Do not generate non-temporal hint when Load_Tr intrinsic did not specify it (#79104)

via llvm-commits llvm-commits at lists.llvm.org
Tue Jan 23 10:05:36 PST 2024


Author: Changpeng Fang
Date: 2024-01-23T10:05:32-08:00
New Revision: 32073b835674a9e7bc3e1ee9708efb7c58e7394f

URL: https://github.com/llvm/llvm-project/commit/32073b835674a9e7bc3e1ee9708efb7c58e7394f
DIFF: https://github.com/llvm/llvm-project/commit/32073b835674a9e7bc3e1ee9708efb7c58e7394f.diff

LOG: AMDGPU: Do not generate non-temporal hint when Load_Tr intrinsic did not specify it (#79104)

int_amdgcn_global_load_tr did not specify a non-temporal load transpose,
so we should not generate the non-temporal hint for the load. We need to
implement getTgtMemIntrinsic to create the corresponding MemSDNode, and we
do not set the non-temporal flag because the intrinsic did not specify it.

NOTE: We need to implement getTgtMemIntrinsic for every memory intrinsic.

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/SIISelLowering.cpp
    llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.load.tr-w32.ll
    llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.load.tr-w64.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 073c8cc72117375..cf947dccafac55f 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -1348,6 +1348,14 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
                   MachineMemOperand::MOVolatile;
     return true;
   }
+  case Intrinsic::amdgcn_global_load_tr: {
+    Info.opc = ISD::INTRINSIC_W_CHAIN;
+    Info.memVT = MVT::getVT(CI.getType());
+    Info.ptrVal = CI.getOperand(0);
+    Info.align.reset();
+    Info.flags |= MachineMemOperand::MOLoad;
+    return true;
+  }
   case Intrinsic::amdgcn_ds_gws_init:
   case Intrinsic::amdgcn_ds_gws_barrier:
   case Intrinsic::amdgcn_ds_gws_sema_v:
@@ -1407,6 +1415,7 @@ bool SITargetLowering::getAddrModeArguments(IntrinsicInst *II,
                                             SmallVectorImpl<Value*> &Ops,
                                             Type *&AccessTy) const {
   switch (II->getIntrinsicID()) {
+  case Intrinsic::amdgcn_global_load_tr:
   case Intrinsic::amdgcn_ds_ordered_add:
   case Intrinsic::amdgcn_ds_ordered_swap:
   case Intrinsic::amdgcn_ds_append:

diff  --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.load.tr-w32.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.load.tr-w32.ll
index 5382b56b92fb1d4..8f1e6f3ac1a0c3d 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.load.tr-w32.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.load.tr-w32.ll
@@ -13,9 +13,8 @@ define amdgpu_kernel void @global_load_tr_b64(ptr addrspace(1) %addr, ptr addrsp
 ; GFX12-SDAG-W32-NEXT:    s_load_b128 s[0:3], s[0:1], 0x24
 ; GFX12-SDAG-W32-NEXT:    v_mov_b32_e32 v2, 0
 ; GFX12-SDAG-W32-NEXT:    s_wait_kmcnt 0x0
-; GFX12-SDAG-W32-NEXT:    global_load_tr_b64 v[0:1], v2, s[0:1] offset:32 th:TH_LOAD_NT
+; GFX12-SDAG-W32-NEXT:    global_load_tr_b64 v[0:1], v2, s[0:1] offset:32
 ; GFX12-SDAG-W32-NEXT:    s_wait_loadcnt 0x0
-; GFX12-SDAG-W32-NEXT:    global_inv scope:SCOPE_SYS
 ; GFX12-SDAG-W32-NEXT:    global_store_b64 v2, v[0:1], s[2:3]
 ; GFX12-SDAG-W32-NEXT:    s_nop 0
 ; GFX12-SDAG-W32-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
@@ -26,9 +25,8 @@ define amdgpu_kernel void @global_load_tr_b64(ptr addrspace(1) %addr, ptr addrsp
 ; GFX12-GISEL-W32-NEXT:    s_load_b128 s[0:3], s[0:1], 0x24
 ; GFX12-GISEL-W32-NEXT:    v_mov_b32_e32 v2, 0
 ; GFX12-GISEL-W32-NEXT:    s_wait_kmcnt 0x0
-; GFX12-GISEL-W32-NEXT:    global_load_tr_b64 v[0:1], v2, s[0:1] offset:32 th:TH_LOAD_NT
+; GFX12-GISEL-W32-NEXT:    global_load_tr_b64 v[0:1], v2, s[0:1] offset:32
 ; GFX12-GISEL-W32-NEXT:    s_wait_loadcnt 0x0
-; GFX12-GISEL-W32-NEXT:    global_inv scope:SCOPE_SYS
 ; GFX12-GISEL-W32-NEXT:    global_store_b64 v2, v[0:1], s[2:3]
 ; GFX12-GISEL-W32-NEXT:    s_nop 0
 ; GFX12-GISEL-W32-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
@@ -46,9 +44,8 @@ define amdgpu_kernel void @global_load_tr_b128_i16(ptr addrspace(1) %addr, ptr a
 ; GFX12-SDAG-W32-NEXT:    s_load_b128 s[0:3], s[0:1], 0x24
 ; GFX12-SDAG-W32-NEXT:    v_mov_b32_e32 v4, 0
 ; GFX12-SDAG-W32-NEXT:    s_wait_kmcnt 0x0
-; GFX12-SDAG-W32-NEXT:    global_load_tr_b128 v[0:3], v4, s[0:1] offset:32 th:TH_LOAD_NT
+; GFX12-SDAG-W32-NEXT:    global_load_tr_b128 v[0:3], v4, s[0:1] offset:32
 ; GFX12-SDAG-W32-NEXT:    s_wait_loadcnt 0x0
-; GFX12-SDAG-W32-NEXT:    global_inv scope:SCOPE_SYS
 ; GFX12-SDAG-W32-NEXT:    global_store_b128 v4, v[0:3], s[2:3]
 ; GFX12-SDAG-W32-NEXT:    s_nop 0
 ; GFX12-SDAG-W32-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
@@ -59,9 +56,8 @@ define amdgpu_kernel void @global_load_tr_b128_i16(ptr addrspace(1) %addr, ptr a
 ; GFX12-GISEL-W32-NEXT:    s_load_b128 s[0:3], s[0:1], 0x24
 ; GFX12-GISEL-W32-NEXT:    v_mov_b32_e32 v4, 0
 ; GFX12-GISEL-W32-NEXT:    s_wait_kmcnt 0x0
-; GFX12-GISEL-W32-NEXT:    global_load_tr_b128 v[0:3], v4, s[0:1] offset:32 th:TH_LOAD_NT
+; GFX12-GISEL-W32-NEXT:    global_load_tr_b128 v[0:3], v4, s[0:1] offset:32
 ; GFX12-GISEL-W32-NEXT:    s_wait_loadcnt 0x0
-; GFX12-GISEL-W32-NEXT:    global_inv scope:SCOPE_SYS
 ; GFX12-GISEL-W32-NEXT:    global_store_b128 v4, v[0:3], s[2:3]
 ; GFX12-GISEL-W32-NEXT:    s_nop 0
 ; GFX12-GISEL-W32-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
@@ -79,9 +75,8 @@ define amdgpu_kernel void @global_load_tr_b128_half(ptr addrspace(1) %addr, ptr
 ; GFX12-SDAG-W32-NEXT:    s_load_b128 s[0:3], s[0:1], 0x24
 ; GFX12-SDAG-W32-NEXT:    v_mov_b32_e32 v4, 0
 ; GFX12-SDAG-W32-NEXT:    s_wait_kmcnt 0x0
-; GFX12-SDAG-W32-NEXT:    global_load_tr_b128 v[0:3], v4, s[0:1] offset:32 th:TH_LOAD_NT
+; GFX12-SDAG-W32-NEXT:    global_load_tr_b128 v[0:3], v4, s[0:1] offset:32
 ; GFX12-SDAG-W32-NEXT:    s_wait_loadcnt 0x0
-; GFX12-SDAG-W32-NEXT:    global_inv scope:SCOPE_SYS
 ; GFX12-SDAG-W32-NEXT:    global_store_b128 v4, v[0:3], s[2:3]
 ; GFX12-SDAG-W32-NEXT:    s_nop 0
 ; GFX12-SDAG-W32-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
@@ -92,9 +87,8 @@ define amdgpu_kernel void @global_load_tr_b128_half(ptr addrspace(1) %addr, ptr
 ; GFX12-GISEL-W32-NEXT:    s_load_b128 s[0:3], s[0:1], 0x24
 ; GFX12-GISEL-W32-NEXT:    v_mov_b32_e32 v4, 0
 ; GFX12-GISEL-W32-NEXT:    s_wait_kmcnt 0x0
-; GFX12-GISEL-W32-NEXT:    global_load_tr_b128 v[0:3], v4, s[0:1] offset:32 th:TH_LOAD_NT
+; GFX12-GISEL-W32-NEXT:    global_load_tr_b128 v[0:3], v4, s[0:1] offset:32
 ; GFX12-GISEL-W32-NEXT:    s_wait_loadcnt 0x0
-; GFX12-GISEL-W32-NEXT:    global_inv scope:SCOPE_SYS
 ; GFX12-GISEL-W32-NEXT:    global_store_b128 v4, v[0:3], s[2:3]
 ; GFX12-GISEL-W32-NEXT:    s_nop 0
 ; GFX12-GISEL-W32-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
@@ -112,9 +106,8 @@ define amdgpu_kernel void @global_load_tr_b128_bfloat(ptr addrspace(1) %addr, pt
 ; GFX12-SDAG-W32-NEXT:    s_load_b128 s[0:3], s[0:1], 0x24
 ; GFX12-SDAG-W32-NEXT:    v_mov_b32_e32 v4, 0
 ; GFX12-SDAG-W32-NEXT:    s_wait_kmcnt 0x0
-; GFX12-SDAG-W32-NEXT:    global_load_tr_b128 v[0:3], v4, s[0:1] offset:32 th:TH_LOAD_NT
+; GFX12-SDAG-W32-NEXT:    global_load_tr_b128 v[0:3], v4, s[0:1] offset:32
 ; GFX12-SDAG-W32-NEXT:    s_wait_loadcnt 0x0
-; GFX12-SDAG-W32-NEXT:    global_inv scope:SCOPE_SYS
 ; GFX12-SDAG-W32-NEXT:    global_store_b128 v4, v[0:3], s[2:3]
 ; GFX12-SDAG-W32-NEXT:    s_nop 0
 ; GFX12-SDAG-W32-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
@@ -125,9 +118,8 @@ define amdgpu_kernel void @global_load_tr_b128_bfloat(ptr addrspace(1) %addr, pt
 ; GFX12-GISEL-W32-NEXT:    s_load_b128 s[0:3], s[0:1], 0x24
 ; GFX12-GISEL-W32-NEXT:    v_mov_b32_e32 v4, 0
 ; GFX12-GISEL-W32-NEXT:    s_wait_kmcnt 0x0
-; GFX12-GISEL-W32-NEXT:    global_load_tr_b128 v[0:3], v4, s[0:1] offset:32 th:TH_LOAD_NT
+; GFX12-GISEL-W32-NEXT:    global_load_tr_b128 v[0:3], v4, s[0:1] offset:32
 ; GFX12-GISEL-W32-NEXT:    s_wait_loadcnt 0x0
-; GFX12-GISEL-W32-NEXT:    global_inv scope:SCOPE_SYS
 ; GFX12-GISEL-W32-NEXT:    global_store_b128 v4, v[0:3], s[2:3]
 ; GFX12-GISEL-W32-NEXT:    s_nop 0
 ; GFX12-GISEL-W32-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)

diff  --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.load.tr-w64.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.load.tr-w64.ll
index 0936d1756364404..d5a45fb838fc7f8 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.load.tr-w64.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.load.tr-w64.ll
@@ -13,9 +13,8 @@ define amdgpu_kernel void @global_load_tr_b64(ptr addrspace(1) %addr, ptr addrsp
 ; GFX12-SDAG-W64-NEXT:    s_load_b128 s[0:3], s[0:1], 0x24
 ; GFX12-SDAG-W64-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX12-SDAG-W64-NEXT:    s_wait_kmcnt 0x0
-; GFX12-SDAG-W64-NEXT:    global_load_tr_b64 v1, v0, s[0:1] offset:32 th:TH_LOAD_NT
+; GFX12-SDAG-W64-NEXT:    global_load_tr_b64 v1, v0, s[0:1] offset:32
 ; GFX12-SDAG-W64-NEXT:    s_wait_loadcnt 0x0
-; GFX12-SDAG-W64-NEXT:    global_inv scope:SCOPE_SYS
 ; GFX12-SDAG-W64-NEXT:    global_store_b32 v0, v1, s[2:3]
 ; GFX12-SDAG-W64-NEXT:    s_nop 0
 ; GFX12-SDAG-W64-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
@@ -26,9 +25,8 @@ define amdgpu_kernel void @global_load_tr_b64(ptr addrspace(1) %addr, ptr addrsp
 ; GFX12-GISEL-W64-NEXT:    s_load_b128 s[0:3], s[0:1], 0x24
 ; GFX12-GISEL-W64-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX12-GISEL-W64-NEXT:    s_wait_kmcnt 0x0
-; GFX12-GISEL-W64-NEXT:    global_load_tr_b64 v1, v0, s[0:1] offset:32 th:TH_LOAD_NT
+; GFX12-GISEL-W64-NEXT:    global_load_tr_b64 v1, v0, s[0:1] offset:32
 ; GFX12-GISEL-W64-NEXT:    s_wait_loadcnt 0x0
-; GFX12-GISEL-W64-NEXT:    global_inv scope:SCOPE_SYS
 ; GFX12-GISEL-W64-NEXT:    global_store_b32 v0, v1, s[2:3]
 ; GFX12-GISEL-W64-NEXT:    s_nop 0
 ; GFX12-GISEL-W64-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
@@ -46,9 +44,8 @@ define amdgpu_kernel void @global_load_tr_b128_i16(ptr addrspace(1) %addr, ptr a
 ; GFX12-SDAG-W64-NEXT:    s_load_b128 s[0:3], s[0:1], 0x24
 ; GFX12-SDAG-W64-NEXT:    v_mov_b32_e32 v2, 0
 ; GFX12-SDAG-W64-NEXT:    s_wait_kmcnt 0x0
-; GFX12-SDAG-W64-NEXT:    global_load_tr_b128 v[0:1], v2, s[0:1] offset:32 th:TH_LOAD_NT
+; GFX12-SDAG-W64-NEXT:    global_load_tr_b128 v[0:1], v2, s[0:1] offset:32
 ; GFX12-SDAG-W64-NEXT:    s_wait_loadcnt 0x0
-; GFX12-SDAG-W64-NEXT:    global_inv scope:SCOPE_SYS
 ; GFX12-SDAG-W64-NEXT:    global_store_b64 v2, v[0:1], s[2:3]
 ; GFX12-SDAG-W64-NEXT:    s_nop 0
 ; GFX12-SDAG-W64-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
@@ -59,9 +56,8 @@ define amdgpu_kernel void @global_load_tr_b128_i16(ptr addrspace(1) %addr, ptr a
 ; GFX12-GISEL-W64-NEXT:    s_load_b128 s[0:3], s[0:1], 0x24
 ; GFX12-GISEL-W64-NEXT:    v_mov_b32_e32 v2, 0
 ; GFX12-GISEL-W64-NEXT:    s_wait_kmcnt 0x0
-; GFX12-GISEL-W64-NEXT:    global_load_tr_b128 v[0:1], v2, s[0:1] offset:32 th:TH_LOAD_NT
+; GFX12-GISEL-W64-NEXT:    global_load_tr_b128 v[0:1], v2, s[0:1] offset:32
 ; GFX12-GISEL-W64-NEXT:    s_wait_loadcnt 0x0
-; GFX12-GISEL-W64-NEXT:    global_inv scope:SCOPE_SYS
 ; GFX12-GISEL-W64-NEXT:    global_store_b64 v2, v[0:1], s[2:3]
 ; GFX12-GISEL-W64-NEXT:    s_nop 0
 ; GFX12-GISEL-W64-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
@@ -79,9 +75,8 @@ define amdgpu_kernel void @global_load_tr_b128_half(ptr addrspace(1) %addr, ptr
 ; GFX12-SDAG-W64-NEXT:    s_load_b128 s[0:3], s[0:1], 0x24
 ; GFX12-SDAG-W64-NEXT:    v_mov_b32_e32 v2, 0
 ; GFX12-SDAG-W64-NEXT:    s_wait_kmcnt 0x0
-; GFX12-SDAG-W64-NEXT:    global_load_tr_b128 v[0:1], v2, s[0:1] offset:32 th:TH_LOAD_NT
+; GFX12-SDAG-W64-NEXT:    global_load_tr_b128 v[0:1], v2, s[0:1] offset:32
 ; GFX12-SDAG-W64-NEXT:    s_wait_loadcnt 0x0
-; GFX12-SDAG-W64-NEXT:    global_inv scope:SCOPE_SYS
 ; GFX12-SDAG-W64-NEXT:    global_store_b64 v2, v[0:1], s[2:3]
 ; GFX12-SDAG-W64-NEXT:    s_nop 0
 ; GFX12-SDAG-W64-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
@@ -92,9 +87,8 @@ define amdgpu_kernel void @global_load_tr_b128_half(ptr addrspace(1) %addr, ptr
 ; GFX12-GISEL-W64-NEXT:    s_load_b128 s[0:3], s[0:1], 0x24
 ; GFX12-GISEL-W64-NEXT:    v_mov_b32_e32 v2, 0
 ; GFX12-GISEL-W64-NEXT:    s_wait_kmcnt 0x0
-; GFX12-GISEL-W64-NEXT:    global_load_tr_b128 v[0:1], v2, s[0:1] offset:32 th:TH_LOAD_NT
+; GFX12-GISEL-W64-NEXT:    global_load_tr_b128 v[0:1], v2, s[0:1] offset:32
 ; GFX12-GISEL-W64-NEXT:    s_wait_loadcnt 0x0
-; GFX12-GISEL-W64-NEXT:    global_inv scope:SCOPE_SYS
 ; GFX12-GISEL-W64-NEXT:    global_store_b64 v2, v[0:1], s[2:3]
 ; GFX12-GISEL-W64-NEXT:    s_nop 0
 ; GFX12-GISEL-W64-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
@@ -112,9 +106,8 @@ define amdgpu_kernel void @global_load_tr_b128_bfloat(ptr addrspace(1) %addr, pt
 ; GFX12-SDAG-W64-NEXT:    s_load_b128 s[0:3], s[0:1], 0x24
 ; GFX12-SDAG-W64-NEXT:    v_mov_b32_e32 v2, 0
 ; GFX12-SDAG-W64-NEXT:    s_wait_kmcnt 0x0
-; GFX12-SDAG-W64-NEXT:    global_load_tr_b128 v[0:1], v2, s[0:1] offset:32 th:TH_LOAD_NT
+; GFX12-SDAG-W64-NEXT:    global_load_tr_b128 v[0:1], v2, s[0:1] offset:32
 ; GFX12-SDAG-W64-NEXT:    s_wait_loadcnt 0x0
-; GFX12-SDAG-W64-NEXT:    global_inv scope:SCOPE_SYS
 ; GFX12-SDAG-W64-NEXT:    global_store_b64 v2, v[0:1], s[2:3]
 ; GFX12-SDAG-W64-NEXT:    s_nop 0
 ; GFX12-SDAG-W64-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
@@ -125,9 +118,8 @@ define amdgpu_kernel void @global_load_tr_b128_bfloat(ptr addrspace(1) %addr, pt
 ; GFX12-GISEL-W64-NEXT:    s_load_b128 s[0:3], s[0:1], 0x24
 ; GFX12-GISEL-W64-NEXT:    v_mov_b32_e32 v2, 0
 ; GFX12-GISEL-W64-NEXT:    s_wait_kmcnt 0x0
-; GFX12-GISEL-W64-NEXT:    global_load_tr_b128 v[0:1], v2, s[0:1] offset:32 th:TH_LOAD_NT
+; GFX12-GISEL-W64-NEXT:    global_load_tr_b128 v[0:1], v2, s[0:1] offset:32
 ; GFX12-GISEL-W64-NEXT:    s_wait_loadcnt 0x0
-; GFX12-GISEL-W64-NEXT:    global_inv scope:SCOPE_SYS
 ; GFX12-GISEL-W64-NEXT:    global_store_b64 v2, v[0:1], s[2:3]
 ; GFX12-GISEL-W64-NEXT:    s_nop 0
 ; GFX12-GISEL-W64-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)


        


More information about the llvm-commits mailing list