[llvm] [AMDGPU][NFC] Update cache policy descriptions (PR #78768)

Mirko Brkušanin via llvm-commits llvm-commits at lists.llvm.org
Fri Jan 19 11:39:04 PST 2024


https://github.com/mbrkusanin created https://github.com/llvm/llvm-project/pull/78768

None

From 4360cf9db40e905ec282c85d7d4d20d5bc9e557b Mon Sep 17 00:00:00 2001
From: Mirko Brkusanin <Mirko.Brkusanin at amd.com>
Date: Fri, 19 Jan 2024 20:36:42 +0100
Subject: [PATCH] [AMDGPU][NFC] Update cache policy descriptions

---
 llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 150 +++++++++++++++++++----
 1 file changed, 126 insertions(+), 24 deletions(-)

diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index 9302e590a6fc93..9499b4ffd439b3 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -849,7 +849,6 @@ class AMDGPUImageDimIntrinsic<AMDGPUDimProfile P_,
       [llvm_i32_ty,                              // texfailctrl(imm; bit 0 = tfe, bit 1 = lwe)
        llvm_i32_ty]),                            // cachepolicy(imm; bit 0 = glc, bit 1 = slc, bit 2 = dlc;
                                                  //   gfx12+ imm: bits [0-2] = th, bits [3-4] = scope)
-                                                 // TODO-GFX12: Update all other cachepolicy descriptions.
 
      !listconcat(props, [IntrNoCallback, IntrNoFree, IntrWillReturn],
           !if(P_.IsAtomic, [], [ImmArg<ArgIndex<AMDGPUImageDimIntrinsicEval<P_>.DmaskArgIndex>>]),
@@ -1077,7 +1076,8 @@ def int_amdgcn_s_buffer_load : DefaultAttrsIntrinsic <
   [llvm_any_ty],
   [llvm_v4i32_ty,     // rsrc(SGPR)
    llvm_i32_ty,       // byte offset
-   llvm_i32_ty],      // cachepolicy(imm; bit 0 = glc, bit 2 = dlc)
+   llvm_i32_ty],      // cachepolicy(imm; bit 0 = glc, bit 1 = slc, bit 2 = dlc;
+                      //   gfx12+ imm: bits [0-2] = th, bits [3-4] = scope)
                       // Note: volatile bit is **not** permitted here.
   [IntrNoMem, ImmArg<ArgIndex<2>>]>,
   AMDGPURsrcIntrinsic<0>;
@@ -1117,8 +1117,13 @@ class AMDGPURawBufferLoad<LLVMType data_ty = llvm_any_ty> : DefaultAttrsIntrinsi
    llvm_i32_ty,       // soffset(SGPR/imm, excluded from bounds checking and swizzling)
    llvm_i32_ty],      // auxiliary data (imm, cachepolicy     (bit 0 = glc,
                       //                                       bit 1 = slc,
-                      //                                       bit 2 = dlc on gfx10+),
+                      //                                       bit 2 = dlc on gfx10/gfx11),
                       //                      swizzled buffer (bit 3 = swz),
+                      //                  gfx12+:
+                      //                      cachepolicy (bits [0-2] = th,
+                      //                                   bits [3-4] = scope)
+                      //                      swizzled buffer (bit 6 = swz),
+                      //                  all:
                       //                      volatile op (bit 31, stripped at lowering))
   [IntrReadMem, ImmArg<ArgIndex<3>>], "", [SDNPMemOperand]>,
   AMDGPURsrcIntrinsic<0>;
@@ -1132,8 +1137,13 @@ class AMDGPURawPtrBufferLoad<LLVMType data_ty = llvm_any_ty> : DefaultAttrsIntri
    llvm_i32_ty,                 // soffset(SGPR/imm, excluded from bounds checking and swizzling)
    llvm_i32_ty],                // auxiliary data (imm, cachepolicy (bit 0 = glc,
                                 //                                   bit 1 = slc,
-                                //                                   bit 2 = dlc on gfx10+),
+                                //                                   bit 2 = dlc on gfx10/gfx11),
                                 //                      swizzled buffer (bit 3 = swz),
+                                //                  gfx12+:
+                                //                      cachepolicy (bits [0-2] = th,
+                                //                                   bits [3-4] = scope)
+                                //                      swizzled buffer (bit 6 = swz),
+                                //                  all:
                                 //                      volatile op (bit 31, stripped at lowering))
 
   [IntrArgMemOnly, IntrReadMem, ReadOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>,
@@ -1150,8 +1160,13 @@ class AMDGPUStructBufferLoad<LLVMType data_ty = llvm_any_ty> : DefaultAttrsIntri
    llvm_i32_ty,       // soffset(SGPR/imm, excluded from bounds checking and swizzling)
    llvm_i32_ty],      // auxiliary data (imm, cachepolicy     (bit 0 = glc,
                       //                                       bit 1 = slc,
-                      //                                       bit 2 = dlc on gfx10+),
+                      //                                       bit 2 = dlc on gfx10/gfx11),
                       //                      swizzled buffer (bit 3 = swz),
+                      //                  gfx12+:
+                      //                      cachepolicy (bits [0-2] = th,
+                      //                                   bits [3-4] = scope)
+                      //                      swizzled buffer (bit 6 = swz),
+                      //                  all:
                       //                      volatile op (bit 31, stripped at lowering))
   [IntrReadMem, ImmArg<ArgIndex<4>>], "", [SDNPMemOperand]>,
   AMDGPURsrcIntrinsic<0>;
@@ -1166,8 +1181,13 @@ class AMDGPUStructPtrBufferLoad<LLVMType data_ty = llvm_any_ty> : DefaultAttrsIn
    llvm_i32_ty,                 // soffset(SGPR/imm, excluded from bounds checking and swizzling)
    llvm_i32_ty],                // auxiliary data (imm, cachepolicy (bit 0 = glc,
                                 //                                   bit 1 = slc,
-                                //                                   bit 2 = dlc on gfx10+),
+                                //                                   bit 2 = dlc on gfx10/gfx11),
                                 //                      swizzled buffer (bit 3 = swz),
+                                //                  gfx12+:
+                                //                      cachepolicy (bits [0-2] = th,
+                                //                                   bits [3-4] = scope)
+                                //                      swizzled buffer (bit 6 = swz),
+                                //                  all:
                                 //                      volatile op (bit 31, stripped at lowering))
   [IntrArgMemOnly, IntrReadMem, ReadOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>,
    ImmArg<ArgIndex<4>>], "", [SDNPMemOperand]>,
@@ -1183,8 +1203,13 @@ class AMDGPURawBufferStore<LLVMType data_ty = llvm_any_ty> : DefaultAttrsIntrins
    llvm_i32_ty,       // soffset(SGPR/imm, excluded from bounds checking and swizzling)
    llvm_i32_ty],      // auxiliary data (imm, cachepolicy     (bit 0 = glc,
                       //                                       bit 1 = slc,
-                      //                                       bit 2 = dlc on gfx10+),
+                      //                                       bit 2 = dlc on gfx10/gfx11),
                       //                      swizzled buffer (bit 3 = swz),
+                      //                  gfx12+:
+                      //                      cachepolicy (bits [0-2] = th,
+                      //                                   bits [3-4] = scope)
+                      //                      swizzled buffer (bit 6 = swz),
+                      //                  all:
                       //                      volatile op (bit 31, stripped at lowering))
   [IntrWriteMem, ImmArg<ArgIndex<4>>], "", [SDNPMemOperand]>,
   AMDGPURsrcIntrinsic<1>;
@@ -1199,8 +1224,13 @@ class AMDGPURawPtrBufferStore<LLVMType data_ty = llvm_any_ty> : DefaultAttrsIntr
    llvm_i32_ty,                 // soffset(SGPR/imm, excluded from bounds checking and swizzling)
    llvm_i32_ty],                // auxiliary data (imm, cachepolicy (bit 0 = glc,
                                 //                                   bit 1 = slc,
-                                //                                   bit 2 = dlc on gfx10+),
+                                //                                   bit 2 = dlc on gfx10/gfx11),
                                 //                      swizzled buffer (bit 3 = swz),
+                                //                  gfx12+:
+                                //                      cachepolicy (bits [0-2] = th,
+                                //                                   bits [3-4] = scope)
+                                //                      swizzled buffer (bit 6 = swz),
+                                //                  all:
                                 //                      volatile op (bit 31, stripped at lowering))
   [IntrArgMemOnly, IntrWriteMem, WriteOnly<ArgIndex<1>>, NoCapture<ArgIndex<1>>,
   ImmArg<ArgIndex<4>>], "", [SDNPMemOperand]>,
@@ -1217,8 +1247,13 @@ class AMDGPUStructBufferStore<LLVMType data_ty = llvm_any_ty> : DefaultAttrsIntr
    llvm_i32_ty,       // soffset(SGPR/imm, excluded from bounds checking and swizzling)
    llvm_i32_ty],      // auxiliary data (imm, cachepolicy     (bit 0 = glc,
                       //                                       bit 1 = slc,
-                      //                                       bit 2 = dlc on gfx10+),
+                      //                                       bit 2 = dlc on gfx10/gfx11),
                       //                      swizzled buffer (bit 3 = swz),
+                      //                  gfx12+:
+                      //                      cachepolicy (bits [0-2] = th,
+                      //                                   bits [3-4] = scope)
+                      //                      swizzled buffer (bit 6 = swz),
+                      //                  all:
                       //                      volatile op (bit 31, stripped at lowering))
   [IntrWriteMem, ImmArg<ArgIndex<5>>], "", [SDNPMemOperand]>,
   AMDGPURsrcIntrinsic<1>;
@@ -1234,8 +1269,13 @@ class AMDGPUStructPtrBufferStore<LLVMType data_ty = llvm_any_ty> : DefaultAttrsI
    llvm_i32_ty,                 // soffset(SGPR/imm, excluded from bounds checking and swizzling)
    llvm_i32_ty],                // auxiliary data (imm, cachepolicy (bit 0 = glc,
                                 //                                   bit 1 = slc,
-                                //                                   bit 2 = dlc on gfx10+),
+                                //                                   bit 2 = dlc on gfx10/gfx11),
                                 //                      swizzled buffer (bit 3 = swz),
+                                //                  gfx12+:
+                                //                      cachepolicy (bits [0-2] = th,
+                                //                                   bits [3-4] = scope)
+                                //                      swizzled buffer (bit 6 = swz),
+                                //                  all:
                                 //                      volatile op (bit 31, stripped at lowering))
   [IntrArgMemOnly, IntrWriteMem, WriteOnly<ArgIndex<1>>, NoCapture<ArgIndex<1>>,
    ImmArg<ArgIndex<5>>], "", [SDNPMemOperand]>,
@@ -1491,8 +1531,12 @@ def int_amdgcn_raw_tbuffer_load : DefaultAttrsIntrinsic <
      llvm_i32_ty,     // format(imm; bits 3..0 = dfmt, bits 6..4 = nfmt)
      llvm_i32_ty],    // auxiliary data (imm, cachepolicy     (bit 0 = glc,
                       //                                       bit 1 = slc,
-                      //                                       bit 2 = dlc on gfx10+),
+                      //                                       bit 2 = dlc on gfx10/gfx11),
                       //                      swizzled buffer (bit 3 = swz))
+                      //                  gfx12+:
+                      //                      cachepolicy (bits [0-2] = th,
+                      //                                   bits [3-4] = scope)
+                      //                      swizzled buffer (bit 6 = swz)
     [IntrReadMem,
      ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>], "", [SDNPMemOperand]>,
   AMDGPURsrcIntrinsic<0>;
@@ -1505,8 +1549,12 @@ def int_amdgcn_raw_ptr_tbuffer_load : DefaultAttrsIntrinsic <
      llvm_i32_ty,     // format(imm; bits 3..0 = dfmt, bits 6..4 = nfmt)
      llvm_i32_ty],    // auxiliary data (imm, cachepolicy     (bit 0 = glc,
                       //                                       bit 1 = slc,
-                      //                                       bit 2 = dlc on gfx10+),
+                      //                                       bit 2 = dlc on gfx10/gfx11),
                       //                      swizzled buffer (bit 3 = swz),
+                      //                  gfx12+:
+                      //                      cachepolicy (bits [0-2] = th,
+                      //                                   bits [3-4] = scope)
+                      //                      swizzled buffer (bit 6 = swz),
                       //                      volatile op (bit 31, stripped at lowering))
     [IntrArgMemOnly, IntrReadMem, ReadOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>,
      ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>], "", [SDNPMemOperand]>,
@@ -1521,8 +1569,13 @@ def int_amdgcn_raw_tbuffer_store : DefaultAttrsIntrinsic <
      llvm_i32_ty,    // format(imm; bits 3..0 = dfmt, bits 6..4 = nfmt)
      llvm_i32_ty],   // auxiliary data (imm, cachepolicy     (bit 0 = glc,
                      //                                       bit 1 = slc,
-                     //                                       bit 2 = dlc on gfx10+),
+                     //                                       bit 2 = dlc on gfx10/gfx11),
                      //                      swizzled buffer (bit 3 = swz),
+                     //                  gfx12+:
+                     //                      cachepolicy (bits [0-2] = th,
+                     //                                   bits [3-4] = scope)
+                     //                      swizzled buffer (bit 6 = swz),
+                     //                  all:
                      //                      volatile op (bit 31, stripped at lowering))
     [IntrWriteMem,
      ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>], "", [SDNPMemOperand]>,
@@ -1537,8 +1590,13 @@ def int_amdgcn_raw_ptr_tbuffer_store : DefaultAttrsIntrinsic <
      llvm_i32_ty,    // format(imm; bits 3..0 = dfmt, bits 6..4 = nfmt)
      llvm_i32_ty],   // auxiliary data (imm, cachepolicy     (bit 0 = glc,
                      //                                       bit 1 = slc,
-                     //                                       bit 2 = dlc on gfx10+),
+                     //                                       bit 2 = dlc on gfx10/gfx11),
                      //                      swizzled buffer (bit 3 = swz),
+                     //                  gfx12+:
+                     //                      cachepolicy (bits [0-2] = th,
+                     //                                   bits [3-4] = scope)
+                     //                      swizzled buffer (bit 6 = swz),
+                     //                  all:
                      //                      volatile op (bit 31, stripped at lowering))
     [IntrArgMemOnly, IntrWriteMem, WriteOnly<ArgIndex<1>>, NoCapture<ArgIndex<1>>,
      ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>], "", [SDNPMemOperand]>,
@@ -1553,8 +1611,13 @@ def int_amdgcn_struct_tbuffer_load : DefaultAttrsIntrinsic <
      llvm_i32_ty,     // format(imm; bits 3..0 = dfmt, bits 6..4 = nfmt)
      llvm_i32_ty],    // auxiliary data (imm, cachepolicy     (bit 0 = glc,
                       //                                       bit 1 = slc,
-                      //                                       bit 2 = dlc on gfx10+),
+                      //                                       bit 2 = dlc on gfx10/gfx11),
                       //                      swizzled buffer (bit 3 = swz),
+                      //                  gfx12+:
+                      //                      cachepolicy (bits [0-2] = th,
+                      //                                   bits [3-4] = scope)
+                      //                      swizzled buffer (bit 6 = swz),
+                      //                  all:
                       //                      volatile op (bit 31, stripped at lowering))
     [IntrReadMem,
      ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>], "", [SDNPMemOperand]>,
@@ -1569,8 +1632,13 @@ def int_amdgcn_struct_ptr_tbuffer_load : DefaultAttrsIntrinsic <
      llvm_i32_ty,     // format(imm; bits 3..0 = dfmt, bits 6..4 = nfmt)
      llvm_i32_ty],    // auxiliary data (imm, cachepolicy     (bit 0 = glc,
                       //                                       bit 1 = slc,
-                      //                                       bit 2 = dlc on gfx10+),
+                      //                                       bit 2 = dlc on gfx10/gfx11),
                       //                      swizzled buffer (bit 3 = swz),
+                      //                  gfx12+:
+                      //                      cachepolicy (bits [0-2] = th,
+                      //                                   bits [3-4] = scope)
+                      //                      swizzled buffer (bit 6 = swz),
+                      //                  all:
                       //                      volatile op (bit 31, stripped at lowering))
     [IntrArgMemOnly, IntrReadMem, ReadOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>,
      ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>], "", [SDNPMemOperand]>,
@@ -1586,9 +1654,14 @@ def int_amdgcn_struct_ptr_tbuffer_store : DefaultAttrsIntrinsic <
      llvm_i32_ty,    // format(imm; bits 3..0 = dfmt, bits 6..4 = nfmt)
      llvm_i32_ty],   // auxiliary data (imm, cachepolicy     (bit 0 = glc,
                      //                                       bit 1 = slc,
-                     //                                       bit 2 = dlc on gfx10+),
+                     //                                       bit 2 = dlc on gfx10/gfx11),
                      //                      swizzled buffer (bit 3 = swz),
-                    //                      volatile op (bit 31, stripped at lowering))
+                     //                  gfx12+:
+                     //                      cachepolicy (bits [0-2] = th,
+                     //                                   bits [3-4] = scope)
+                     //                      swizzled buffer (bit 6 = swz),
+                     //                  all:
+                     //                      volatile op (bit 31, stripped at lowering))
     [IntrArgMemOnly, IntrWriteMem, WriteOnly<ArgIndex<1>>, NoCapture<ArgIndex<1>>,
      ImmArg<ArgIndex<5>>, ImmArg<ArgIndex<6>>], "", [SDNPMemOperand]>,
   AMDGPURsrcIntrinsic<1>;
@@ -1603,8 +1676,13 @@ def int_amdgcn_struct_tbuffer_store : DefaultAttrsIntrinsic <
      llvm_i32_ty,    // format(imm; bits 3..0 = dfmt, bits 6..4 = nfmt)
      llvm_i32_ty],   // auxiliary data (imm, cachepolicy     (bit 0 = glc,
                      //                                       bit 1 = slc,
-                     //                                       bit 2 = dlc on gfx10+),
+                     //                                       bit 2 = dlc on gfx10/gfx11),
                      //                      swizzled buffer (bit 3 = swz),
+                     //                  gfx12+:
+                     //                      cachepolicy (bits [0-2] = th,
+                     //                                   bits [3-4] = scope)
+                     //                      swizzled buffer (bit 6 = swz),
+                     //                  all:
                      //                      volatile op (bit 31, stripped at lowering))
     [IntrWriteMem,
      ImmArg<ArgIndex<5>>, ImmArg<ArgIndex<6>>], "", [SDNPMemOperand]>,
@@ -1665,8 +1743,13 @@ class AMDGPURawBufferLoadLDS : Intrinsic <
    llvm_i32_ty,                        // imm offset(imm, included in bounds checking and swizzling)
    llvm_i32_ty],                       // auxiliary data (imm, cachepolicy     (bit 0 = glc,
                                        //                                       bit 1 = slc,
-                                       //                                       bit 2 = dlc on gfx10+))
+                                       //                                       bit 2 = dlc on gfx10/gfx11),
                                        //                      swizzled buffer (bit 3 = swz),
+                                       //                  gfx12+:
+                                       //                      cachepolicy (bits [0-2] = th,
+                                       //                                   bits [3-4] = scope)
+                                       //                      swizzled buffer (bit 6 = swz),
+                                       //                  all:
                                        //                      volatile op (bit 31, stripped at lowering))
   [IntrWillReturn, NoCapture<ArgIndex<1>>, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<5>>,
    ImmArg<ArgIndex<6>>, IntrNoCallback, IntrNoFree], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<0>;
@@ -1682,8 +1765,13 @@ class AMDGPURawPtrBufferLoadLDS : Intrinsic <
    llvm_i32_ty,                        // imm offset(imm, included in bounds checking and swizzling)
    llvm_i32_ty],                       // auxiliary data (imm, cachepolicy     (bit 0 = glc,
                                        //                                       bit 1 = slc,
-                                       //                                       bit 2 = dlc on gfx10+))
+                                       //                                       bit 2 = dlc on gfx10/gfx11),
                                        //                      swizzled buffer (bit 3 = swz),
+                                       //                  gfx12+:
+                                       //                      cachepolicy (bits [0-2] = th,
+                                       //                                   bits [3-4] = scope)
+                                       //                      swizzled buffer (bit 6 = swz),
+                                       //                  all:
                                        //                      volatile op (bit 31, stripped at lowering))
   [IntrWillReturn, IntrArgMemOnly,
    ReadOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>,
@@ -1703,8 +1791,13 @@ class AMDGPUStructBufferLoadLDS : Intrinsic <
    llvm_i32_ty,                        // imm offset(imm, included in bounds checking and swizzling)
    llvm_i32_ty],                       // auxiliary data (imm, cachepolicy     (bit 0 = glc,
                                        //                                       bit 1 = slc,
-                                       //                                       bit 2 = dlc on gfx10+))
+                                       //                                       bit 2 = dlc on gfx10/gfx11),
                                        //                      swizzled buffer (bit 3 = swz),
+                                       //                  gfx12+:
+                                       //                      cachepolicy (bits [0-2] = th,
+                                       //                                   bits [3-4] = scope)
+                                       //                      swizzled buffer (bit 6 = swz),
+                                       //                  all:
                                        //                      volatile op (bit 31, stripped at lowering))
   [IntrWillReturn, NoCapture<ArgIndex<1>>, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<6>>,
    ImmArg<ArgIndex<7>>, IntrNoCallback, IntrNoFree], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<0>;
@@ -1721,8 +1814,13 @@ class AMDGPUStructPtrBufferLoadLDS : Intrinsic <
    llvm_i32_ty,                        // imm offset(imm, included in bounds checking and swizzling)
    llvm_i32_ty],                       // auxiliary data (imm, cachepolicy     (bit 0 = glc,
                                        //                                       bit 1 = slc,
-                                       //                                       bit 2 = dlc on gfx10+))
+                                       //                                       bit 2 = dlc on gfx10/gfx11),
                                        //                      swizzled buffer (bit 3 = swz),
+                                       //                  gfx12+:
+                                       //                      cachepolicy (bits [0-2] = th,
+                                       //                                   bits [3-4] = scope)
+                                       //                      swizzled buffer (bit 6 = swz),
+                                       //                  all:
                                        //                      volatile op (bit 31, stripped at lowering))
   [IntrWillReturn, IntrArgMemOnly,
    ReadOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>,
@@ -2396,8 +2494,12 @@ class AMDGPUGlobalLoadLDS : Intrinsic <
    llvm_i32_ty,                        // imm offset (applied to both global and LDS address)
    llvm_i32_ty],                       // auxiliary data (imm, cachepolicy (bit 0 = glc/sc0,
                                        //                                   bit 1 = slc/sc1,
-                                       //                                   bit 2 = dlc on gfx10+))
+                                       //                                   bit 2 = dlc on gfx10/gfx11,
                                        //                                   bit 4 = scc/nt on gfx90a+))
+                                       //                  gfx12+:
+                                       //                      cachepolicy (bits [0-2] = th,
+                                       //                                   bits [3-4] = scope)
+                                       //                      swizzled buffer (bit 6 = swz),
   [IntrWillReturn, NoCapture<ArgIndex<0>>, NoCapture<ArgIndex<1>>,
    ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, IntrNoCallback, IntrNoFree],
   "", [SDNPMemOperand]>;



More information about the llvm-commits mailing list