[llvm] [LoongArch] Add generation support for `preld` instruction (PR #118436)

Mon Dec 2 23:11:25 PST 2024

llvmbot wrote:




@llvm/pr-subscribers-backend-loongarch

Author: ZhaoQi (zhaoqi5)

<details>
<summary>Changes</summary>

The `preld` instruction prefetches one cache line of data from memory into the cache in advance.

This commit allows the compiler to generate it automatically when lowering the `llvm.prefetch` intrinsic.

---
Full diff: https://github.com/llvm/llvm-project/pull/118436.diff


5 Files Affected:

- (modified) llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp (+22) 
- (modified) llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h (+1) 
- (modified) llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp (+2) 
- (modified) llvm/lib/Target/LoongArch/LoongArchInstrInfo.td (+9) 
- (added) llvm/test/CodeGen/LoongArch/preld.ll (+67) 


``````````diff

diff --git a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp
index d330f953556018..099ce54cead140 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp
@@ -245,6 +245,28 @@ bool LoongArchDAGToDAGISel::selectNonFIBaseAddr(SDValue Addr, SDValue &Base) {
   return true;
 }
 
+bool LoongArchDAGToDAGISel::SelectAddrRegImm(SDValue Addr, SDValue &Base,
+                                             SDValue &Offset) {
+  SDLoc DL(Addr);
+  MVT VT = Addr.getSimpleValueType();
+
+  // Fold a base + constant offset address when the offset fits in simm12.
+  if (CurDAG->isBaseWithConstantOffset(Addr)) {
+    int64_t Imm = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
+    if (isInt<12>(Imm)) {
+      Base = Addr.getOperand(0);
+      Offset = CurDAG->getTargetConstant(SignExtend64<12>(Imm), DL, VT);
+      return true;
+    }
+  }
+
+  // Otherwise, use Addr itself as the base address with a constant offset of
+  // 0, so this selector always succeeds.
+  Base = Addr;
+  Offset = CurDAG->getTargetConstant(0, DL, VT);
+  return true;
+}
+
 bool LoongArchDAGToDAGISel::selectShiftMask(SDValue N, unsigned ShiftWidth,
                                             SDValue &ShAmt) {
   // Shift instructions on LoongArch only read the lower 5 or 6 bits of the
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h
index 363b4f0ca7cf06..46c286bdb4eb77 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h
+++ b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h
@@ -43,6 +43,7 @@ class LoongArchDAGToDAGISel : public SelectionDAGISel {
   bool SelectBaseAddr(SDValue Addr, SDValue &Base);
   bool SelectAddrConstant(SDValue Addr, SDValue &Base, SDValue &Offset);
   bool selectNonFIBaseAddr(SDValue Addr, SDValue &Base);
+  bool SelectAddrRegImm(SDValue Addr, SDValue &Base, SDValue &Offset);
 
   bool selectShiftMask(SDValue N, unsigned ShiftWidth, SDValue &ShAmt);
   bool selectShiftMaskGRLen(SDValue N, SDValue &ShAmt) {
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 16bceacfaa222c..a92142a3930f0f 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -99,6 +99,8 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
   setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
 
+  setOperationAction(ISD::PREFETCH, MVT::Other, Legal);
+
   // Expand bitreverse.i16 with native-width bitrev and shift for now, before
   // we get to know which of sll and revb.2h is faster.
   setOperationAction(ISD::BITREVERSE, MVT::i8, Custom);
diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
index 6134daf2fbe630..5eda3bc7225e05 100644
--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
@@ -521,6 +521,7 @@ def HI16ForAddu16idAddiPair: SDNodeXForm<imm, [{
 def BaseAddr : ComplexPattern<iPTR, 1, "SelectBaseAddr">;
 def AddrConstant : ComplexPattern<iPTR, 2, "SelectAddrConstant">;
 def NonFIBaseAddr : ComplexPattern<iPTR, 1, "selectNonFIBaseAddr">;
+def AddrRegImm : ComplexPattern<iPTR, 2, "SelectAddrRegImm">;
 
 def fma_nsz : PatFrag<(ops node:$fj, node:$fk, node:$fa),
                       (fma node:$fj, node:$fk, node:$fa), [{
@@ -2009,6 +2010,14 @@ class PseudoMaskedAMMinMax
 def PseudoMaskedAtomicLoadMax32 : PseudoMaskedAMMinMax;
 def PseudoMaskedAtomicLoadMin32 : PseudoMaskedAMMinMax;
 
+// Data prefetch
+
+// TODO: Add support for the preldx instruction.
+def : Pat<(prefetch (AddrRegImm GPR:$rj, simm12:$imm12), (i32 0), timm, (i32 1)),
+          (PRELD 0, GPR:$rj, simm12:$imm12)>; // data prefetch for loads
+def : Pat<(prefetch (AddrRegImm GPR:$rj, simm12:$imm12), (i32 1), timm, (i32 1)),
+          (PRELD 8, GPR:$rj, simm12:$imm12)>; // data prefetch for stores
+
 /// Compare and exchange
 
 class PseudoCmpXchg
diff --git a/llvm/test/CodeGen/LoongArch/preld.ll b/llvm/test/CodeGen/LoongArch/preld.ll
new file mode 100644
index 00000000000000..18057ac871f753
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/preld.ll
@@ -0,0 +1,67 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc --mtriple=loongarch32 < %s | FileCheck %s --check-prefix=LA32
+; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s --check-prefix=LA64
+
+declare void @llvm.prefetch(ptr, i32, i32, i32)
+
+define void @load_prefetch_no_offset(ptr %a) {
+; LA32-LABEL: load_prefetch_no_offset:
+; LA32:       # %bb.0: # %entry
+; LA32-NEXT:    preld 0, $a0, 0
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: load_prefetch_no_offset:
+; LA64:       # %bb.0: # %entry
+; LA64-NEXT:    preld 0, $a0, 0
+; LA64-NEXT:    ret
+entry:
+  call void @llvm.prefetch(ptr %a, i32 0, i32 3, i32 1)
+  ret void
+}
+
+define void @store_prefetch_no_offset(ptr %a) {
+; LA32-LABEL: store_prefetch_no_offset:
+; LA32:       # %bb.0: # %entry
+; LA32-NEXT:    preld 8, $a0, 0
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: store_prefetch_no_offset:
+; LA64:       # %bb.0: # %entry
+; LA64-NEXT:    preld 8, $a0, 0
+; LA64-NEXT:    ret
+entry:
+  call void @llvm.prefetch(ptr %a, i32 1, i32 3, i32 1)
+  ret void
+}
+
+define void @load_prefetch_with_offset(ptr %a) {
+; LA32-LABEL: load_prefetch_with_offset:
+; LA32:       # %bb.0: # %entry
+; LA32-NEXT:    preld 0, $a0, 200
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: load_prefetch_with_offset:
+; LA64:       # %bb.0: # %entry
+; LA64-NEXT:    preld 0, $a0, 200
+; LA64-NEXT:    ret
+entry:
+  %addr = getelementptr i8, ptr %a, i64 200
+  call void @llvm.prefetch(ptr %addr, i32 0, i32 3, i32 1)
+  ret void
+}
+
+define void @store_prefetch_with_offset(ptr %a) {
+; LA32-LABEL: store_prefetch_with_offset:
+; LA32:       # %bb.0: # %entry
+; LA32-NEXT:    preld 8, $a0, 200
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: store_prefetch_with_offset:
+; LA64:       # %bb.0: # %entry
+; LA64-NEXT:    preld 8, $a0, 200
+; LA64-NEXT:    ret
+entry:
+  %addr = getelementptr i8, ptr %a, i64 200
+  call void @llvm.prefetch(ptr %addr, i32 1, i32 3, i32 1)
+  ret void
+}

``````````

</details>


https://github.com/llvm/llvm-project/pull/118436