[llvm] [RISCV] Match prefetch address with offset (PR #66072)

via llvm-commits llvm-commits at lists.llvm.org
Tue Sep 12 05:00:57 PDT 2023


llvmbot wrote:

@llvm/pr-subscribers-backend-risc-v

<details>
<summary>Changes</summary>

A new ComplexPattern `AddrRegImmLsb00000` is added, which is like
`AddrRegImm` except that if the least significant 5 bits of the offset
aren't all zeros, we fall back to offset 0.
--

Patch is 50.18 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/66072.diff

4 Files Affected:

- (modified) llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp (+16) 
- (modified) llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h (+1) 
- (modified) llvm/lib/Target/RISCV/RISCVInstrInfoZicbo.td (+11-7) 
- (modified) llvm/test/CodeGen/RISCV/prefetch.ll (+1043) 


<pre>
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index c6cefc000b3f746..4ef65ea0cd1ecdd 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -2441,6 +2441,22 @@ bool RISCVDAGToDAGISel::SelectAddrRegImm(SDValue Addr, SDValue &Base,
   return true;
 }
 
+bool RISCVDAGToDAGISel::SelectAddrRegImmLsb00000(SDValue Addr, SDValue &Base,
+                                                 SDValue &Offset) {
+
+  bool Found = SelectAddrRegImm(Addr, Base, Offset);
+  assert(Found && "SelectAddrRegImm should always succeed");
+  (void)Found;
+
+  if (auto *OffsetImm = dyn_cast<ConstantSDNode>(Offset))
+    if (OffsetImm->getAPIntValue().countTrailingZeros() >= 5)
+      return true;
+
+  Base = Addr;
+  Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), Addr.getSimpleValueType());
+  return true;
+}
+
 bool RISCVDAGToDAGISel::selectShiftMask(SDValue N, unsigned ShiftWidth,
                                         SDValue &ShAmt) {
   ShAmt = N;
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
index fbc1520a54ba071..cb8d3ced92a29bf 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
@@ -53,6 +53,7 @@ class RISCVDAGToDAGISel : public SelectionDAGISel {
   bool SelectAddrRegImmINX(SDValue Addr, SDValue &Base, SDValue &Offset) {
     return SelectAddrRegImm(Addr, Base, Offset, true);
   }
+  bool SelectAddrRegImmLsb00000(SDValue Addr, SDValue &Base, SDValue &Offset);
 
   bool SelectAddrRegRegScale(SDValue Addr, unsigned MaxShiftAmount,
                              SDValue &Base, SDValue &Index, SDValue &Scale);
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZicbo.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZicbo.td
index 509d1cfcd874444..de8a96607864ef1 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZicbo.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZicbo.td
@@ -74,12 +74,16 @@ def PREFETCH_W : Prefetch_ri<0b00011, "prefetch.w">, Sched<[]>;
 // Patterns
 //===----------------------------------------------------------------------===//
 
+def AddrRegImmLsb00000 : ComplexPattern<iPTR, 2, "SelectAddrRegImmLsb00000">;
+
 let Predicates = [HasStdExtZicbop] in {
-  // FIXME: Match address with offset
-  def : Pat<(prefetch GPR:$rs1, imm, imm, (XLenVT 0)),
-            (PREFETCH_I GPR:$rs1, 0)>;
-  def : Pat<(prefetch GPR:$rs1, (XLenVT 0), imm, (XLenVT 1)),
-            (PREFETCH_R GPR:$rs1, 0)>;
-  def : Pat<(prefetch GPR:$rs1, (XLenVT 1), imm, (XLenVT 1)),
-            (PREFETCH_W GPR:$rs1, 0)>;
+  def : Pat<(prefetch (AddrRegImmLsb00000 (XLenVT GPR:$rs1), simm12_lsb00000:$imm12),
+                      imm, imm, (XLenVT 0)),
+            (PREFETCH_I GPR:$rs1, simm12_lsb00000:$imm12)>;
+  def : Pat<(prefetch (AddrRegImmLsb00000 (XLenVT GPR:$rs1), simm12_lsb00000:$imm12),
+                      (XLenVT 0), imm, (XLenVT 1)),
+            (PREFETCH_R GPR:$rs1, simm12_lsb00000:$imm12)>;
+  def : Pat<(prefetch (AddrRegImmLsb00000 (XLenVT GPR:$rs1), simm12_lsb00000:$imm12),
+                      (XLenVT 1), imm, (XLenVT 1)),
+            (PREFETCH_W GPR:$rs1, simm12_lsb00000:$imm12)>;
 }
diff --git a/llvm/test/CodeGen/RISCV/prefetch.ll b/llvm/test/CodeGen/RISCV/prefetch.ll
index 39732636d298b0f..ae4a903f439b7a8 100644
--- a/llvm/test/CodeGen/RISCV/prefetch.ll
+++ b/llvm/test/CodeGen/RISCV/prefetch.ll
@@ -40,6 +40,95 @@ define void @test_prefetch_read_locality_0(ptr %a) nounwind {
   ret void
 }
 
+define void @test_prefetch_offsetable_read_locality_0(ptr %a) nounwind {
+; RV32I-LABEL: test_prefetch_offsetable_read_locality_0:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: test_prefetch_offsetable_read_locality_0:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    ret
+;
+; RV32ZICBOP-LABEL: test_prefetch_offsetable_read_locality_0:
+; RV32ZICBOP:       # %bb.0:
+; RV32ZICBOP-NEXT:    prefetch.r -2048(a0)
+; RV32ZICBOP-NEXT:    prefetch.r 2016(a0)
+; RV32ZICBOP-NEXT:    prefetch.r 32(a0)
+; RV32ZICBOP-NEXT:    prefetch.r -32(a0)
+; RV32ZICBOP-NEXT:    addi a1, a0, -2048
+; RV32ZICBOP-NEXT:    addi a1, a1, -1
+; RV32ZICBOP-NEXT:    prefetch.r 0(a1)
+; RV32ZICBOP-NEXT:    addi a1, a0, 2047
+; RV32ZICBOP-NEXT:    addi a1, a1, 1
+; RV32ZICBOP-NEXT:    prefetch.r 0(a1)
+; RV32ZICBOP-NEXT:    addi a1, a0, 16
+; RV32ZICBOP-NEXT:    prefetch.r 0(a1)
+; RV32ZICBOP-NEXT:    addi a0, a0, -16
+; RV32ZICBOP-NEXT:    prefetch.r 0(a0)
+; RV32ZICBOP-NEXT:    ret
+;
+; RV64ZICBOP-LABEL: test_prefetch_offsetable_read_locality_0:
+; RV64ZICBOP:       # %bb.0:
+; RV64ZICBOP-NEXT:    prefetch.r -2048(a0)
+; RV64ZICBOP-NEXT:    prefetch.r 2016(a0)
+; RV64ZICBOP-NEXT:    prefetch.r 32(a0)
+; RV64ZICBOP-NEXT:    prefetch.r -32(a0)
+; RV64ZICBOP-NEXT:    addi a1, a0, -2048
+; RV64ZICBOP-NEXT:    addi a1, a1, -1
+; RV64ZICBOP-NEXT:    prefetch.r 0(a1)
+; RV64ZICBOP-NEXT:    addi a1, a0, 2047
+; RV64ZICBOP-NEXT:    addi a1, a1, 1
+; RV64ZICBOP-NEXT:    prefetch.r 0(a1)
+; RV64ZICBOP-NEXT:    addi a1, a0, 16
+; RV64ZICBOP-NEXT:    prefetch.r 0(a1)
+; RV64ZICBOP-NEXT:    addi a0, a0, -16
+; RV64ZICBOP-NEXT:    prefetch.r 0(a0)
+; RV64ZICBOP-NEXT:    ret
+;
+; RV64ZICBOPZIHINTNTL-LABEL: test_prefetch_offsetable_read_locality_0:
+; RV64ZICBOPZIHINTNTL:       # %bb.0:
+; RV64ZICBOPZIHINTNTL-NEXT:    ntl.all
+; RV64ZICBOPZIHINTNTL-NEXT:    prefetch.r -2048(a0)
+; RV64ZICBOPZIHINTNTL-NEXT:    ntl.all
+; RV64ZICBOPZIHINTNTL-NEXT:    prefetch.r 2016(a0)
+; RV64ZICBOPZIHINTNTL-NEXT:    ntl.all
+; RV64ZICBOPZIHINTNTL-NEXT:    prefetch.r 32(a0)
+; RV64ZICBOPZIHINTNTL-NEXT:    ntl.all
+; RV64ZICBOPZIHINTNTL-NEXT:    prefetch.r -32(a0)
+; RV64ZICBOPZIHINTNTL-NEXT:    addi a1, a0, -2048
+; RV64ZICBOPZIHINTNTL-NEXT:    addi a1, a1, -1
+; RV64ZICBOPZIHINTNTL-NEXT:    ntl.all
+; RV64ZICBOPZIHINTNTL-NEXT:    prefetch.r 0(a1)
+; RV64ZICBOPZIHINTNTL-NEXT:    addi a1, a0, 2047
+; RV64ZICBOPZIHINTNTL-NEXT:    addi a1, a1, 1
+; RV64ZICBOPZIHINTNTL-NEXT:    ntl.all
+; RV64ZICBOPZIHINTNTL-NEXT:    prefetch.r 0(a1)
+; RV64ZICBOPZIHINTNTL-NEXT:    addi a1, a0, 16
+; RV64ZICBOPZIHINTNTL-NEXT:    ntl.all
+; RV64ZICBOPZIHINTNTL-NEXT:    prefetch.r 0(a1)
+; RV64ZICBOPZIHINTNTL-NEXT:    addi a0, a0, -16
+; RV64ZICBOPZIHINTNTL-NEXT:    ntl.all
+; RV64ZICBOPZIHINTNTL-NEXT:    prefetch.r 0(a0)
+; RV64ZICBOPZIHINTNTL-NEXT:    ret
+  %valid_addr0 = getelementptr i8, ptr %a, i64 -2048
+  call void @llvm.prefetch(ptr %valid_addr0, i32 0, i32 0, i32 1)
+  %valid_addr1 = getelementptr i8, ptr %a, i64 2016
+  call void @llvm.prefetch(ptr %valid_addr1, i32 0, i32 0, i32 1)
+  %valid_addr2 = getelementptr i8, ptr %a, i64 32
+  call void @llvm.prefetch(ptr %valid_addr2, i32 0, i32 0, i32 1)
+  %valid_addr3 = getelementptr i8, ptr %a, i64 -32
+  call void @llvm.prefetch(ptr %valid_addr3, i32 0, i32 0, i32 1)
+  %invalid_addr0 = getelementptr i8, ptr %a, i64 -2049
+  call void @llvm.prefetch(ptr %invalid_addr0, i32 0, i32 0, i32 1)
+  %invalid_addr1 = getelementptr i8, ptr %a, i64 2048
+  call void @llvm.prefetch(ptr %invalid_addr1, i32 0, i32 0, i32 1)
+  %invalid_addr2 = getelementptr i8, ptr %a, i64 16
+  call void @llvm.prefetch(ptr %invalid_addr2, i32 0, i32 0, i32 1)
+  %invalid_addr3 = getelementptr i8, ptr %a, i64 -16
+  call void @llvm.prefetch(ptr %invalid_addr3, i32 0, i32 0, i32 1)
+  ret void
+}
+
 define void @test_prefetch_write_locality_0(ptr %a) nounwind {
 ; RV32I-LABEL: test_prefetch_write_locality_0:
 ; RV32I:       # %bb.0:
@@ -68,6 +157,95 @@ define void @test_prefetch_write_locality_0(ptr %a) nounwind {
   ret void
 }
 
+define void @test_prefetch_offsetable_write_locality_0(ptr %a) nounwind {
+; RV32I-LABEL: test_prefetch_offsetable_write_locality_0:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: test_prefetch_offsetable_write_locality_0:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    ret
+;
+; RV32ZICBOP-LABEL: test_prefetch_offsetable_write_locality_0:
+; RV32ZICBOP:       # %bb.0:
+; RV32ZICBOP-NEXT:    prefetch.w -2048(a0)
+; RV32ZICBOP-NEXT:    prefetch.w 2016(a0)
+; RV32ZICBOP-NEXT:    prefetch.w 32(a0)
+; RV32ZICBOP-NEXT:    prefetch.w -32(a0)
+; RV32ZICBOP-NEXT:    addi a1, a0, -2048
+; RV32ZICBOP-NEXT:    addi a1, a1, -1
+; RV32ZICBOP-NEXT:    prefetch.w 0(a1)
+; RV32ZICBOP-NEXT:    addi a1, a0, 2047
+; RV32ZICBOP-NEXT:    addi a1, a1, 1
+; RV32ZICBOP-NEXT:    prefetch.w 0(a1)
+; RV32ZICBOP-NEXT:    addi a1, a0, 16
+; RV32ZICBOP-NEXT:    prefetch.w 0(a1)
+; RV32ZICBOP-NEXT:    addi a0, a0, -16
+; RV32ZICBOP-NEXT:    prefetch.w 0(a0)
+; RV32ZICBOP-NEXT:    ret
+;
+; RV64ZICBOP-LABEL: test_prefetch_offsetable_write_locality_0:
+; RV64ZICBOP:       # %bb.0:
+; RV64ZICBOP-NEXT:    prefetch.w -2048(a0)
+; RV64ZICBOP-NEXT:    prefetch.w 2016(a0)
+; RV64ZICBOP-NEXT:    prefetch.w 32(a0)
+; RV64ZICBOP-NEXT:    prefetch.w -32(a0)
+; RV64ZICBOP-NEXT:    addi a1, a0, -2048
+; RV64ZICBOP-NEXT:    addi a1, a1, -1
+; RV64ZICBOP-NEXT:    prefetch.w 0(a1)
+; RV64ZICBOP-NEXT:    addi a1, a0, 2047
+; RV64ZICBOP-NEXT:    addi a1, a1, 1
+; RV64ZICBOP-NEXT:    prefetch.w 0(a1)
+; RV64ZICBOP-NEXT:    addi a1, a0, 16
+; RV64ZICBOP-NEXT:    prefetch.w 0(a1)
+; RV64ZICBOP-NEXT:    addi a0, a0, -16
+; RV64ZICBOP-NEXT:    prefetch.w 0(a0)
+; RV64ZICBOP-NEXT:    ret
+;
+; RV64ZICBOPZIHINTNTL-LABEL: test_prefetch_offsetable_write_locality_0:
+; RV64ZICBOPZIHINTNTL:       # %bb.0:
+; RV64ZICBOPZIHINTNTL-NEXT:    ntl.all
+; RV64ZICBOPZIHINTNTL-NEXT:    prefetch.w -2048(a0)
+; RV64ZICBOPZIHINTNTL-NEXT:    ntl.all
+; RV64ZICBOPZIHINTNTL-NEXT:    prefetch.w 2016(a0)
+; RV64ZICBOPZIHINTNTL-NEXT:    ntl.all
+; RV64ZICBOPZIHINTNTL-NEXT:    prefetch.w 32(a0)
+; RV64ZICBOPZIHINTNTL-NEXT:    ntl.all
+; RV64ZICBOPZIHINTNTL-NEXT:    prefetch.w -32(a0)
+; RV64ZICBOPZIHINTNTL-NEXT:    addi a1, a0, -2048
+; RV64ZICBOPZIHINTNTL-NEXT:    addi a1, a1, -1
+; RV64ZICBOPZIHINTNTL-NEXT:    ntl.all
+; RV64ZICBOPZIHINTNTL-NEXT:    prefetch.w 0(a1)
+; RV64ZICBOPZIHINTNTL-NEXT:    addi a1, a0, 2047
+; RV64ZICBOPZIHINTNTL-NEXT:    addi a1, a1, 1
+; RV64ZICBOPZIHINTNTL-NEXT:    ntl.all
+; RV64ZICBOPZIHINTNTL-NEXT:    prefetch.w 0(a1)
+; RV64ZICBOPZIHINTNTL-NEXT:    addi a1, a0, 16
+; RV64ZICBOPZIHINTNTL-NEXT:    ntl.all
+; RV64ZICBOPZIHINTNTL-NEXT:    prefetch.w 0(a1)
+; RV64ZICBOPZIHINTNTL-NEXT:    addi a0, a0, -16
+; RV64ZICBOPZIHINTNTL-NEXT:    ntl.all
+; RV64ZICBOPZIHINTNTL-NEXT:    prefetch.w 0(a0)
+; RV64ZICBOPZIHINTNTL-NEXT:    ret
+  %valid_addr0 = getelementptr i8, ptr %a, i64 -2048
+  call void @llvm.prefetch(ptr %valid_addr0, i32 1, i32 0, i32 1)
+  %valid_addr1 = getelementptr i8, ptr %a, i64 2016
+  call void @llvm.prefetch(ptr %valid_addr1, i32 1, i32 0, i32 1)
+  %valid_addr2 = getelementptr i8, ptr %a, i64 32
+  call void @llvm.prefetch(ptr %valid_addr2, i32 1, i32 0, i32 1)
+  %valid_addr3 = getelementptr i8, ptr %a, i64 -32
+  call void @llvm.prefetch(ptr %valid_addr3, i32 1, i32 0, i32 1)
+  %invalid_addr0 = getelementptr i8, ptr %a, i64 -2049
+  call void @llvm.prefetch(ptr %invalid_addr0, i32 1, i32 0, i32 1)
+  %invalid_addr1 = getelementptr i8, ptr %a, i64 2048
+  call void @llvm.prefetch(ptr %invalid_addr1, i32 1, i32 0, i32 1)
+  %invalid_addr2 = getelementptr i8, ptr %a, i64 16
+  call void @llvm.prefetch(ptr %invalid_addr2, i32 1, i32 0, i32 1)
+  %invalid_addr3 = getelementptr i8, ptr %a, i64 -16
+  call void @llvm.prefetch(ptr %invalid_addr3, i32 1, i32 0, i32 1)
+  ret void
+}
+
 define void @test_prefetch_instruction_locality_0(ptr %a) nounwind {
 ; RV32I-LABEL: test_prefetch_instruction_locality_0:
 ; RV32I:       # %bb.0:
@@ -96,6 +274,95 @@ define void @test_prefetch_instruction_locality_0(ptr %a) nounwind {
   ret void
 }
 
+define void @test_prefetch_offsetable_instruction_locality_0(ptr %a) nounwind {
+; RV32I-LABEL: test_prefetch_offsetable_instruction_locality_0:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: test_prefetch_offsetable_instruction_locality_0:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    ret
+;
+; RV32ZICBOP-LABEL: test_prefetch_offsetable_instruction_locality_0:
+; RV32ZICBOP:       # %bb.0:
+; RV32ZICBOP-NEXT:    prefetch.i -2048(a0)
+; RV32ZICBOP-NEXT:    prefetch.i 2016(a0)
+; RV32ZICBOP-NEXT:    prefetch.i 32(a0)
+; RV32ZICBOP-NEXT:    prefetch.i -32(a0)
+; RV32ZICBOP-NEXT:    addi a1, a0, -2048
+; RV32ZICBOP-NEXT:    addi a1, a1, -1
+; RV32ZICBOP-NEXT:    prefetch.i 0(a1)
+; RV32ZICBOP-NEXT:    addi a1, a0, 2047
+; RV32ZICBOP-NEXT:    addi a1, a1, 1
+; RV32ZICBOP-NEXT:    prefetch.i 0(a1)
+; RV32ZICBOP-NEXT:    addi a1, a0, 16
+; RV32ZICBOP-NEXT:    prefetch.i 0(a1)
+; RV32ZICBOP-NEXT:    addi a0, a0, -16
+; RV32ZICBOP-NEXT:    prefetch.i 0(a0)
+; RV32ZICBOP-NEXT:    ret
+;
+; RV64ZICBOP-LABEL: test_prefetch_offsetable_instruction_locality_0:
+; RV64ZICBOP:       # %bb.0:
+; RV64ZICBOP-NEXT:    prefetch.i -2048(a0)
+; RV64ZICBOP-NEXT:    prefetch.i 2016(a0)
+; RV64ZICBOP-NEXT:    prefetch.i 32(a0)
+; RV64ZICBOP-NEXT:    prefetch.i -32(a0)
+; RV64ZICBOP-NEXT:    addi a1, a0, -2048
+; RV64ZICBOP-NEXT:    addi a1, a1, -1
+; RV64ZICBOP-NEXT:    prefetch.i 0(a1)
+; RV64ZICBOP-NEXT:    addi a1, a0, 2047
+; RV64ZICBOP-NEXT:    addi a1, a1, 1
+; RV64ZICBOP-NEXT:    prefetch.i 0(a1)
+; RV64ZICBOP-NEXT:    addi a1, a0, 16
+; RV64ZICBOP-NEXT:    prefetch.i 0(a1)
+; RV64ZICBOP-NEXT:    addi a0, a0, -16
+; RV64ZICBOP-NEXT:    prefetch.i 0(a0)
+; RV64ZICBOP-NEXT:    ret
+;
+; RV64ZICBOPZIHINTNTL-LABEL: test_prefetch_offsetable_instruction_locality_0:
+; RV64ZICBOPZIHINTNTL:       # %bb.0:
+; RV64ZICBOPZIHINTNTL-NEXT:    ntl.all
+; RV64ZICBOPZIHINTNTL-NEXT:    prefetch.i -2048(a0)
+; RV64ZICBOPZIHINTNTL-NEXT:    ntl.all
+; RV64ZICBOPZIHINTNTL-NEXT:    prefetch.i 2016(a0)
+; RV64ZICBOPZIHINTNTL-NEXT:    ntl.all
+; RV64ZICBOPZIHINTNTL-NEXT:    prefetch.i 32(a0)
+; RV64ZICBOPZIHINTNTL-NEXT:    ntl.all
+; RV64ZICBOPZIHINTNTL-NEXT:    prefetch.i -32(a0)
+; RV64ZICBOPZIHINTNTL-NEXT:    addi a1, a0, -2048
+; RV64ZICBOPZIHINTNTL-NEXT:    addi a1, a1, -1
+; RV64ZICBOPZIHINTNTL-NEXT:    ntl.all
+; RV64ZICBOPZIHINTNTL-NEXT:    prefetch.i 0(a1)
+; RV64ZICBOPZIHINTNTL-NEXT:    addi a1, a0, 2047
+; RV64ZICBOPZIHINTNTL-NEXT:    addi a1, a1, 1
+; RV64ZICBOPZIHINTNTL-NEXT:    ntl.all
+; RV64ZICBOPZIHINTNTL-NEXT:    prefetch.i 0(a1)
+; RV64ZICBOPZIHINTNTL-NEXT:    addi a1, a0, 16
+; RV64ZICBOPZIHINTNTL-NEXT:    ntl.all
+; RV64ZICBOPZIHINTNTL-NEXT:    prefetch.i 0(a1)
+; RV64ZICBOPZIHINTNTL-NEXT:    addi a0, a0, -16
+; RV64ZICBOPZIHINTNTL-NEXT:    ntl.all
+; RV64ZICBOPZIHINTNTL-NEXT:    prefetch.i 0(a0)
+; RV64ZICBOPZIHINTNTL-NEXT:    ret
+  %valid_addr0 = getelementptr i8, ptr %a, i64 -2048
+  call void @llvm.prefetch(ptr %valid_addr0, i32 0, i32 0, i32 0)
+  %valid_addr1 = getelementptr i8, ptr %a, i64 2016
+  call void @llvm.prefetch(ptr %valid_addr1, i32 0, i32 0, i32 0)
+  %valid_addr2 = getelementptr i8, ptr %a, i64 32
+  call void @llvm.prefetch(ptr %valid_addr2, i32 0, i32 0, i32 0)
+  %valid_addr3 = getelementptr i8, ptr %a, i64 -32
+  call void @llvm.prefetch(ptr %valid_addr3, i32 0, i32 0, i32 0)
+  %invalid_addr0 = getelementptr i8, ptr %a, i64 -2049
+  call void @llvm.prefetch(ptr %invalid_addr0, i32 0, i32 0, i32 0)
+  %invalid_addr1 = getelementptr i8, ptr %a, i64 2048
+  call void @llvm.prefetch(ptr %invalid_addr1, i32 0, i32 0, i32 0)
+  %invalid_addr2 = getelementptr i8, ptr %a, i64 16
+  call void @llvm.prefetch(ptr %invalid_addr2, i32 0, i32 0, i32 0)
+  %invalid_addr3 = getelementptr i8, ptr %a, i64 -16
+  call void @llvm.prefetch(ptr %invalid_addr3, i32 0, i32 0, i32 0)
+  ret void
+}
+
 define void @test_prefetch_read_locality_1(ptr %a) nounwind {
 ; RV32I-LABEL: test_prefetch_read_locality_1:
 ; RV32I:       # %bb.0:
@@ -124,6 +391,95 @@ define void @test_prefetch_read_locality_1(ptr %a) nounwind {
   ret void
 }
 
+define void @test_prefetch_offsetable_read_locality_1(ptr %a) nounwind {
+; RV32I-LABEL: test_prefetch_offsetable_read_locality_1:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: test_prefetch_offsetable_read_locality_1:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    ret
+;
+; RV32ZICBOP-LABEL: test_prefetch_offsetable_read_locality_1:
+; RV32ZICBOP:       # %bb.0:
+; RV32ZICBOP-NEXT:    prefetch.r -2048(a0)
+; RV32ZICBOP-NEXT:    prefetch.r 2016(a0)
+; RV32ZICBOP-NEXT:    prefetch.r 32(a0)
+; RV32ZICBOP-NEXT:    prefetch.r -32(a0)
+; RV32ZICBOP-NEXT:    addi a1, a0, -2048
+; RV32ZICBOP-NEXT:    addi a1, a1, -1
+; RV32ZICBOP-NEXT:    prefetch.r 0(a1)
+; RV32ZICBOP-NEXT:    addi a1, a0, 2047
+; RV32ZICBOP-NEXT:    addi a1, a1, 1
+; RV32ZICBOP-NEXT:    prefetch.r 0(a1)
+; RV32ZICBOP-NEXT:    addi a1, a0, 16
+; RV32ZICBOP-NEXT:    prefetch.r 0(a1)
+; RV32ZICBOP-NEXT:    addi a0, a0, -16
+; RV32ZICBOP-NEXT:    prefetch.r 0(a0)
+; RV32ZICBOP-NEXT:    ret
+;
+; RV64ZICBOP-LABEL: test_prefetch_offsetable_read_locality_1:
+; RV64ZICBOP:       # %bb.0:
+; RV64ZICBOP-NEXT:    prefetch.r -2048(a0)
+; RV64ZICBOP-NEXT:    prefetch.r 2016(a0)
+; RV64ZICBOP-NEXT:    prefetch.r 32(a0)
+; RV64ZICBOP-NEXT:    prefetch.r -32(a0)
+; RV64ZICBOP-NEXT:    addi a1, a0, -2048
+; RV64ZICBOP-NEXT:    addi a1, a1, -1
+; RV64ZICBOP-NEXT:    prefetch.r 0(a1)
+; RV64ZICBOP-NEXT:    addi a1, a0, 2047
+; RV64ZICBOP-NEXT:    addi a1, a1, 1
+; RV64ZICBOP-NEXT:    prefetch.r 0(a1)
+; RV64ZICBOP-NEXT:    addi a1, a0, 16
+; RV64ZICBOP-NEXT:    prefetch.r 0(a1)
+; RV64ZICBOP-NEXT:    addi a0, a0, -16
+; RV64ZICBOP-NEXT:    prefetch.r 0(a0)
+; RV64ZICBOP-NEXT:    ret
+;
+; RV64ZICBOPZIHINTNTL-LABEL: test_prefetch_offsetable_read_locality_1:
+; RV64ZICBOPZIHINTNTL:       # %bb.0:
+; RV64ZICBOPZIHINTNTL-NEXT:    ntl.pall
+; RV64ZICBOPZIHINTNTL-NEXT:    prefetch.r -2048(a0)
+; RV64ZICBOPZIHINTNTL-NEXT:    ntl.pall
+; RV64ZICBOPZIHINTNTL-NEXT:    prefetch.r 2016(a0)
+; RV64ZICBOPZIHINTNTL-NEXT:    ntl.pall
+; RV64ZICBOPZIHINTNTL-NEXT:    prefetch.r 32(a0)
+; RV64ZICBOPZIHINTNTL-NEXT:    ntl.pall
+; RV64ZICBOPZIHINTNTL-NEXT:    prefetch.r -32(a0)
+; RV64ZICBOPZIHINTNTL-NEXT:    addi a1, a0, -2048
+; RV64ZICBOPZIHINTNTL-NEXT:    addi a1, a1, -1
+; RV64ZICBOPZIHINTNTL-NEXT:    ntl.pall
+; RV64ZICBOPZIHINTNTL-NEXT:    prefetch.r 0(a1)
+; RV64ZICBOPZIHINTNTL-NEXT:    addi a1, a0, 2047
+; RV64ZICBOPZIHINTNTL-NEXT:    addi a1, a1, 1
+; RV64ZICBOPZIHINTNTL-NEXT:    ntl.pall
+; RV64ZICBOPZIHINTNTL-NEXT:    prefetch.r 0(a1)
+; RV64ZICBOPZIHINTNTL-NEXT:    addi a1, a0, 16
+; RV64ZICBOPZIHINTNTL-NEXT:    ntl.pall
+; RV64ZICBOPZIHINTNTL-NEXT:    prefetch.r 0(a1)
+; RV64ZICBOPZIHINTNTL-NEXT:    addi a0, a0, -16
+; RV64ZICBOPZIHINTNTL-NEXT:    ntl.pall
+; RV64ZICBOPZIHINTNTL-NEXT:    prefetch.r 0(a0)
+; RV64ZICBOPZIHINTNTL-NEXT:    ret
+  %valid_addr0 = getelementptr i8, ptr %a, i64 -2048
+  call void @llvm.prefetch(ptr %valid_addr0, i32 0, i32 1, i32 1)
+  %valid_addr1 = getelementptr i8, ptr %a, i64 2016
+  call void @llvm.prefetch(ptr %valid_addr1, i32 0, i32 1, i32 1)
+  %valid_addr2 = getelementptr i8, ptr %a, i64 32
+  call void @llvm.prefetch(ptr %valid_addr2, i32 0, i32 1, i32 1)
+  %valid_addr3 = getelementptr i8, ptr %a, i64 -32
+  call void @llvm.prefetch(ptr %valid_addr3, i32 0, i32 1, i32 1)
+  %invalid_addr0 = getelementptr i8, ptr %a, i64 -2049
+  call void @llvm.prefetch(ptr %invalid_addr0, i32 0, i32 1, i32 1)
+  %invalid_addr1 = getelementptr i8, ptr %a, i64 2048
+  call void @llvm.prefetch(ptr %invalid_addr1, i32 0, i32 1, i32 1)
+  %invalid_addr2 = getelementptr i8, ptr %a, i64 16
+  call void @llvm.prefetch(ptr %invalid_addr2, i32 0, i32 1, i32 1)
+  %invalid_addr3 = getelementptr i8, ptr %a, i64 -16
+  call void @llvm.prefetch(ptr %invalid_addr3, i32 0, i32 1, i32 1)
+  ret void
+}
+
 define void @test_prefetch_write_locality_1(ptr %a) nounwind {
 ; RV32I-LABEL: test_prefetch_write_locality_1:
 ; RV32I:       # %bb.0:
@@ -152,6 +508,95 @@ define void @test_prefetch_write_locality_1(ptr %a) nounwind {
   ret void
 }
 
+define void @test_prefetch_offsetable_write_locality_1(ptr %a) nounwind {
+; RV32I-LABEL: test_prefetch_offsetable_write_locality_1:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: test_prefetch_offsetable_write_locality_1:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    ret
+;
+; RV32ZICBOP-LABEL: test_prefetch_offsetable...
<truncated>
</pre>

</details>

https://github.com/llvm/llvm-project/pull/66072


More information about the llvm-commits mailing list