[llvm] [RISCV] Match prefetch address with offset (PR #66072)
Wang Pengcheng via llvm-commits
llvm-commits at lists.llvm.org
Tue Sep 12 04:59:52 PDT 2023
https://github.com/wangpc-pp created https://github.com/llvm/llvm-project/pull/66072:
A new ComplexPattern `AddrRegImmLsb00000` is added, which is like
`AddrRegImm` except that if the least significant 5 bits of the offset
aren't all zeros, we fall back to offset 0.
>From 655c8abe64943db202b4393c355f9739c507023f Mon Sep 17 00:00:00 2001
From: wangpc <wangpengcheng.pp at bytedance.com>
Date: Tue, 12 Sep 2023 19:17:49 +0800
Subject: [PATCH 1/2] [RISCV] Add prefetch test with offsetable addresses
We should be able to merge the offset later.
---
llvm/test/CodeGen/RISCV/prefetch.ll | 1151 +++++++++++++++++++++++++++
1 file changed, 1151 insertions(+)
diff --git a/llvm/test/CodeGen/RISCV/prefetch.ll b/llvm/test/CodeGen/RISCV/prefetch.ll
index 39732636d298b0f..655f84f1bdfe0a4 100644
--- a/llvm/test/CodeGen/RISCV/prefetch.ll
+++ b/llvm/test/CodeGen/RISCV/prefetch.ll
@@ -40,6 +40,104 @@ define void @test_prefetch_read_locality_0(ptr %a) nounwind {
ret void
}
+define void @test_prefetch_offsetable_read_locality_0(ptr %a) nounwind {
+; RV32I-LABEL: test_prefetch_offsetable_read_locality_0:
+; RV32I: # %bb.0:
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_prefetch_offsetable_read_locality_0:
+; RV64I: # %bb.0:
+; RV64I-NEXT: ret
+;
+; RV32ZICBOP-LABEL: test_prefetch_offsetable_read_locality_0:
+; RV32ZICBOP: # %bb.0:
+; RV32ZICBOP-NEXT: addi a1, a0, -2048
+; RV32ZICBOP-NEXT: prefetch.r 0(a1)
+; RV32ZICBOP-NEXT: addi a2, a0, 2016
+; RV32ZICBOP-NEXT: prefetch.r 0(a2)
+; RV32ZICBOP-NEXT: addi a2, a0, 32
+; RV32ZICBOP-NEXT: prefetch.r 0(a2)
+; RV32ZICBOP-NEXT: addi a2, a0, -32
+; RV32ZICBOP-NEXT: prefetch.r 0(a2)
+; RV32ZICBOP-NEXT: addi a1, a1, -1
+; RV32ZICBOP-NEXT: prefetch.r 0(a1)
+; RV32ZICBOP-NEXT: addi a1, a0, 2047
+; RV32ZICBOP-NEXT: addi a1, a1, 1
+; RV32ZICBOP-NEXT: prefetch.r 0(a1)
+; RV32ZICBOP-NEXT: addi a1, a0, 16
+; RV32ZICBOP-NEXT: prefetch.r 0(a1)
+; RV32ZICBOP-NEXT: addi a0, a0, -16
+; RV32ZICBOP-NEXT: prefetch.r 0(a0)
+; RV32ZICBOP-NEXT: ret
+;
+; RV64ZICBOP-LABEL: test_prefetch_offsetable_read_locality_0:
+; RV64ZICBOP: # %bb.0:
+; RV64ZICBOP-NEXT: addi a1, a0, -2048
+; RV64ZICBOP-NEXT: prefetch.r 0(a1)
+; RV64ZICBOP-NEXT: addi a2, a0, 2016
+; RV64ZICBOP-NEXT: prefetch.r 0(a2)
+; RV64ZICBOP-NEXT: addi a2, a0, 32
+; RV64ZICBOP-NEXT: prefetch.r 0(a2)
+; RV64ZICBOP-NEXT: addi a2, a0, -32
+; RV64ZICBOP-NEXT: prefetch.r 0(a2)
+; RV64ZICBOP-NEXT: addi a1, a1, -1
+; RV64ZICBOP-NEXT: prefetch.r 0(a1)
+; RV64ZICBOP-NEXT: addi a1, a0, 2047
+; RV64ZICBOP-NEXT: addi a1, a1, 1
+; RV64ZICBOP-NEXT: prefetch.r 0(a1)
+; RV64ZICBOP-NEXT: addi a1, a0, 16
+; RV64ZICBOP-NEXT: prefetch.r 0(a1)
+; RV64ZICBOP-NEXT: addi a0, a0, -16
+; RV64ZICBOP-NEXT: prefetch.r 0(a0)
+; RV64ZICBOP-NEXT: ret
+;
+; RV64ZICBOPZIHINTNTL-LABEL: test_prefetch_offsetable_read_locality_0:
+; RV64ZICBOPZIHINTNTL: # %bb.0:
+; RV64ZICBOPZIHINTNTL-NEXT: addi a1, a0, -2048
+; RV64ZICBOPZIHINTNTL-NEXT: ntl.all
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.r 0(a1)
+; RV64ZICBOPZIHINTNTL-NEXT: addi a2, a0, 2016
+; RV64ZICBOPZIHINTNTL-NEXT: ntl.all
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.r 0(a2)
+; RV64ZICBOPZIHINTNTL-NEXT: addi a2, a0, 32
+; RV64ZICBOPZIHINTNTL-NEXT: ntl.all
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.r 0(a2)
+; RV64ZICBOPZIHINTNTL-NEXT: addi a2, a0, -32
+; RV64ZICBOPZIHINTNTL-NEXT: ntl.all
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.r 0(a2)
+; RV64ZICBOPZIHINTNTL-NEXT: addi a1, a1, -1
+; RV64ZICBOPZIHINTNTL-NEXT: ntl.all
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.r 0(a1)
+; RV64ZICBOPZIHINTNTL-NEXT: addi a1, a0, 2047
+; RV64ZICBOPZIHINTNTL-NEXT: addi a1, a1, 1
+; RV64ZICBOPZIHINTNTL-NEXT: ntl.all
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.r 0(a1)
+; RV64ZICBOPZIHINTNTL-NEXT: addi a1, a0, 16
+; RV64ZICBOPZIHINTNTL-NEXT: ntl.all
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.r 0(a1)
+; RV64ZICBOPZIHINTNTL-NEXT: addi a0, a0, -16
+; RV64ZICBOPZIHINTNTL-NEXT: ntl.all
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.r 0(a0)
+; RV64ZICBOPZIHINTNTL-NEXT: ret
+ %valid_addr0 = getelementptr i8, ptr %a, i64 -2048
+ call void @llvm.prefetch(ptr %valid_addr0, i32 0, i32 0, i32 1)
+ %valid_addr1 = getelementptr i8, ptr %a, i64 2016
+ call void @llvm.prefetch(ptr %valid_addr1, i32 0, i32 0, i32 1)
+ %valid_addr2 = getelementptr i8, ptr %a, i64 32
+ call void @llvm.prefetch(ptr %valid_addr2, i32 0, i32 0, i32 1)
+ %valid_addr3 = getelementptr i8, ptr %a, i64 -32
+ call void @llvm.prefetch(ptr %valid_addr3, i32 0, i32 0, i32 1)
+ %invalid_addr0 = getelementptr i8, ptr %a, i64 -2049
+ call void @llvm.prefetch(ptr %invalid_addr0, i32 0, i32 0, i32 1)
+ %invalid_addr1 = getelementptr i8, ptr %a, i64 2048
+ call void @llvm.prefetch(ptr %invalid_addr1, i32 0, i32 0, i32 1)
+ %invalid_addr2 = getelementptr i8, ptr %a, i64 16
+ call void @llvm.prefetch(ptr %invalid_addr2, i32 0, i32 0, i32 1)
+ %invalid_addr3 = getelementptr i8, ptr %a, i64 -16
+ call void @llvm.prefetch(ptr %invalid_addr3, i32 0, i32 0, i32 1)
+ ret void
+}
+
define void @test_prefetch_write_locality_0(ptr %a) nounwind {
; RV32I-LABEL: test_prefetch_write_locality_0:
; RV32I: # %bb.0:
@@ -68,6 +166,104 @@ define void @test_prefetch_write_locality_0(ptr %a) nounwind {
ret void
}
+define void @test_prefetch_offsetable_write_locality_0(ptr %a) nounwind {
+; RV32I-LABEL: test_prefetch_offsetable_write_locality_0:
+; RV32I: # %bb.0:
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_prefetch_offsetable_write_locality_0:
+; RV64I: # %bb.0:
+; RV64I-NEXT: ret
+;
+; RV32ZICBOP-LABEL: test_prefetch_offsetable_write_locality_0:
+; RV32ZICBOP: # %bb.0:
+; RV32ZICBOP-NEXT: addi a1, a0, -2048
+; RV32ZICBOP-NEXT: prefetch.w 0(a1)
+; RV32ZICBOP-NEXT: addi a2, a0, 2016
+; RV32ZICBOP-NEXT: prefetch.w 0(a2)
+; RV32ZICBOP-NEXT: addi a2, a0, 32
+; RV32ZICBOP-NEXT: prefetch.w 0(a2)
+; RV32ZICBOP-NEXT: addi a2, a0, -32
+; RV32ZICBOP-NEXT: prefetch.w 0(a2)
+; RV32ZICBOP-NEXT: addi a1, a1, -1
+; RV32ZICBOP-NEXT: prefetch.w 0(a1)
+; RV32ZICBOP-NEXT: addi a1, a0, 2047
+; RV32ZICBOP-NEXT: addi a1, a1, 1
+; RV32ZICBOP-NEXT: prefetch.w 0(a1)
+; RV32ZICBOP-NEXT: addi a1, a0, 16
+; RV32ZICBOP-NEXT: prefetch.w 0(a1)
+; RV32ZICBOP-NEXT: addi a0, a0, -16
+; RV32ZICBOP-NEXT: prefetch.w 0(a0)
+; RV32ZICBOP-NEXT: ret
+;
+; RV64ZICBOP-LABEL: test_prefetch_offsetable_write_locality_0:
+; RV64ZICBOP: # %bb.0:
+; RV64ZICBOP-NEXT: addi a1, a0, -2048
+; RV64ZICBOP-NEXT: prefetch.w 0(a1)
+; RV64ZICBOP-NEXT: addi a2, a0, 2016
+; RV64ZICBOP-NEXT: prefetch.w 0(a2)
+; RV64ZICBOP-NEXT: addi a2, a0, 32
+; RV64ZICBOP-NEXT: prefetch.w 0(a2)
+; RV64ZICBOP-NEXT: addi a2, a0, -32
+; RV64ZICBOP-NEXT: prefetch.w 0(a2)
+; RV64ZICBOP-NEXT: addi a1, a1, -1
+; RV64ZICBOP-NEXT: prefetch.w 0(a1)
+; RV64ZICBOP-NEXT: addi a1, a0, 2047
+; RV64ZICBOP-NEXT: addi a1, a1, 1
+; RV64ZICBOP-NEXT: prefetch.w 0(a1)
+; RV64ZICBOP-NEXT: addi a1, a0, 16
+; RV64ZICBOP-NEXT: prefetch.w 0(a1)
+; RV64ZICBOP-NEXT: addi a0, a0, -16
+; RV64ZICBOP-NEXT: prefetch.w 0(a0)
+; RV64ZICBOP-NEXT: ret
+;
+; RV64ZICBOPZIHINTNTL-LABEL: test_prefetch_offsetable_write_locality_0:
+; RV64ZICBOPZIHINTNTL: # %bb.0:
+; RV64ZICBOPZIHINTNTL-NEXT: addi a1, a0, -2048
+; RV64ZICBOPZIHINTNTL-NEXT: ntl.all
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.w 0(a1)
+; RV64ZICBOPZIHINTNTL-NEXT: addi a2, a0, 2016
+; RV64ZICBOPZIHINTNTL-NEXT: ntl.all
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.w 0(a2)
+; RV64ZICBOPZIHINTNTL-NEXT: addi a2, a0, 32
+; RV64ZICBOPZIHINTNTL-NEXT: ntl.all
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.w 0(a2)
+; RV64ZICBOPZIHINTNTL-NEXT: addi a2, a0, -32
+; RV64ZICBOPZIHINTNTL-NEXT: ntl.all
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.w 0(a2)
+; RV64ZICBOPZIHINTNTL-NEXT: addi a1, a1, -1
+; RV64ZICBOPZIHINTNTL-NEXT: ntl.all
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.w 0(a1)
+; RV64ZICBOPZIHINTNTL-NEXT: addi a1, a0, 2047
+; RV64ZICBOPZIHINTNTL-NEXT: addi a1, a1, 1
+; RV64ZICBOPZIHINTNTL-NEXT: ntl.all
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.w 0(a1)
+; RV64ZICBOPZIHINTNTL-NEXT: addi a1, a0, 16
+; RV64ZICBOPZIHINTNTL-NEXT: ntl.all
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.w 0(a1)
+; RV64ZICBOPZIHINTNTL-NEXT: addi a0, a0, -16
+; RV64ZICBOPZIHINTNTL-NEXT: ntl.all
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.w 0(a0)
+; RV64ZICBOPZIHINTNTL-NEXT: ret
+ %valid_addr0 = getelementptr i8, ptr %a, i64 -2048
+ call void @llvm.prefetch(ptr %valid_addr0, i32 1, i32 0, i32 1)
+ %valid_addr1 = getelementptr i8, ptr %a, i64 2016
+ call void @llvm.prefetch(ptr %valid_addr1, i32 1, i32 0, i32 1)
+ %valid_addr2 = getelementptr i8, ptr %a, i64 32
+ call void @llvm.prefetch(ptr %valid_addr2, i32 1, i32 0, i32 1)
+ %valid_addr3 = getelementptr i8, ptr %a, i64 -32
+ call void @llvm.prefetch(ptr %valid_addr3, i32 1, i32 0, i32 1)
+ %invalid_addr0 = getelementptr i8, ptr %a, i64 -2049
+ call void @llvm.prefetch(ptr %invalid_addr0, i32 1, i32 0, i32 1)
+ %invalid_addr1 = getelementptr i8, ptr %a, i64 2048
+ call void @llvm.prefetch(ptr %invalid_addr1, i32 1, i32 0, i32 1)
+ %invalid_addr2 = getelementptr i8, ptr %a, i64 16
+ call void @llvm.prefetch(ptr %invalid_addr2, i32 1, i32 0, i32 1)
+ %invalid_addr3 = getelementptr i8, ptr %a, i64 -16
+ call void @llvm.prefetch(ptr %invalid_addr3, i32 1, i32 0, i32 1)
+ ret void
+}
+
define void @test_prefetch_instruction_locality_0(ptr %a) nounwind {
; RV32I-LABEL: test_prefetch_instruction_locality_0:
; RV32I: # %bb.0:
@@ -96,6 +292,104 @@ define void @test_prefetch_instruction_locality_0(ptr %a) nounwind {
ret void
}
+define void @test_prefetch_offsetable_instruction_locality_0(ptr %a) nounwind {
+; RV32I-LABEL: test_prefetch_offsetable_instruction_locality_0:
+; RV32I: # %bb.0:
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_prefetch_offsetable_instruction_locality_0:
+; RV64I: # %bb.0:
+; RV64I-NEXT: ret
+;
+; RV32ZICBOP-LABEL: test_prefetch_offsetable_instruction_locality_0:
+; RV32ZICBOP: # %bb.0:
+; RV32ZICBOP-NEXT: addi a1, a0, -2048
+; RV32ZICBOP-NEXT: prefetch.i 0(a1)
+; RV32ZICBOP-NEXT: addi a2, a0, 2016
+; RV32ZICBOP-NEXT: prefetch.i 0(a2)
+; RV32ZICBOP-NEXT: addi a2, a0, 32
+; RV32ZICBOP-NEXT: prefetch.i 0(a2)
+; RV32ZICBOP-NEXT: addi a2, a0, -32
+; RV32ZICBOP-NEXT: prefetch.i 0(a2)
+; RV32ZICBOP-NEXT: addi a1, a1, -1
+; RV32ZICBOP-NEXT: prefetch.i 0(a1)
+; RV32ZICBOP-NEXT: addi a1, a0, 2047
+; RV32ZICBOP-NEXT: addi a1, a1, 1
+; RV32ZICBOP-NEXT: prefetch.i 0(a1)
+; RV32ZICBOP-NEXT: addi a1, a0, 16
+; RV32ZICBOP-NEXT: prefetch.i 0(a1)
+; RV32ZICBOP-NEXT: addi a0, a0, -16
+; RV32ZICBOP-NEXT: prefetch.i 0(a0)
+; RV32ZICBOP-NEXT: ret
+;
+; RV64ZICBOP-LABEL: test_prefetch_offsetable_instruction_locality_0:
+; RV64ZICBOP: # %bb.0:
+; RV64ZICBOP-NEXT: addi a1, a0, -2048
+; RV64ZICBOP-NEXT: prefetch.i 0(a1)
+; RV64ZICBOP-NEXT: addi a2, a0, 2016
+; RV64ZICBOP-NEXT: prefetch.i 0(a2)
+; RV64ZICBOP-NEXT: addi a2, a0, 32
+; RV64ZICBOP-NEXT: prefetch.i 0(a2)
+; RV64ZICBOP-NEXT: addi a2, a0, -32
+; RV64ZICBOP-NEXT: prefetch.i 0(a2)
+; RV64ZICBOP-NEXT: addi a1, a1, -1
+; RV64ZICBOP-NEXT: prefetch.i 0(a1)
+; RV64ZICBOP-NEXT: addi a1, a0, 2047
+; RV64ZICBOP-NEXT: addi a1, a1, 1
+; RV64ZICBOP-NEXT: prefetch.i 0(a1)
+; RV64ZICBOP-NEXT: addi a1, a0, 16
+; RV64ZICBOP-NEXT: prefetch.i 0(a1)
+; RV64ZICBOP-NEXT: addi a0, a0, -16
+; RV64ZICBOP-NEXT: prefetch.i 0(a0)
+; RV64ZICBOP-NEXT: ret
+;
+; RV64ZICBOPZIHINTNTL-LABEL: test_prefetch_offsetable_instruction_locality_0:
+; RV64ZICBOPZIHINTNTL: # %bb.0:
+; RV64ZICBOPZIHINTNTL-NEXT: addi a1, a0, -2048
+; RV64ZICBOPZIHINTNTL-NEXT: ntl.all
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.i 0(a1)
+; RV64ZICBOPZIHINTNTL-NEXT: addi a2, a0, 2016
+; RV64ZICBOPZIHINTNTL-NEXT: ntl.all
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.i 0(a2)
+; RV64ZICBOPZIHINTNTL-NEXT: addi a2, a0, 32
+; RV64ZICBOPZIHINTNTL-NEXT: ntl.all
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.i 0(a2)
+; RV64ZICBOPZIHINTNTL-NEXT: addi a2, a0, -32
+; RV64ZICBOPZIHINTNTL-NEXT: ntl.all
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.i 0(a2)
+; RV64ZICBOPZIHINTNTL-NEXT: addi a1, a1, -1
+; RV64ZICBOPZIHINTNTL-NEXT: ntl.all
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.i 0(a1)
+; RV64ZICBOPZIHINTNTL-NEXT: addi a1, a0, 2047
+; RV64ZICBOPZIHINTNTL-NEXT: addi a1, a1, 1
+; RV64ZICBOPZIHINTNTL-NEXT: ntl.all
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.i 0(a1)
+; RV64ZICBOPZIHINTNTL-NEXT: addi a1, a0, 16
+; RV64ZICBOPZIHINTNTL-NEXT: ntl.all
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.i 0(a1)
+; RV64ZICBOPZIHINTNTL-NEXT: addi a0, a0, -16
+; RV64ZICBOPZIHINTNTL-NEXT: ntl.all
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.i 0(a0)
+; RV64ZICBOPZIHINTNTL-NEXT: ret
+ %valid_addr0 = getelementptr i8, ptr %a, i64 -2048
+ call void @llvm.prefetch(ptr %valid_addr0, i32 0, i32 0, i32 0)
+ %valid_addr1 = getelementptr i8, ptr %a, i64 2016
+ call void @llvm.prefetch(ptr %valid_addr1, i32 0, i32 0, i32 0)
+ %valid_addr2 = getelementptr i8, ptr %a, i64 32
+ call void @llvm.prefetch(ptr %valid_addr2, i32 0, i32 0, i32 0)
+ %valid_addr3 = getelementptr i8, ptr %a, i64 -32
+ call void @llvm.prefetch(ptr %valid_addr3, i32 0, i32 0, i32 0)
+ %invalid_addr0 = getelementptr i8, ptr %a, i64 -2049
+ call void @llvm.prefetch(ptr %invalid_addr0, i32 0, i32 0, i32 0)
+ %invalid_addr1 = getelementptr i8, ptr %a, i64 2048
+ call void @llvm.prefetch(ptr %invalid_addr1, i32 0, i32 0, i32 0)
+ %invalid_addr2 = getelementptr i8, ptr %a, i64 16
+ call void @llvm.prefetch(ptr %invalid_addr2, i32 0, i32 0, i32 0)
+ %invalid_addr3 = getelementptr i8, ptr %a, i64 -16
+ call void @llvm.prefetch(ptr %invalid_addr3, i32 0, i32 0, i32 0)
+ ret void
+}
+
define void @test_prefetch_read_locality_1(ptr %a) nounwind {
; RV32I-LABEL: test_prefetch_read_locality_1:
; RV32I: # %bb.0:
@@ -124,6 +418,104 @@ define void @test_prefetch_read_locality_1(ptr %a) nounwind {
ret void
}
+define void @test_prefetch_offsetable_read_locality_1(ptr %a) nounwind {
+; RV32I-LABEL: test_prefetch_offsetable_read_locality_1:
+; RV32I: # %bb.0:
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_prefetch_offsetable_read_locality_1:
+; RV64I: # %bb.0:
+; RV64I-NEXT: ret
+;
+; RV32ZICBOP-LABEL: test_prefetch_offsetable_read_locality_1:
+; RV32ZICBOP: # %bb.0:
+; RV32ZICBOP-NEXT: addi a1, a0, -2048
+; RV32ZICBOP-NEXT: prefetch.r 0(a1)
+; RV32ZICBOP-NEXT: addi a2, a0, 2016
+; RV32ZICBOP-NEXT: prefetch.r 0(a2)
+; RV32ZICBOP-NEXT: addi a2, a0, 32
+; RV32ZICBOP-NEXT: prefetch.r 0(a2)
+; RV32ZICBOP-NEXT: addi a2, a0, -32
+; RV32ZICBOP-NEXT: prefetch.r 0(a2)
+; RV32ZICBOP-NEXT: addi a1, a1, -1
+; RV32ZICBOP-NEXT: prefetch.r 0(a1)
+; RV32ZICBOP-NEXT: addi a1, a0, 2047
+; RV32ZICBOP-NEXT: addi a1, a1, 1
+; RV32ZICBOP-NEXT: prefetch.r 0(a1)
+; RV32ZICBOP-NEXT: addi a1, a0, 16
+; RV32ZICBOP-NEXT: prefetch.r 0(a1)
+; RV32ZICBOP-NEXT: addi a0, a0, -16
+; RV32ZICBOP-NEXT: prefetch.r 0(a0)
+; RV32ZICBOP-NEXT: ret
+;
+; RV64ZICBOP-LABEL: test_prefetch_offsetable_read_locality_1:
+; RV64ZICBOP: # %bb.0:
+; RV64ZICBOP-NEXT: addi a1, a0, -2048
+; RV64ZICBOP-NEXT: prefetch.r 0(a1)
+; RV64ZICBOP-NEXT: addi a2, a0, 2016
+; RV64ZICBOP-NEXT: prefetch.r 0(a2)
+; RV64ZICBOP-NEXT: addi a2, a0, 32
+; RV64ZICBOP-NEXT: prefetch.r 0(a2)
+; RV64ZICBOP-NEXT: addi a2, a0, -32
+; RV64ZICBOP-NEXT: prefetch.r 0(a2)
+; RV64ZICBOP-NEXT: addi a1, a1, -1
+; RV64ZICBOP-NEXT: prefetch.r 0(a1)
+; RV64ZICBOP-NEXT: addi a1, a0, 2047
+; RV64ZICBOP-NEXT: addi a1, a1, 1
+; RV64ZICBOP-NEXT: prefetch.r 0(a1)
+; RV64ZICBOP-NEXT: addi a1, a0, 16
+; RV64ZICBOP-NEXT: prefetch.r 0(a1)
+; RV64ZICBOP-NEXT: addi a0, a0, -16
+; RV64ZICBOP-NEXT: prefetch.r 0(a0)
+; RV64ZICBOP-NEXT: ret
+;
+; RV64ZICBOPZIHINTNTL-LABEL: test_prefetch_offsetable_read_locality_1:
+; RV64ZICBOPZIHINTNTL: # %bb.0:
+; RV64ZICBOPZIHINTNTL-NEXT: addi a1, a0, -2048
+; RV64ZICBOPZIHINTNTL-NEXT: ntl.pall
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.r 0(a1)
+; RV64ZICBOPZIHINTNTL-NEXT: addi a2, a0, 2016
+; RV64ZICBOPZIHINTNTL-NEXT: ntl.pall
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.r 0(a2)
+; RV64ZICBOPZIHINTNTL-NEXT: addi a2, a0, 32
+; RV64ZICBOPZIHINTNTL-NEXT: ntl.pall
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.r 0(a2)
+; RV64ZICBOPZIHINTNTL-NEXT: addi a2, a0, -32
+; RV64ZICBOPZIHINTNTL-NEXT: ntl.pall
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.r 0(a2)
+; RV64ZICBOPZIHINTNTL-NEXT: addi a1, a1, -1
+; RV64ZICBOPZIHINTNTL-NEXT: ntl.pall
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.r 0(a1)
+; RV64ZICBOPZIHINTNTL-NEXT: addi a1, a0, 2047
+; RV64ZICBOPZIHINTNTL-NEXT: addi a1, a1, 1
+; RV64ZICBOPZIHINTNTL-NEXT: ntl.pall
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.r 0(a1)
+; RV64ZICBOPZIHINTNTL-NEXT: addi a1, a0, 16
+; RV64ZICBOPZIHINTNTL-NEXT: ntl.pall
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.r 0(a1)
+; RV64ZICBOPZIHINTNTL-NEXT: addi a0, a0, -16
+; RV64ZICBOPZIHINTNTL-NEXT: ntl.pall
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.r 0(a0)
+; RV64ZICBOPZIHINTNTL-NEXT: ret
+ %valid_addr0 = getelementptr i8, ptr %a, i64 -2048
+ call void @llvm.prefetch(ptr %valid_addr0, i32 0, i32 1, i32 1)
+ %valid_addr1 = getelementptr i8, ptr %a, i64 2016
+ call void @llvm.prefetch(ptr %valid_addr1, i32 0, i32 1, i32 1)
+ %valid_addr2 = getelementptr i8, ptr %a, i64 32
+ call void @llvm.prefetch(ptr %valid_addr2, i32 0, i32 1, i32 1)
+ %valid_addr3 = getelementptr i8, ptr %a, i64 -32
+ call void @llvm.prefetch(ptr %valid_addr3, i32 0, i32 1, i32 1)
+ %invalid_addr0 = getelementptr i8, ptr %a, i64 -2049
+ call void @llvm.prefetch(ptr %invalid_addr0, i32 0, i32 1, i32 1)
+ %invalid_addr1 = getelementptr i8, ptr %a, i64 2048
+ call void @llvm.prefetch(ptr %invalid_addr1, i32 0, i32 1, i32 1)
+ %invalid_addr2 = getelementptr i8, ptr %a, i64 16
+ call void @llvm.prefetch(ptr %invalid_addr2, i32 0, i32 1, i32 1)
+ %invalid_addr3 = getelementptr i8, ptr %a, i64 -16
+ call void @llvm.prefetch(ptr %invalid_addr3, i32 0, i32 1, i32 1)
+ ret void
+}
+
define void @test_prefetch_write_locality_1(ptr %a) nounwind {
; RV32I-LABEL: test_prefetch_write_locality_1:
; RV32I: # %bb.0:
@@ -152,6 +544,104 @@ define void @test_prefetch_write_locality_1(ptr %a) nounwind {
ret void
}
+define void @test_prefetch_offsetable_write_locality_1(ptr %a) nounwind {
+; RV32I-LABEL: test_prefetch_offsetable_write_locality_1:
+; RV32I: # %bb.0:
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_prefetch_offsetable_write_locality_1:
+; RV64I: # %bb.0:
+; RV64I-NEXT: ret
+;
+; RV32ZICBOP-LABEL: test_prefetch_offsetable_write_locality_1:
+; RV32ZICBOP: # %bb.0:
+; RV32ZICBOP-NEXT: addi a1, a0, -2048
+; RV32ZICBOP-NEXT: prefetch.w 0(a1)
+; RV32ZICBOP-NEXT: addi a2, a0, 2016
+; RV32ZICBOP-NEXT: prefetch.w 0(a2)
+; RV32ZICBOP-NEXT: addi a2, a0, 32
+; RV32ZICBOP-NEXT: prefetch.w 0(a2)
+; RV32ZICBOP-NEXT: addi a2, a0, -32
+; RV32ZICBOP-NEXT: prefetch.w 0(a2)
+; RV32ZICBOP-NEXT: addi a1, a1, -1
+; RV32ZICBOP-NEXT: prefetch.w 0(a1)
+; RV32ZICBOP-NEXT: addi a1, a0, 2047
+; RV32ZICBOP-NEXT: addi a1, a1, 1
+; RV32ZICBOP-NEXT: prefetch.w 0(a1)
+; RV32ZICBOP-NEXT: addi a1, a0, 16
+; RV32ZICBOP-NEXT: prefetch.w 0(a1)
+; RV32ZICBOP-NEXT: addi a0, a0, -16
+; RV32ZICBOP-NEXT: prefetch.w 0(a0)
+; RV32ZICBOP-NEXT: ret
+;
+; RV64ZICBOP-LABEL: test_prefetch_offsetable_write_locality_1:
+; RV64ZICBOP: # %bb.0:
+; RV64ZICBOP-NEXT: addi a1, a0, -2048
+; RV64ZICBOP-NEXT: prefetch.w 0(a1)
+; RV64ZICBOP-NEXT: addi a2, a0, 2016
+; RV64ZICBOP-NEXT: prefetch.w 0(a2)
+; RV64ZICBOP-NEXT: addi a2, a0, 32
+; RV64ZICBOP-NEXT: prefetch.w 0(a2)
+; RV64ZICBOP-NEXT: addi a2, a0, -32
+; RV64ZICBOP-NEXT: prefetch.w 0(a2)
+; RV64ZICBOP-NEXT: addi a1, a1, -1
+; RV64ZICBOP-NEXT: prefetch.w 0(a1)
+; RV64ZICBOP-NEXT: addi a1, a0, 2047
+; RV64ZICBOP-NEXT: addi a1, a1, 1
+; RV64ZICBOP-NEXT: prefetch.w 0(a1)
+; RV64ZICBOP-NEXT: addi a1, a0, 16
+; RV64ZICBOP-NEXT: prefetch.w 0(a1)
+; RV64ZICBOP-NEXT: addi a0, a0, -16
+; RV64ZICBOP-NEXT: prefetch.w 0(a0)
+; RV64ZICBOP-NEXT: ret
+;
+; RV64ZICBOPZIHINTNTL-LABEL: test_prefetch_offsetable_write_locality_1:
+; RV64ZICBOPZIHINTNTL: # %bb.0:
+; RV64ZICBOPZIHINTNTL-NEXT: addi a1, a0, -2048
+; RV64ZICBOPZIHINTNTL-NEXT: ntl.pall
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.w 0(a1)
+; RV64ZICBOPZIHINTNTL-NEXT: addi a2, a0, 2016
+; RV64ZICBOPZIHINTNTL-NEXT: ntl.pall
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.w 0(a2)
+; RV64ZICBOPZIHINTNTL-NEXT: addi a2, a0, 32
+; RV64ZICBOPZIHINTNTL-NEXT: ntl.pall
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.w 0(a2)
+; RV64ZICBOPZIHINTNTL-NEXT: addi a2, a0, -32
+; RV64ZICBOPZIHINTNTL-NEXT: ntl.pall
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.w 0(a2)
+; RV64ZICBOPZIHINTNTL-NEXT: addi a1, a1, -1
+; RV64ZICBOPZIHINTNTL-NEXT: ntl.pall
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.w 0(a1)
+; RV64ZICBOPZIHINTNTL-NEXT: addi a1, a0, 2047
+; RV64ZICBOPZIHINTNTL-NEXT: addi a1, a1, 1
+; RV64ZICBOPZIHINTNTL-NEXT: ntl.pall
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.w 0(a1)
+; RV64ZICBOPZIHINTNTL-NEXT: addi a1, a0, 16
+; RV64ZICBOPZIHINTNTL-NEXT: ntl.pall
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.w 0(a1)
+; RV64ZICBOPZIHINTNTL-NEXT: addi a0, a0, -16
+; RV64ZICBOPZIHINTNTL-NEXT: ntl.pall
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.w 0(a0)
+; RV64ZICBOPZIHINTNTL-NEXT: ret
+ %valid_addr0 = getelementptr i8, ptr %a, i64 -2048
+ call void @llvm.prefetch(ptr %valid_addr0, i32 1, i32 1, i32 1)
+ %valid_addr1 = getelementptr i8, ptr %a, i64 2016
+ call void @llvm.prefetch(ptr %valid_addr1, i32 1, i32 1, i32 1)
+ %valid_addr2 = getelementptr i8, ptr %a, i64 32
+ call void @llvm.prefetch(ptr %valid_addr2, i32 1, i32 1, i32 1)
+ %valid_addr3 = getelementptr i8, ptr %a, i64 -32
+ call void @llvm.prefetch(ptr %valid_addr3, i32 1, i32 1, i32 1)
+ %invalid_addr0 = getelementptr i8, ptr %a, i64 -2049
+ call void @llvm.prefetch(ptr %invalid_addr0, i32 1, i32 1, i32 1)
+ %invalid_addr1 = getelementptr i8, ptr %a, i64 2048
+ call void @llvm.prefetch(ptr %invalid_addr1, i32 1, i32 1, i32 1)
+ %invalid_addr2 = getelementptr i8, ptr %a, i64 16
+ call void @llvm.prefetch(ptr %invalid_addr2, i32 1, i32 1, i32 1)
+ %invalid_addr3 = getelementptr i8, ptr %a, i64 -16
+ call void @llvm.prefetch(ptr %invalid_addr3, i32 1, i32 1, i32 1)
+ ret void
+}
+
define void @test_prefetch_instruction_locality_1(ptr %a) nounwind {
; RV32I-LABEL: test_prefetch_instruction_locality_1:
; RV32I: # %bb.0:
@@ -180,6 +670,104 @@ define void @test_prefetch_instruction_locality_1(ptr %a) nounwind {
ret void
}
+define void @test_prefetch_offsetable_instruction_locality_1(ptr %a) nounwind {
+; RV32I-LABEL: test_prefetch_offsetable_instruction_locality_1:
+; RV32I: # %bb.0:
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_prefetch_offsetable_instruction_locality_1:
+; RV64I: # %bb.0:
+; RV64I-NEXT: ret
+;
+; RV32ZICBOP-LABEL: test_prefetch_offsetable_instruction_locality_1:
+; RV32ZICBOP: # %bb.0:
+; RV32ZICBOP-NEXT: addi a1, a0, -2048
+; RV32ZICBOP-NEXT: prefetch.i 0(a1)
+; RV32ZICBOP-NEXT: addi a2, a0, 2016
+; RV32ZICBOP-NEXT: prefetch.i 0(a2)
+; RV32ZICBOP-NEXT: addi a2, a0, 32
+; RV32ZICBOP-NEXT: prefetch.i 0(a2)
+; RV32ZICBOP-NEXT: addi a2, a0, -32
+; RV32ZICBOP-NEXT: prefetch.i 0(a2)
+; RV32ZICBOP-NEXT: addi a1, a1, -1
+; RV32ZICBOP-NEXT: prefetch.i 0(a1)
+; RV32ZICBOP-NEXT: addi a1, a0, 2047
+; RV32ZICBOP-NEXT: addi a1, a1, 1
+; RV32ZICBOP-NEXT: prefetch.i 0(a1)
+; RV32ZICBOP-NEXT: addi a1, a0, 16
+; RV32ZICBOP-NEXT: prefetch.i 0(a1)
+; RV32ZICBOP-NEXT: addi a0, a0, -16
+; RV32ZICBOP-NEXT: prefetch.i 0(a0)
+; RV32ZICBOP-NEXT: ret
+;
+; RV64ZICBOP-LABEL: test_prefetch_offsetable_instruction_locality_1:
+; RV64ZICBOP: # %bb.0:
+; RV64ZICBOP-NEXT: addi a1, a0, -2048
+; RV64ZICBOP-NEXT: prefetch.i 0(a1)
+; RV64ZICBOP-NEXT: addi a2, a0, 2016
+; RV64ZICBOP-NEXT: prefetch.i 0(a2)
+; RV64ZICBOP-NEXT: addi a2, a0, 32
+; RV64ZICBOP-NEXT: prefetch.i 0(a2)
+; RV64ZICBOP-NEXT: addi a2, a0, -32
+; RV64ZICBOP-NEXT: prefetch.i 0(a2)
+; RV64ZICBOP-NEXT: addi a1, a1, -1
+; RV64ZICBOP-NEXT: prefetch.i 0(a1)
+; RV64ZICBOP-NEXT: addi a1, a0, 2047
+; RV64ZICBOP-NEXT: addi a1, a1, 1
+; RV64ZICBOP-NEXT: prefetch.i 0(a1)
+; RV64ZICBOP-NEXT: addi a1, a0, 16
+; RV64ZICBOP-NEXT: prefetch.i 0(a1)
+; RV64ZICBOP-NEXT: addi a0, a0, -16
+; RV64ZICBOP-NEXT: prefetch.i 0(a0)
+; RV64ZICBOP-NEXT: ret
+;
+; RV64ZICBOPZIHINTNTL-LABEL: test_prefetch_offsetable_instruction_locality_1:
+; RV64ZICBOPZIHINTNTL: # %bb.0:
+; RV64ZICBOPZIHINTNTL-NEXT: addi a1, a0, -2048
+; RV64ZICBOPZIHINTNTL-NEXT: ntl.pall
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.i 0(a1)
+; RV64ZICBOPZIHINTNTL-NEXT: addi a2, a0, 2016
+; RV64ZICBOPZIHINTNTL-NEXT: ntl.pall
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.i 0(a2)
+; RV64ZICBOPZIHINTNTL-NEXT: addi a2, a0, 32
+; RV64ZICBOPZIHINTNTL-NEXT: ntl.pall
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.i 0(a2)
+; RV64ZICBOPZIHINTNTL-NEXT: addi a2, a0, -32
+; RV64ZICBOPZIHINTNTL-NEXT: ntl.pall
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.i 0(a2)
+; RV64ZICBOPZIHINTNTL-NEXT: addi a1, a1, -1
+; RV64ZICBOPZIHINTNTL-NEXT: ntl.pall
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.i 0(a1)
+; RV64ZICBOPZIHINTNTL-NEXT: addi a1, a0, 2047
+; RV64ZICBOPZIHINTNTL-NEXT: addi a1, a1, 1
+; RV64ZICBOPZIHINTNTL-NEXT: ntl.pall
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.i 0(a1)
+; RV64ZICBOPZIHINTNTL-NEXT: addi a1, a0, 16
+; RV64ZICBOPZIHINTNTL-NEXT: ntl.pall
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.i 0(a1)
+; RV64ZICBOPZIHINTNTL-NEXT: addi a0, a0, -16
+; RV64ZICBOPZIHINTNTL-NEXT: ntl.pall
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.i 0(a0)
+; RV64ZICBOPZIHINTNTL-NEXT: ret
+ %valid_addr0 = getelementptr i8, ptr %a, i64 -2048
+ call void @llvm.prefetch(ptr %valid_addr0, i32 0, i32 1, i32 0)
+ %valid_addr1 = getelementptr i8, ptr %a, i64 2016
+ call void @llvm.prefetch(ptr %valid_addr1, i32 0, i32 1, i32 0)
+ %valid_addr2 = getelementptr i8, ptr %a, i64 32
+ call void @llvm.prefetch(ptr %valid_addr2, i32 0, i32 1, i32 0)
+ %valid_addr3 = getelementptr i8, ptr %a, i64 -32
+ call void @llvm.prefetch(ptr %valid_addr3, i32 0, i32 1, i32 0)
+ %invalid_addr0 = getelementptr i8, ptr %a, i64 -2049
+ call void @llvm.prefetch(ptr %invalid_addr0, i32 0, i32 1, i32 0)
+ %invalid_addr1 = getelementptr i8, ptr %a, i64 2048
+ call void @llvm.prefetch(ptr %invalid_addr1, i32 0, i32 1, i32 0)
+ %invalid_addr2 = getelementptr i8, ptr %a, i64 16
+ call void @llvm.prefetch(ptr %invalid_addr2, i32 0, i32 1, i32 0)
+ %invalid_addr3 = getelementptr i8, ptr %a, i64 -16
+ call void @llvm.prefetch(ptr %invalid_addr3, i32 0, i32 1, i32 0)
+ ret void
+}
+
define void @test_prefetch_read_locality_2(ptr %a) nounwind {
; RV32I-LABEL: test_prefetch_read_locality_2:
; RV32I: # %bb.0:
@@ -208,6 +796,104 @@ define void @test_prefetch_read_locality_2(ptr %a) nounwind {
ret void
}
+define void @test_prefetch_offsetable_read_locality_2(ptr %a) nounwind {
+; RV32I-LABEL: test_prefetch_offsetable_read_locality_2:
+; RV32I: # %bb.0:
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_prefetch_offsetable_read_locality_2:
+; RV64I: # %bb.0:
+; RV64I-NEXT: ret
+;
+; RV32ZICBOP-LABEL: test_prefetch_offsetable_read_locality_2:
+; RV32ZICBOP: # %bb.0:
+; RV32ZICBOP-NEXT: addi a1, a0, -2048
+; RV32ZICBOP-NEXT: prefetch.r 0(a1)
+; RV32ZICBOP-NEXT: addi a2, a0, 2016
+; RV32ZICBOP-NEXT: prefetch.r 0(a2)
+; RV32ZICBOP-NEXT: addi a2, a0, 32
+; RV32ZICBOP-NEXT: prefetch.r 0(a2)
+; RV32ZICBOP-NEXT: addi a2, a0, -32
+; RV32ZICBOP-NEXT: prefetch.r 0(a2)
+; RV32ZICBOP-NEXT: addi a1, a1, -1
+; RV32ZICBOP-NEXT: prefetch.r 0(a1)
+; RV32ZICBOP-NEXT: addi a1, a0, 2047
+; RV32ZICBOP-NEXT: addi a1, a1, 1
+; RV32ZICBOP-NEXT: prefetch.r 0(a1)
+; RV32ZICBOP-NEXT: addi a1, a0, 16
+; RV32ZICBOP-NEXT: prefetch.r 0(a1)
+; RV32ZICBOP-NEXT: addi a0, a0, -16
+; RV32ZICBOP-NEXT: prefetch.r 0(a0)
+; RV32ZICBOP-NEXT: ret
+;
+; RV64ZICBOP-LABEL: test_prefetch_offsetable_read_locality_2:
+; RV64ZICBOP: # %bb.0:
+; RV64ZICBOP-NEXT: addi a1, a0, -2048
+; RV64ZICBOP-NEXT: prefetch.r 0(a1)
+; RV64ZICBOP-NEXT: addi a2, a0, 2016
+; RV64ZICBOP-NEXT: prefetch.r 0(a2)
+; RV64ZICBOP-NEXT: addi a2, a0, 32
+; RV64ZICBOP-NEXT: prefetch.r 0(a2)
+; RV64ZICBOP-NEXT: addi a2, a0, -32
+; RV64ZICBOP-NEXT: prefetch.r 0(a2)
+; RV64ZICBOP-NEXT: addi a1, a1, -1
+; RV64ZICBOP-NEXT: prefetch.r 0(a1)
+; RV64ZICBOP-NEXT: addi a1, a0, 2047
+; RV64ZICBOP-NEXT: addi a1, a1, 1
+; RV64ZICBOP-NEXT: prefetch.r 0(a1)
+; RV64ZICBOP-NEXT: addi a1, a0, 16
+; RV64ZICBOP-NEXT: prefetch.r 0(a1)
+; RV64ZICBOP-NEXT: addi a0, a0, -16
+; RV64ZICBOP-NEXT: prefetch.r 0(a0)
+; RV64ZICBOP-NEXT: ret
+;
+; RV64ZICBOPZIHINTNTL-LABEL: test_prefetch_offsetable_read_locality_2:
+; RV64ZICBOPZIHINTNTL: # %bb.0:
+; RV64ZICBOPZIHINTNTL-NEXT: addi a1, a0, -2048
+; RV64ZICBOPZIHINTNTL-NEXT: ntl.p1
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.r 0(a1)
+; RV64ZICBOPZIHINTNTL-NEXT: addi a2, a0, 2016
+; RV64ZICBOPZIHINTNTL-NEXT: ntl.p1
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.r 0(a2)
+; RV64ZICBOPZIHINTNTL-NEXT: addi a2, a0, 32
+; RV64ZICBOPZIHINTNTL-NEXT: ntl.p1
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.r 0(a2)
+; RV64ZICBOPZIHINTNTL-NEXT: addi a2, a0, -32
+; RV64ZICBOPZIHINTNTL-NEXT: ntl.p1
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.r 0(a2)
+; RV64ZICBOPZIHINTNTL-NEXT: addi a1, a1, -1
+; RV64ZICBOPZIHINTNTL-NEXT: ntl.p1
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.r 0(a1)
+; RV64ZICBOPZIHINTNTL-NEXT: addi a1, a0, 2047
+; RV64ZICBOPZIHINTNTL-NEXT: addi a1, a1, 1
+; RV64ZICBOPZIHINTNTL-NEXT: ntl.p1
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.r 0(a1)
+; RV64ZICBOPZIHINTNTL-NEXT: addi a1, a0, 16
+; RV64ZICBOPZIHINTNTL-NEXT: ntl.p1
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.r 0(a1)
+; RV64ZICBOPZIHINTNTL-NEXT: addi a0, a0, -16
+; RV64ZICBOPZIHINTNTL-NEXT: ntl.p1
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.r 0(a0)
+; RV64ZICBOPZIHINTNTL-NEXT: ret
+ %valid_addr0 = getelementptr i8, ptr %a, i64 -2048
+ call void @llvm.prefetch(ptr %valid_addr0, i32 0, i32 2, i32 1)
+ %valid_addr1 = getelementptr i8, ptr %a, i64 2016
+ call void @llvm.prefetch(ptr %valid_addr1, i32 0, i32 2, i32 1)
+ %valid_addr2 = getelementptr i8, ptr %a, i64 32
+ call void @llvm.prefetch(ptr %valid_addr2, i32 0, i32 2, i32 1)
+ %valid_addr3 = getelementptr i8, ptr %a, i64 -32
+ call void @llvm.prefetch(ptr %valid_addr3, i32 0, i32 2, i32 1)
+ %invalid_addr0 = getelementptr i8, ptr %a, i64 -2049
+ call void @llvm.prefetch(ptr %invalid_addr0, i32 0, i32 2, i32 1)
+ %invalid_addr1 = getelementptr i8, ptr %a, i64 2048
+ call void @llvm.prefetch(ptr %invalid_addr1, i32 0, i32 2, i32 1)
+ %invalid_addr2 = getelementptr i8, ptr %a, i64 16
+ call void @llvm.prefetch(ptr %invalid_addr2, i32 0, i32 2, i32 1)
+ %invalid_addr3 = getelementptr i8, ptr %a, i64 -16
+ call void @llvm.prefetch(ptr %invalid_addr3, i32 0, i32 2, i32 1)
+ ret void
+}
+
define void @test_prefetch_write_locality_2(ptr %a) nounwind {
; RV32I-LABEL: test_prefetch_write_locality_2:
; RV32I: # %bb.0:
@@ -236,6 +922,104 @@ define void @test_prefetch_write_locality_2(ptr %a) nounwind {
ret void
}
+define void @test_prefetch_offsetable_write_locality_2(ptr %a) nounwind {
+; RV32I-LABEL: test_prefetch_offsetable_write_locality_2:
+; RV32I: # %bb.0:
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_prefetch_offsetable_write_locality_2:
+; RV64I: # %bb.0:
+; RV64I-NEXT: ret
+;
+; RV32ZICBOP-LABEL: test_prefetch_offsetable_write_locality_2:
+; RV32ZICBOP: # %bb.0:
+; RV32ZICBOP-NEXT: addi a1, a0, -2048
+; RV32ZICBOP-NEXT: prefetch.w 0(a1)
+; RV32ZICBOP-NEXT: addi a2, a0, 2016
+; RV32ZICBOP-NEXT: prefetch.w 0(a2)
+; RV32ZICBOP-NEXT: addi a2, a0, 32
+; RV32ZICBOP-NEXT: prefetch.w 0(a2)
+; RV32ZICBOP-NEXT: addi a2, a0, -32
+; RV32ZICBOP-NEXT: prefetch.w 0(a2)
+; RV32ZICBOP-NEXT: addi a1, a1, -1
+; RV32ZICBOP-NEXT: prefetch.w 0(a1)
+; RV32ZICBOP-NEXT: addi a1, a0, 2047
+; RV32ZICBOP-NEXT: addi a1, a1, 1
+; RV32ZICBOP-NEXT: prefetch.w 0(a1)
+; RV32ZICBOP-NEXT: addi a1, a0, 16
+; RV32ZICBOP-NEXT: prefetch.w 0(a1)
+; RV32ZICBOP-NEXT: addi a0, a0, -16
+; RV32ZICBOP-NEXT: prefetch.w 0(a0)
+; RV32ZICBOP-NEXT: ret
+;
+; RV64ZICBOP-LABEL: test_prefetch_offsetable_write_locality_2:
+; RV64ZICBOP: # %bb.0:
+; RV64ZICBOP-NEXT: addi a1, a0, -2048
+; RV64ZICBOP-NEXT: prefetch.w 0(a1)
+; RV64ZICBOP-NEXT: addi a2, a0, 2016
+; RV64ZICBOP-NEXT: prefetch.w 0(a2)
+; RV64ZICBOP-NEXT: addi a2, a0, 32
+; RV64ZICBOP-NEXT: prefetch.w 0(a2)
+; RV64ZICBOP-NEXT: addi a2, a0, -32
+; RV64ZICBOP-NEXT: prefetch.w 0(a2)
+; RV64ZICBOP-NEXT: addi a1, a1, -1
+; RV64ZICBOP-NEXT: prefetch.w 0(a1)
+; RV64ZICBOP-NEXT: addi a1, a0, 2047
+; RV64ZICBOP-NEXT: addi a1, a1, 1
+; RV64ZICBOP-NEXT: prefetch.w 0(a1)
+; RV64ZICBOP-NEXT: addi a1, a0, 16
+; RV64ZICBOP-NEXT: prefetch.w 0(a1)
+; RV64ZICBOP-NEXT: addi a0, a0, -16
+; RV64ZICBOP-NEXT: prefetch.w 0(a0)
+; RV64ZICBOP-NEXT: ret
+;
+; RV64ZICBOPZIHINTNTL-LABEL: test_prefetch_offsetable_write_locality_2:
+; RV64ZICBOPZIHINTNTL: # %bb.0:
+; RV64ZICBOPZIHINTNTL-NEXT: addi a1, a0, -2048
+; RV64ZICBOPZIHINTNTL-NEXT: ntl.p1
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.w 0(a1)
+; RV64ZICBOPZIHINTNTL-NEXT: addi a2, a0, 2016
+; RV64ZICBOPZIHINTNTL-NEXT: ntl.p1
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.w 0(a2)
+; RV64ZICBOPZIHINTNTL-NEXT: addi a2, a0, 32
+; RV64ZICBOPZIHINTNTL-NEXT: ntl.p1
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.w 0(a2)
+; RV64ZICBOPZIHINTNTL-NEXT: addi a2, a0, -32
+; RV64ZICBOPZIHINTNTL-NEXT: ntl.p1
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.w 0(a2)
+; RV64ZICBOPZIHINTNTL-NEXT: addi a1, a1, -1
+; RV64ZICBOPZIHINTNTL-NEXT: ntl.p1
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.w 0(a1)
+; RV64ZICBOPZIHINTNTL-NEXT: addi a1, a0, 2047
+; RV64ZICBOPZIHINTNTL-NEXT: addi a1, a1, 1
+; RV64ZICBOPZIHINTNTL-NEXT: ntl.p1
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.w 0(a1)
+; RV64ZICBOPZIHINTNTL-NEXT: addi a1, a0, 16
+; RV64ZICBOPZIHINTNTL-NEXT: ntl.p1
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.w 0(a1)
+; RV64ZICBOPZIHINTNTL-NEXT: addi a0, a0, -16
+; RV64ZICBOPZIHINTNTL-NEXT: ntl.p1
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.w 0(a0)
+; RV64ZICBOPZIHINTNTL-NEXT: ret
+ %valid_addr0 = getelementptr i8, ptr %a, i64 -2048
+ call void @llvm.prefetch(ptr %valid_addr0, i32 1, i32 2, i32 1)
+ %valid_addr1 = getelementptr i8, ptr %a, i64 2016
+ call void @llvm.prefetch(ptr %valid_addr1, i32 1, i32 2, i32 1)
+ %valid_addr2 = getelementptr i8, ptr %a, i64 32
+ call void @llvm.prefetch(ptr %valid_addr2, i32 1, i32 2, i32 1)
+ %valid_addr3 = getelementptr i8, ptr %a, i64 -32
+ call void @llvm.prefetch(ptr %valid_addr3, i32 1, i32 2, i32 1)
+ %invalid_addr0 = getelementptr i8, ptr %a, i64 -2049
+ call void @llvm.prefetch(ptr %invalid_addr0, i32 1, i32 2, i32 1)
+ %invalid_addr1 = getelementptr i8, ptr %a, i64 2048
+ call void @llvm.prefetch(ptr %invalid_addr1, i32 1, i32 2, i32 1)
+ %invalid_addr2 = getelementptr i8, ptr %a, i64 16
+ call void @llvm.prefetch(ptr %invalid_addr2, i32 1, i32 2, i32 1)
+ %invalid_addr3 = getelementptr i8, ptr %a, i64 -16
+ call void @llvm.prefetch(ptr %invalid_addr3, i32 1, i32 2, i32 1)
+ ret void
+}
+
define void @test_prefetch_instruction_locality_2(ptr %a) nounwind {
; RV32I-LABEL: test_prefetch_instruction_locality_2:
; RV32I: # %bb.0:
@@ -264,6 +1048,103 @@ define void @test_prefetch_instruction_locality_2(ptr %a) nounwind {
ret void
}
+define void @test_prefetch_offsetable_instruction_locality_2(ptr %a) nounwind {
+; RV32I-LABEL: test_prefetch_offsetable_instruction_locality_2:
+; RV32I: # %bb.0:
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_prefetch_offsetable_instruction_locality_2:
+; RV64I: # %bb.0:
+; RV64I-NEXT: ret
+;
+; RV32ZICBOP-LABEL: test_prefetch_offsetable_instruction_locality_2:
+; RV32ZICBOP: # %bb.0:
+; RV32ZICBOP-NEXT: addi a1, a0, -2048
+; RV32ZICBOP-NEXT: prefetch.i 0(a1)
+; RV32ZICBOP-NEXT: addi a2, a0, 2016
+; RV32ZICBOP-NEXT: prefetch.i 0(a2)
+; RV32ZICBOP-NEXT: addi a2, a0, 32
+; RV32ZICBOP-NEXT: prefetch.i 0(a2)
+; RV32ZICBOP-NEXT: addi a2, a0, -32
+; RV32ZICBOP-NEXT: prefetch.i 0(a2)
+; RV32ZICBOP-NEXT: addi a1, a1, -1
+; RV32ZICBOP-NEXT: prefetch.i 0(a1)
+; RV32ZICBOP-NEXT: addi a1, a0, 2047
+; RV32ZICBOP-NEXT: addi a1, a1, 1
+; RV32ZICBOP-NEXT: prefetch.i 0(a1)
+; RV32ZICBOP-NEXT: addi a1, a0, 16
+; RV32ZICBOP-NEXT: prefetch.i 0(a1)
+; RV32ZICBOP-NEXT: addi a0, a0, -16
+; RV32ZICBOP-NEXT: prefetch.i 0(a0)
+; RV32ZICBOP-NEXT: ret
+;
+; RV64ZICBOP-LABEL: test_prefetch_offsetable_instruction_locality_2:
+; RV64ZICBOP: # %bb.0:
+; RV64ZICBOP-NEXT: addi a1, a0, -2048
+; RV64ZICBOP-NEXT: prefetch.i 0(a1)
+; RV64ZICBOP-NEXT: addi a2, a0, 2016
+; RV64ZICBOP-NEXT: prefetch.i 0(a2)
+; RV64ZICBOP-NEXT: addi a2, a0, 32
+; RV64ZICBOP-NEXT: prefetch.i 0(a2)
+; RV64ZICBOP-NEXT: addi a2, a0, -32
+; RV64ZICBOP-NEXT: prefetch.i 0(a2)
+; RV64ZICBOP-NEXT: addi a1, a1, -1
+; RV64ZICBOP-NEXT: prefetch.i 0(a1)
+; RV64ZICBOP-NEXT: addi a1, a0, 2047
+; RV64ZICBOP-NEXT: addi a1, a1, 1
+; RV64ZICBOP-NEXT: prefetch.i 0(a1)
+; RV64ZICBOP-NEXT: addi a1, a0, 16
+; RV64ZICBOP-NEXT: prefetch.i 0(a1)
+; RV64ZICBOP-NEXT: addi a0, a0, -16
+; RV64ZICBOP-NEXT: prefetch.i 0(a0)
+; RV64ZICBOP-NEXT: ret
+;
+; RV64ZICBOPZIHINTNTL-LABEL: test_prefetch_offsetable_instruction_locality_2:
+; RV64ZICBOPZIHINTNTL: # %bb.0:
+; RV64ZICBOPZIHINTNTL-NEXT: addi a1, a0, -2048
+; RV64ZICBOPZIHINTNTL-NEXT: ntl.p1
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.i 0(a1)
+; RV64ZICBOPZIHINTNTL-NEXT: addi a2, a0, 2016
+; RV64ZICBOPZIHINTNTL-NEXT: ntl.p1
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.i 0(a2)
+; RV64ZICBOPZIHINTNTL-NEXT: addi a2, a0, 32
+; RV64ZICBOPZIHINTNTL-NEXT: ntl.p1
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.i 0(a2)
+; RV64ZICBOPZIHINTNTL-NEXT: addi a2, a0, -32
+; RV64ZICBOPZIHINTNTL-NEXT: ntl.p1
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.i 0(a2)
+; RV64ZICBOPZIHINTNTL-NEXT: addi a1, a1, -1
+; RV64ZICBOPZIHINTNTL-NEXT: ntl.p1
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.i 0(a1)
+; RV64ZICBOPZIHINTNTL-NEXT: addi a1, a0, 2047
+; RV64ZICBOPZIHINTNTL-NEXT: addi a1, a1, 1
+; RV64ZICBOPZIHINTNTL-NEXT: ntl.p1
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.i 0(a1)
+; RV64ZICBOPZIHINTNTL-NEXT: addi a1, a0, 16
+; RV64ZICBOPZIHINTNTL-NEXT: ntl.p1
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.i 0(a1)
+; RV64ZICBOPZIHINTNTL-NEXT: addi a0, a0, -16
+; RV64ZICBOPZIHINTNTL-NEXT: ntl.p1
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.i 0(a0)
+; RV64ZICBOPZIHINTNTL-NEXT: ret
+ %valid_addr0 = getelementptr i8, ptr %a, i64 -2048
+ call void @llvm.prefetch(ptr %valid_addr0, i32 0, i32 2, i32 0)
+ %valid_addr1 = getelementptr i8, ptr %a, i64 2016
+ call void @llvm.prefetch(ptr %valid_addr1, i32 0, i32 2, i32 0)
+ %valid_addr2 = getelementptr i8, ptr %a, i64 32
+ call void @llvm.prefetch(ptr %valid_addr2, i32 0, i32 2, i32 0)
+ %valid_addr3 = getelementptr i8, ptr %a, i64 -32
+ call void @llvm.prefetch(ptr %valid_addr3, i32 0, i32 2, i32 0)
+ %invalid_addr0 = getelementptr i8, ptr %a, i64 -2049
+ call void @llvm.prefetch(ptr %invalid_addr0, i32 0, i32 2, i32 0)
+ %invalid_addr1 = getelementptr i8, ptr %a, i64 2048
+ call void @llvm.prefetch(ptr %invalid_addr1, i32 0, i32 2, i32 0)
+ %invalid_addr2 = getelementptr i8, ptr %a, i64 16
+ call void @llvm.prefetch(ptr %invalid_addr2, i32 0, i32 2, i32 0)
+ %invalid_addr3 = getelementptr i8, ptr %a, i64 -16
+ call void @llvm.prefetch(ptr %invalid_addr3, i32 0, i32 2, i32 0)
+ ret void
+}
define void @test_prefetch_read_locality_3(ptr %a) nounwind {
; RV32I-LABEL: test_prefetch_read_locality_3:
@@ -292,6 +1173,96 @@ define void @test_prefetch_read_locality_3(ptr %a) nounwind {
ret void
}
+define void @test_prefetch_offsetable_read_locality_3(ptr %a) nounwind {
+; RV32I-LABEL: test_prefetch_offsetable_read_locality_3:
+; RV32I: # %bb.0:
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_prefetch_offsetable_read_locality_3:
+; RV64I: # %bb.0:
+; RV64I-NEXT: ret
+;
+; RV32ZICBOP-LABEL: test_prefetch_offsetable_read_locality_3:
+; RV32ZICBOP: # %bb.0:
+; RV32ZICBOP-NEXT: addi a1, a0, -2048
+; RV32ZICBOP-NEXT: prefetch.r 0(a1)
+; RV32ZICBOP-NEXT: addi a2, a0, 2016
+; RV32ZICBOP-NEXT: prefetch.r 0(a2)
+; RV32ZICBOP-NEXT: addi a2, a0, 32
+; RV32ZICBOP-NEXT: prefetch.r 0(a2)
+; RV32ZICBOP-NEXT: addi a2, a0, -32
+; RV32ZICBOP-NEXT: prefetch.r 0(a2)
+; RV32ZICBOP-NEXT: addi a1, a1, -1
+; RV32ZICBOP-NEXT: prefetch.r 0(a1)
+; RV32ZICBOP-NEXT: addi a1, a0, 2047
+; RV32ZICBOP-NEXT: addi a1, a1, 1
+; RV32ZICBOP-NEXT: prefetch.r 0(a1)
+; RV32ZICBOP-NEXT: addi a1, a0, 16
+; RV32ZICBOP-NEXT: prefetch.r 0(a1)
+; RV32ZICBOP-NEXT: addi a0, a0, -16
+; RV32ZICBOP-NEXT: prefetch.r 0(a0)
+; RV32ZICBOP-NEXT: ret
+;
+; RV64ZICBOP-LABEL: test_prefetch_offsetable_read_locality_3:
+; RV64ZICBOP: # %bb.0:
+; RV64ZICBOP-NEXT: addi a1, a0, -2048
+; RV64ZICBOP-NEXT: prefetch.r 0(a1)
+; RV64ZICBOP-NEXT: addi a2, a0, 2016
+; RV64ZICBOP-NEXT: prefetch.r 0(a2)
+; RV64ZICBOP-NEXT: addi a2, a0, 32
+; RV64ZICBOP-NEXT: prefetch.r 0(a2)
+; RV64ZICBOP-NEXT: addi a2, a0, -32
+; RV64ZICBOP-NEXT: prefetch.r 0(a2)
+; RV64ZICBOP-NEXT: addi a1, a1, -1
+; RV64ZICBOP-NEXT: prefetch.r 0(a1)
+; RV64ZICBOP-NEXT: addi a1, a0, 2047
+; RV64ZICBOP-NEXT: addi a1, a1, 1
+; RV64ZICBOP-NEXT: prefetch.r 0(a1)
+; RV64ZICBOP-NEXT: addi a1, a0, 16
+; RV64ZICBOP-NEXT: prefetch.r 0(a1)
+; RV64ZICBOP-NEXT: addi a0, a0, -16
+; RV64ZICBOP-NEXT: prefetch.r 0(a0)
+; RV64ZICBOP-NEXT: ret
+;
+; RV64ZICBOPZIHINTNTL-LABEL: test_prefetch_offsetable_read_locality_3:
+; RV64ZICBOPZIHINTNTL: # %bb.0:
+; RV64ZICBOPZIHINTNTL-NEXT: addi a1, a0, -2048
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.r 0(a1)
+; RV64ZICBOPZIHINTNTL-NEXT: addi a2, a0, 2016
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.r 0(a2)
+; RV64ZICBOPZIHINTNTL-NEXT: addi a2, a0, 32
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.r 0(a2)
+; RV64ZICBOPZIHINTNTL-NEXT: addi a2, a0, -32
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.r 0(a2)
+; RV64ZICBOPZIHINTNTL-NEXT: addi a1, a1, -1
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.r 0(a1)
+; RV64ZICBOPZIHINTNTL-NEXT: addi a1, a0, 2047
+; RV64ZICBOPZIHINTNTL-NEXT: addi a1, a1, 1
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.r 0(a1)
+; RV64ZICBOPZIHINTNTL-NEXT: addi a1, a0, 16
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.r 0(a1)
+; RV64ZICBOPZIHINTNTL-NEXT: addi a0, a0, -16
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.r 0(a0)
+; RV64ZICBOPZIHINTNTL-NEXT: ret
+ %valid_addr0 = getelementptr i8, ptr %a, i64 -2048
+ call void @llvm.prefetch(ptr %valid_addr0, i32 0, i32 3, i32 1)
+ %valid_addr1 = getelementptr i8, ptr %a, i64 2016
+ call void @llvm.prefetch(ptr %valid_addr1, i32 0, i32 3, i32 1)
+ %valid_addr2 = getelementptr i8, ptr %a, i64 32
+ call void @llvm.prefetch(ptr %valid_addr2, i32 0, i32 3, i32 1)
+ %valid_addr3 = getelementptr i8, ptr %a, i64 -32
+ call void @llvm.prefetch(ptr %valid_addr3, i32 0, i32 3, i32 1)
+ %invalid_addr0 = getelementptr i8, ptr %a, i64 -2049
+ call void @llvm.prefetch(ptr %invalid_addr0, i32 0, i32 3, i32 1)
+ %invalid_addr1 = getelementptr i8, ptr %a, i64 2048
+ call void @llvm.prefetch(ptr %invalid_addr1, i32 0, i32 3, i32 1)
+ %invalid_addr2 = getelementptr i8, ptr %a, i64 16
+ call void @llvm.prefetch(ptr %invalid_addr2, i32 0, i32 3, i32 1)
+ %invalid_addr3 = getelementptr i8, ptr %a, i64 -16
+ call void @llvm.prefetch(ptr %invalid_addr3, i32 0, i32 3, i32 1)
+ ret void
+}
+
define void @test_prefetch_write_locality_3(ptr %a) nounwind {
; RV32I-LABEL: test_prefetch_write_locality_3:
; RV32I: # %bb.0:
@@ -319,6 +1290,96 @@ define void @test_prefetch_write_locality_3(ptr %a) nounwind {
ret void
}
+define void @test_prefetch_offsetable_write_locality_3(ptr %a) nounwind {
+; RV32I-LABEL: test_prefetch_offsetable_write_locality_3:
+; RV32I: # %bb.0:
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_prefetch_offsetable_write_locality_3:
+; RV64I: # %bb.0:
+; RV64I-NEXT: ret
+;
+; RV32ZICBOP-LABEL: test_prefetch_offsetable_write_locality_3:
+; RV32ZICBOP: # %bb.0:
+; RV32ZICBOP-NEXT: addi a1, a0, -2048
+; RV32ZICBOP-NEXT: prefetch.w 0(a1)
+; RV32ZICBOP-NEXT: addi a2, a0, 2016
+; RV32ZICBOP-NEXT: prefetch.w 0(a2)
+; RV32ZICBOP-NEXT: addi a2, a0, 32
+; RV32ZICBOP-NEXT: prefetch.w 0(a2)
+; RV32ZICBOP-NEXT: addi a2, a0, -32
+; RV32ZICBOP-NEXT: prefetch.w 0(a2)
+; RV32ZICBOP-NEXT: addi a1, a1, -1
+; RV32ZICBOP-NEXT: prefetch.w 0(a1)
+; RV32ZICBOP-NEXT: addi a1, a0, 2047
+; RV32ZICBOP-NEXT: addi a1, a1, 1
+; RV32ZICBOP-NEXT: prefetch.w 0(a1)
+; RV32ZICBOP-NEXT: addi a1, a0, 16
+; RV32ZICBOP-NEXT: prefetch.w 0(a1)
+; RV32ZICBOP-NEXT: addi a0, a0, -16
+; RV32ZICBOP-NEXT: prefetch.w 0(a0)
+; RV32ZICBOP-NEXT: ret
+;
+; RV64ZICBOP-LABEL: test_prefetch_offsetable_write_locality_3:
+; RV64ZICBOP: # %bb.0:
+; RV64ZICBOP-NEXT: addi a1, a0, -2048
+; RV64ZICBOP-NEXT: prefetch.w 0(a1)
+; RV64ZICBOP-NEXT: addi a2, a0, 2016
+; RV64ZICBOP-NEXT: prefetch.w 0(a2)
+; RV64ZICBOP-NEXT: addi a2, a0, 32
+; RV64ZICBOP-NEXT: prefetch.w 0(a2)
+; RV64ZICBOP-NEXT: addi a2, a0, -32
+; RV64ZICBOP-NEXT: prefetch.w 0(a2)
+; RV64ZICBOP-NEXT: addi a1, a1, -1
+; RV64ZICBOP-NEXT: prefetch.w 0(a1)
+; RV64ZICBOP-NEXT: addi a1, a0, 2047
+; RV64ZICBOP-NEXT: addi a1, a1, 1
+; RV64ZICBOP-NEXT: prefetch.w 0(a1)
+; RV64ZICBOP-NEXT: addi a1, a0, 16
+; RV64ZICBOP-NEXT: prefetch.w 0(a1)
+; RV64ZICBOP-NEXT: addi a0, a0, -16
+; RV64ZICBOP-NEXT: prefetch.w 0(a0)
+; RV64ZICBOP-NEXT: ret
+;
+; RV64ZICBOPZIHINTNTL-LABEL: test_prefetch_offsetable_write_locality_3:
+; RV64ZICBOPZIHINTNTL: # %bb.0:
+; RV64ZICBOPZIHINTNTL-NEXT: addi a1, a0, -2048
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.w 0(a1)
+; RV64ZICBOPZIHINTNTL-NEXT: addi a2, a0, 2016
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.w 0(a2)
+; RV64ZICBOPZIHINTNTL-NEXT: addi a2, a0, 32
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.w 0(a2)
+; RV64ZICBOPZIHINTNTL-NEXT: addi a2, a0, -32
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.w 0(a2)
+; RV64ZICBOPZIHINTNTL-NEXT: addi a1, a1, -1
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.w 0(a1)
+; RV64ZICBOPZIHINTNTL-NEXT: addi a1, a0, 2047
+; RV64ZICBOPZIHINTNTL-NEXT: addi a1, a1, 1
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.w 0(a1)
+; RV64ZICBOPZIHINTNTL-NEXT: addi a1, a0, 16
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.w 0(a1)
+; RV64ZICBOPZIHINTNTL-NEXT: addi a0, a0, -16
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.w 0(a0)
+; RV64ZICBOPZIHINTNTL-NEXT: ret
+ %valid_addr0 = getelementptr i8, ptr %a, i64 -2048
+ call void @llvm.prefetch(ptr %valid_addr0, i32 1, i32 3, i32 1)
+ %valid_addr1 = getelementptr i8, ptr %a, i64 2016
+ call void @llvm.prefetch(ptr %valid_addr1, i32 1, i32 3, i32 1)
+ %valid_addr2 = getelementptr i8, ptr %a, i64 32
+ call void @llvm.prefetch(ptr %valid_addr2, i32 1, i32 3, i32 1)
+ %valid_addr3 = getelementptr i8, ptr %a, i64 -32
+ call void @llvm.prefetch(ptr %valid_addr3, i32 1, i32 3, i32 1)
+ %invalid_addr0 = getelementptr i8, ptr %a, i64 -2049
+ call void @llvm.prefetch(ptr %invalid_addr0, i32 1, i32 3, i32 1)
+ %invalid_addr1 = getelementptr i8, ptr %a, i64 2048
+ call void @llvm.prefetch(ptr %invalid_addr1, i32 1, i32 3, i32 1)
+ %invalid_addr2 = getelementptr i8, ptr %a, i64 16
+ call void @llvm.prefetch(ptr %invalid_addr2, i32 1, i32 3, i32 1)
+ %invalid_addr3 = getelementptr i8, ptr %a, i64 -16
+ call void @llvm.prefetch(ptr %invalid_addr3, i32 1, i32 3, i32 1)
+ ret void
+}
+
define void @test_prefetch_instruction_locality_3(ptr %a) nounwind {
; RV32I-LABEL: test_prefetch_instruction_locality_3:
; RV32I: # %bb.0:
@@ -345,3 +1406,93 @@ define void @test_prefetch_instruction_locality_3(ptr %a) nounwind {
call void @llvm.prefetch(ptr %a, i32 0, i32 3, i32 0)
ret void
}
+
+define void @test_prefetch_offsetable_instruction_locality_3(ptr %a) nounwind {
+; RV32I-LABEL: test_prefetch_offsetable_instruction_locality_3:
+; RV32I: # %bb.0:
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_prefetch_offsetable_instruction_locality_3:
+; RV64I: # %bb.0:
+; RV64I-NEXT: ret
+;
+; RV32ZICBOP-LABEL: test_prefetch_offsetable_instruction_locality_3:
+; RV32ZICBOP: # %bb.0:
+; RV32ZICBOP-NEXT: addi a1, a0, -2048
+; RV32ZICBOP-NEXT: prefetch.i 0(a1)
+; RV32ZICBOP-NEXT: addi a2, a0, 2016
+; RV32ZICBOP-NEXT: prefetch.i 0(a2)
+; RV32ZICBOP-NEXT: addi a2, a0, 32
+; RV32ZICBOP-NEXT: prefetch.i 0(a2)
+; RV32ZICBOP-NEXT: addi a2, a0, -32
+; RV32ZICBOP-NEXT: prefetch.i 0(a2)
+; RV32ZICBOP-NEXT: addi a1, a1, -1
+; RV32ZICBOP-NEXT: prefetch.i 0(a1)
+; RV32ZICBOP-NEXT: addi a1, a0, 2047
+; RV32ZICBOP-NEXT: addi a1, a1, 1
+; RV32ZICBOP-NEXT: prefetch.i 0(a1)
+; RV32ZICBOP-NEXT: addi a1, a0, 16
+; RV32ZICBOP-NEXT: prefetch.i 0(a1)
+; RV32ZICBOP-NEXT: addi a0, a0, -16
+; RV32ZICBOP-NEXT: prefetch.i 0(a0)
+; RV32ZICBOP-NEXT: ret
+;
+; RV64ZICBOP-LABEL: test_prefetch_offsetable_instruction_locality_3:
+; RV64ZICBOP: # %bb.0:
+; RV64ZICBOP-NEXT: addi a1, a0, -2048
+; RV64ZICBOP-NEXT: prefetch.i 0(a1)
+; RV64ZICBOP-NEXT: addi a2, a0, 2016
+; RV64ZICBOP-NEXT: prefetch.i 0(a2)
+; RV64ZICBOP-NEXT: addi a2, a0, 32
+; RV64ZICBOP-NEXT: prefetch.i 0(a2)
+; RV64ZICBOP-NEXT: addi a2, a0, -32
+; RV64ZICBOP-NEXT: prefetch.i 0(a2)
+; RV64ZICBOP-NEXT: addi a1, a1, -1
+; RV64ZICBOP-NEXT: prefetch.i 0(a1)
+; RV64ZICBOP-NEXT: addi a1, a0, 2047
+; RV64ZICBOP-NEXT: addi a1, a1, 1
+; RV64ZICBOP-NEXT: prefetch.i 0(a1)
+; RV64ZICBOP-NEXT: addi a1, a0, 16
+; RV64ZICBOP-NEXT: prefetch.i 0(a1)
+; RV64ZICBOP-NEXT: addi a0, a0, -16
+; RV64ZICBOP-NEXT: prefetch.i 0(a0)
+; RV64ZICBOP-NEXT: ret
+;
+; RV64ZICBOPZIHINTNTL-LABEL: test_prefetch_offsetable_instruction_locality_3:
+; RV64ZICBOPZIHINTNTL: # %bb.0:
+; RV64ZICBOPZIHINTNTL-NEXT: addi a1, a0, -2048
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.i 0(a1)
+; RV64ZICBOPZIHINTNTL-NEXT: addi a2, a0, 2016
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.i 0(a2)
+; RV64ZICBOPZIHINTNTL-NEXT: addi a2, a0, 32
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.i 0(a2)
+; RV64ZICBOPZIHINTNTL-NEXT: addi a2, a0, -32
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.i 0(a2)
+; RV64ZICBOPZIHINTNTL-NEXT: addi a1, a1, -1
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.i 0(a1)
+; RV64ZICBOPZIHINTNTL-NEXT: addi a1, a0, 2047
+; RV64ZICBOPZIHINTNTL-NEXT: addi a1, a1, 1
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.i 0(a1)
+; RV64ZICBOPZIHINTNTL-NEXT: addi a1, a0, 16
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.i 0(a1)
+; RV64ZICBOPZIHINTNTL-NEXT: addi a0, a0, -16
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.i 0(a0)
+; RV64ZICBOPZIHINTNTL-NEXT: ret
+ %valid_addr0 = getelementptr i8, ptr %a, i64 -2048
+ call void @llvm.prefetch(ptr %valid_addr0, i32 0, i32 3, i32 0)
+ %valid_addr1 = getelementptr i8, ptr %a, i64 2016
+ call void @llvm.prefetch(ptr %valid_addr1, i32 0, i32 3, i32 0)
+ %valid_addr2 = getelementptr i8, ptr %a, i64 32
+ call void @llvm.prefetch(ptr %valid_addr2, i32 0, i32 3, i32 0)
+ %valid_addr3 = getelementptr i8, ptr %a, i64 -32
+ call void @llvm.prefetch(ptr %valid_addr3, i32 0, i32 3, i32 0)
+ %invalid_addr0 = getelementptr i8, ptr %a, i64 -2049
+ call void @llvm.prefetch(ptr %invalid_addr0, i32 0, i32 3, i32 0)
+ %invalid_addr1 = getelementptr i8, ptr %a, i64 2048
+ call void @llvm.prefetch(ptr %invalid_addr1, i32 0, i32 3, i32 0)
+ %invalid_addr2 = getelementptr i8, ptr %a, i64 16
+ call void @llvm.prefetch(ptr %invalid_addr2, i32 0, i32 3, i32 0)
+ %invalid_addr3 = getelementptr i8, ptr %a, i64 -16
+ call void @llvm.prefetch(ptr %invalid_addr3, i32 0, i32 3, i32 0)
+ ret void
+}
>From ec47ce546cc63811948097ebbc0e040ccdc274c5 Mon Sep 17 00:00:00 2001
From: wangpc <wangpengcheng.pp at bytedance.com>
Date: Tue, 12 Sep 2023 19:26:41 +0800
Subject: [PATCH 2/2] [RISCV] Match prefetch address with offset
A new ComplexPattern `AddrRegImmLsb00000` is added, which is like
`AddrRegImm` except that if the least significant 5 bits of the offset
aren't all zeros, we fall back to offset 0.
---
llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp | 16 +
llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h | 1 +
llvm/lib/Target/RISCV/RISCVInstrInfoZicbo.td | 18 +-
llvm/test/CodeGen/RISCV/prefetch.ll | 414 +++++++------------
4 files changed, 181 insertions(+), 268 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index c6cefc000b3f746..4ef65ea0cd1ecdd 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -2441,6 +2441,22 @@ bool RISCVDAGToDAGISel::SelectAddrRegImm(SDValue Addr, SDValue &Base,
return true;
}
+bool RISCVDAGToDAGISel::SelectAddrRegImmLsb00000(SDValue Addr, SDValue &Base,
+ SDValue &Offset) {
+ // Match like SelectAddrRegImm, but only keep an offset whose low 5 bits are zero.
+ bool Found = SelectAddrRegImm(Addr, Base, Offset);
+ assert(Found && "SelectAddrRegImm should always succeed");
+ (void)Found; // Found is otherwise read only by the assert above.
+ // Accept the match when the offset is a constant with >= 5 trailing zero bits.
+ if (auto *OffsetImm = dyn_cast<ConstantSDNode>(Offset))
+ if (OffsetImm->getAPIntValue().countTrailingZeros() >= 5)
+ return true;
+ // Otherwise use the whole address as the base with a zero offset.
+ Base = Addr;
+ Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), Addr.getSimpleValueType());
+ return true; // Always reports a match; only the (Base, Offset) split differs.
+}
+
bool RISCVDAGToDAGISel::selectShiftMask(SDValue N, unsigned ShiftWidth,
SDValue &ShAmt) {
ShAmt = N;
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
index fbc1520a54ba071..cb8d3ced92a29bf 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
@@ -53,6 +53,7 @@ class RISCVDAGToDAGISel : public SelectionDAGISel {
bool SelectAddrRegImmINX(SDValue Addr, SDValue &Base, SDValue &Offset) {
return SelectAddrRegImm(Addr, Base, Offset, true);
}
+ bool SelectAddrRegImmLsb00000(SDValue Addr, SDValue &Base, SDValue &Offset);
bool SelectAddrRegRegScale(SDValue Addr, unsigned MaxShiftAmount,
SDValue &Base, SDValue &Index, SDValue &Scale);
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZicbo.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZicbo.td
index 509d1cfcd874444..de8a96607864ef1 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZicbo.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZicbo.td
@@ -74,12 +74,16 @@ def PREFETCH_W : Prefetch_ri<0b00011, "prefetch.w">, Sched<[]>;
// Patterns
//===----------------------------------------------------------------------===//
+def AddrRegImmLsb00000 : ComplexPattern<iPTR, 2, "SelectAddrRegImmLsb00000">;
+
let Predicates = [HasStdExtZicbop] in {
- // FIXME: Match address with offset
- def : Pat<(prefetch GPR:$rs1, imm, imm, (XLenVT 0)),
- (PREFETCH_I GPR:$rs1, 0)>;
- def : Pat<(prefetch GPR:$rs1, (XLenVT 0), imm, (XLenVT 1)),
- (PREFETCH_R GPR:$rs1, 0)>;
- def : Pat<(prefetch GPR:$rs1, (XLenVT 1), imm, (XLenVT 1)),
- (PREFETCH_W GPR:$rs1, 0)>;
+ def : Pat<(prefetch (AddrRegImmLsb00000 (XLenVT GPR:$rs1), simm12_lsb00000:$imm12),
+ imm, imm, (XLenVT 0)),
+ (PREFETCH_I GPR:$rs1, simm12_lsb00000:$imm12)>;
+ def : Pat<(prefetch (AddrRegImmLsb00000 (XLenVT GPR:$rs1), simm12_lsb00000:$imm12),
+ (XLenVT 0), imm, (XLenVT 1)),
+ (PREFETCH_R GPR:$rs1, simm12_lsb00000:$imm12)>;
+ def : Pat<(prefetch (AddrRegImmLsb00000 (XLenVT GPR:$rs1), simm12_lsb00000:$imm12),
+ (XLenVT 1), imm, (XLenVT 1)),
+ (PREFETCH_W GPR:$rs1, simm12_lsb00000:$imm12)>;
}
diff --git a/llvm/test/CodeGen/RISCV/prefetch.ll b/llvm/test/CodeGen/RISCV/prefetch.ll
index 655f84f1bdfe0a4..ae4a903f439b7a8 100644
--- a/llvm/test/CodeGen/RISCV/prefetch.ll
+++ b/llvm/test/CodeGen/RISCV/prefetch.ll
@@ -51,14 +51,11 @@ define void @test_prefetch_offsetable_read_locality_0(ptr %a) nounwind {
;
; RV32ZICBOP-LABEL: test_prefetch_offsetable_read_locality_0:
; RV32ZICBOP: # %bb.0:
+; RV32ZICBOP-NEXT: prefetch.r -2048(a0)
+; RV32ZICBOP-NEXT: prefetch.r 2016(a0)
+; RV32ZICBOP-NEXT: prefetch.r 32(a0)
+; RV32ZICBOP-NEXT: prefetch.r -32(a0)
; RV32ZICBOP-NEXT: addi a1, a0, -2048
-; RV32ZICBOP-NEXT: prefetch.r 0(a1)
-; RV32ZICBOP-NEXT: addi a2, a0, 2016
-; RV32ZICBOP-NEXT: prefetch.r 0(a2)
-; RV32ZICBOP-NEXT: addi a2, a0, 32
-; RV32ZICBOP-NEXT: prefetch.r 0(a2)
-; RV32ZICBOP-NEXT: addi a2, a0, -32
-; RV32ZICBOP-NEXT: prefetch.r 0(a2)
; RV32ZICBOP-NEXT: addi a1, a1, -1
; RV32ZICBOP-NEXT: prefetch.r 0(a1)
; RV32ZICBOP-NEXT: addi a1, a0, 2047
@@ -72,14 +69,11 @@ define void @test_prefetch_offsetable_read_locality_0(ptr %a) nounwind {
;
; RV64ZICBOP-LABEL: test_prefetch_offsetable_read_locality_0:
; RV64ZICBOP: # %bb.0:
+; RV64ZICBOP-NEXT: prefetch.r -2048(a0)
+; RV64ZICBOP-NEXT: prefetch.r 2016(a0)
+; RV64ZICBOP-NEXT: prefetch.r 32(a0)
+; RV64ZICBOP-NEXT: prefetch.r -32(a0)
; RV64ZICBOP-NEXT: addi a1, a0, -2048
-; RV64ZICBOP-NEXT: prefetch.r 0(a1)
-; RV64ZICBOP-NEXT: addi a2, a0, 2016
-; RV64ZICBOP-NEXT: prefetch.r 0(a2)
-; RV64ZICBOP-NEXT: addi a2, a0, 32
-; RV64ZICBOP-NEXT: prefetch.r 0(a2)
-; RV64ZICBOP-NEXT: addi a2, a0, -32
-; RV64ZICBOP-NEXT: prefetch.r 0(a2)
; RV64ZICBOP-NEXT: addi a1, a1, -1
; RV64ZICBOP-NEXT: prefetch.r 0(a1)
; RV64ZICBOP-NEXT: addi a1, a0, 2047
@@ -93,18 +87,15 @@ define void @test_prefetch_offsetable_read_locality_0(ptr %a) nounwind {
;
; RV64ZICBOPZIHINTNTL-LABEL: test_prefetch_offsetable_read_locality_0:
; RV64ZICBOPZIHINTNTL: # %bb.0:
-; RV64ZICBOPZIHINTNTL-NEXT: addi a1, a0, -2048
; RV64ZICBOPZIHINTNTL-NEXT: ntl.all
-; RV64ZICBOPZIHINTNTL-NEXT: prefetch.r 0(a1)
-; RV64ZICBOPZIHINTNTL-NEXT: addi a2, a0, 2016
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.r -2048(a0)
; RV64ZICBOPZIHINTNTL-NEXT: ntl.all
-; RV64ZICBOPZIHINTNTL-NEXT: prefetch.r 0(a2)
-; RV64ZICBOPZIHINTNTL-NEXT: addi a2, a0, 32
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.r 2016(a0)
; RV64ZICBOPZIHINTNTL-NEXT: ntl.all
-; RV64ZICBOPZIHINTNTL-NEXT: prefetch.r 0(a2)
-; RV64ZICBOPZIHINTNTL-NEXT: addi a2, a0, -32
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.r 32(a0)
; RV64ZICBOPZIHINTNTL-NEXT: ntl.all
-; RV64ZICBOPZIHINTNTL-NEXT: prefetch.r 0(a2)
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.r -32(a0)
+; RV64ZICBOPZIHINTNTL-NEXT: addi a1, a0, -2048
; RV64ZICBOPZIHINTNTL-NEXT: addi a1, a1, -1
; RV64ZICBOPZIHINTNTL-NEXT: ntl.all
; RV64ZICBOPZIHINTNTL-NEXT: prefetch.r 0(a1)
@@ -177,14 +168,11 @@ define void @test_prefetch_offsetable_write_locality_0(ptr %a) nounwind {
;
; RV32ZICBOP-LABEL: test_prefetch_offsetable_write_locality_0:
; RV32ZICBOP: # %bb.0:
+; RV32ZICBOP-NEXT: prefetch.w -2048(a0)
+; RV32ZICBOP-NEXT: prefetch.w 2016(a0)
+; RV32ZICBOP-NEXT: prefetch.w 32(a0)
+; RV32ZICBOP-NEXT: prefetch.w -32(a0)
; RV32ZICBOP-NEXT: addi a1, a0, -2048
-; RV32ZICBOP-NEXT: prefetch.w 0(a1)
-; RV32ZICBOP-NEXT: addi a2, a0, 2016
-; RV32ZICBOP-NEXT: prefetch.w 0(a2)
-; RV32ZICBOP-NEXT: addi a2, a0, 32
-; RV32ZICBOP-NEXT: prefetch.w 0(a2)
-; RV32ZICBOP-NEXT: addi a2, a0, -32
-; RV32ZICBOP-NEXT: prefetch.w 0(a2)
; RV32ZICBOP-NEXT: addi a1, a1, -1
; RV32ZICBOP-NEXT: prefetch.w 0(a1)
; RV32ZICBOP-NEXT: addi a1, a0, 2047
@@ -198,14 +186,11 @@ define void @test_prefetch_offsetable_write_locality_0(ptr %a) nounwind {
;
; RV64ZICBOP-LABEL: test_prefetch_offsetable_write_locality_0:
; RV64ZICBOP: # %bb.0:
+; RV64ZICBOP-NEXT: prefetch.w -2048(a0)
+; RV64ZICBOP-NEXT: prefetch.w 2016(a0)
+; RV64ZICBOP-NEXT: prefetch.w 32(a0)
+; RV64ZICBOP-NEXT: prefetch.w -32(a0)
; RV64ZICBOP-NEXT: addi a1, a0, -2048
-; RV64ZICBOP-NEXT: prefetch.w 0(a1)
-; RV64ZICBOP-NEXT: addi a2, a0, 2016
-; RV64ZICBOP-NEXT: prefetch.w 0(a2)
-; RV64ZICBOP-NEXT: addi a2, a0, 32
-; RV64ZICBOP-NEXT: prefetch.w 0(a2)
-; RV64ZICBOP-NEXT: addi a2, a0, -32
-; RV64ZICBOP-NEXT: prefetch.w 0(a2)
; RV64ZICBOP-NEXT: addi a1, a1, -1
; RV64ZICBOP-NEXT: prefetch.w 0(a1)
; RV64ZICBOP-NEXT: addi a1, a0, 2047
@@ -219,18 +204,15 @@ define void @test_prefetch_offsetable_write_locality_0(ptr %a) nounwind {
;
; RV64ZICBOPZIHINTNTL-LABEL: test_prefetch_offsetable_write_locality_0:
; RV64ZICBOPZIHINTNTL: # %bb.0:
-; RV64ZICBOPZIHINTNTL-NEXT: addi a1, a0, -2048
; RV64ZICBOPZIHINTNTL-NEXT: ntl.all
-; RV64ZICBOPZIHINTNTL-NEXT: prefetch.w 0(a1)
-; RV64ZICBOPZIHINTNTL-NEXT: addi a2, a0, 2016
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.w -2048(a0)
; RV64ZICBOPZIHINTNTL-NEXT: ntl.all
-; RV64ZICBOPZIHINTNTL-NEXT: prefetch.w 0(a2)
-; RV64ZICBOPZIHINTNTL-NEXT: addi a2, a0, 32
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.w 2016(a0)
; RV64ZICBOPZIHINTNTL-NEXT: ntl.all
-; RV64ZICBOPZIHINTNTL-NEXT: prefetch.w 0(a2)
-; RV64ZICBOPZIHINTNTL-NEXT: addi a2, a0, -32
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.w 32(a0)
; RV64ZICBOPZIHINTNTL-NEXT: ntl.all
-; RV64ZICBOPZIHINTNTL-NEXT: prefetch.w 0(a2)
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.w -32(a0)
+; RV64ZICBOPZIHINTNTL-NEXT: addi a1, a0, -2048
; RV64ZICBOPZIHINTNTL-NEXT: addi a1, a1, -1
; RV64ZICBOPZIHINTNTL-NEXT: ntl.all
; RV64ZICBOPZIHINTNTL-NEXT: prefetch.w 0(a1)
@@ -303,14 +285,11 @@ define void @test_prefetch_offsetable_instruction_locality_0(ptr %a) nounwind {
;
; RV32ZICBOP-LABEL: test_prefetch_offsetable_instruction_locality_0:
; RV32ZICBOP: # %bb.0:
+; RV32ZICBOP-NEXT: prefetch.i -2048(a0)
+; RV32ZICBOP-NEXT: prefetch.i 2016(a0)
+; RV32ZICBOP-NEXT: prefetch.i 32(a0)
+; RV32ZICBOP-NEXT: prefetch.i -32(a0)
; RV32ZICBOP-NEXT: addi a1, a0, -2048
-; RV32ZICBOP-NEXT: prefetch.i 0(a1)
-; RV32ZICBOP-NEXT: addi a2, a0, 2016
-; RV32ZICBOP-NEXT: prefetch.i 0(a2)
-; RV32ZICBOP-NEXT: addi a2, a0, 32
-; RV32ZICBOP-NEXT: prefetch.i 0(a2)
-; RV32ZICBOP-NEXT: addi a2, a0, -32
-; RV32ZICBOP-NEXT: prefetch.i 0(a2)
; RV32ZICBOP-NEXT: addi a1, a1, -1
; RV32ZICBOP-NEXT: prefetch.i 0(a1)
; RV32ZICBOP-NEXT: addi a1, a0, 2047
@@ -324,14 +303,11 @@ define void @test_prefetch_offsetable_instruction_locality_0(ptr %a) nounwind {
;
; RV64ZICBOP-LABEL: test_prefetch_offsetable_instruction_locality_0:
; RV64ZICBOP: # %bb.0:
+; RV64ZICBOP-NEXT: prefetch.i -2048(a0)
+; RV64ZICBOP-NEXT: prefetch.i 2016(a0)
+; RV64ZICBOP-NEXT: prefetch.i 32(a0)
+; RV64ZICBOP-NEXT: prefetch.i -32(a0)
; RV64ZICBOP-NEXT: addi a1, a0, -2048
-; RV64ZICBOP-NEXT: prefetch.i 0(a1)
-; RV64ZICBOP-NEXT: addi a2, a0, 2016
-; RV64ZICBOP-NEXT: prefetch.i 0(a2)
-; RV64ZICBOP-NEXT: addi a2, a0, 32
-; RV64ZICBOP-NEXT: prefetch.i 0(a2)
-; RV64ZICBOP-NEXT: addi a2, a0, -32
-; RV64ZICBOP-NEXT: prefetch.i 0(a2)
; RV64ZICBOP-NEXT: addi a1, a1, -1
; RV64ZICBOP-NEXT: prefetch.i 0(a1)
; RV64ZICBOP-NEXT: addi a1, a0, 2047
@@ -345,18 +321,15 @@ define void @test_prefetch_offsetable_instruction_locality_0(ptr %a) nounwind {
;
; RV64ZICBOPZIHINTNTL-LABEL: test_prefetch_offsetable_instruction_locality_0:
; RV64ZICBOPZIHINTNTL: # %bb.0:
-; RV64ZICBOPZIHINTNTL-NEXT: addi a1, a0, -2048
; RV64ZICBOPZIHINTNTL-NEXT: ntl.all
-; RV64ZICBOPZIHINTNTL-NEXT: prefetch.i 0(a1)
-; RV64ZICBOPZIHINTNTL-NEXT: addi a2, a0, 2016
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.i -2048(a0)
; RV64ZICBOPZIHINTNTL-NEXT: ntl.all
-; RV64ZICBOPZIHINTNTL-NEXT: prefetch.i 0(a2)
-; RV64ZICBOPZIHINTNTL-NEXT: addi a2, a0, 32
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.i 2016(a0)
; RV64ZICBOPZIHINTNTL-NEXT: ntl.all
-; RV64ZICBOPZIHINTNTL-NEXT: prefetch.i 0(a2)
-; RV64ZICBOPZIHINTNTL-NEXT: addi a2, a0, -32
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.i 32(a0)
; RV64ZICBOPZIHINTNTL-NEXT: ntl.all
-; RV64ZICBOPZIHINTNTL-NEXT: prefetch.i 0(a2)
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.i -32(a0)
+; RV64ZICBOPZIHINTNTL-NEXT: addi a1, a0, -2048
; RV64ZICBOPZIHINTNTL-NEXT: addi a1, a1, -1
; RV64ZICBOPZIHINTNTL-NEXT: ntl.all
; RV64ZICBOPZIHINTNTL-NEXT: prefetch.i 0(a1)
@@ -429,14 +402,11 @@ define void @test_prefetch_offsetable_read_locality_1(ptr %a) nounwind {
;
; RV32ZICBOP-LABEL: test_prefetch_offsetable_read_locality_1:
; RV32ZICBOP: # %bb.0:
+; RV32ZICBOP-NEXT: prefetch.r -2048(a0)
+; RV32ZICBOP-NEXT: prefetch.r 2016(a0)
+; RV32ZICBOP-NEXT: prefetch.r 32(a0)
+; RV32ZICBOP-NEXT: prefetch.r -32(a0)
; RV32ZICBOP-NEXT: addi a1, a0, -2048
-; RV32ZICBOP-NEXT: prefetch.r 0(a1)
-; RV32ZICBOP-NEXT: addi a2, a0, 2016
-; RV32ZICBOP-NEXT: prefetch.r 0(a2)
-; RV32ZICBOP-NEXT: addi a2, a0, 32
-; RV32ZICBOP-NEXT: prefetch.r 0(a2)
-; RV32ZICBOP-NEXT: addi a2, a0, -32
-; RV32ZICBOP-NEXT: prefetch.r 0(a2)
; RV32ZICBOP-NEXT: addi a1, a1, -1
; RV32ZICBOP-NEXT: prefetch.r 0(a1)
; RV32ZICBOP-NEXT: addi a1, a0, 2047
@@ -450,14 +420,11 @@ define void @test_prefetch_offsetable_read_locality_1(ptr %a) nounwind {
;
; RV64ZICBOP-LABEL: test_prefetch_offsetable_read_locality_1:
; RV64ZICBOP: # %bb.0:
+; RV64ZICBOP-NEXT: prefetch.r -2048(a0)
+; RV64ZICBOP-NEXT: prefetch.r 2016(a0)
+; RV64ZICBOP-NEXT: prefetch.r 32(a0)
+; RV64ZICBOP-NEXT: prefetch.r -32(a0)
; RV64ZICBOP-NEXT: addi a1, a0, -2048
-; RV64ZICBOP-NEXT: prefetch.r 0(a1)
-; RV64ZICBOP-NEXT: addi a2, a0, 2016
-; RV64ZICBOP-NEXT: prefetch.r 0(a2)
-; RV64ZICBOP-NEXT: addi a2, a0, 32
-; RV64ZICBOP-NEXT: prefetch.r 0(a2)
-; RV64ZICBOP-NEXT: addi a2, a0, -32
-; RV64ZICBOP-NEXT: prefetch.r 0(a2)
; RV64ZICBOP-NEXT: addi a1, a1, -1
; RV64ZICBOP-NEXT: prefetch.r 0(a1)
; RV64ZICBOP-NEXT: addi a1, a0, 2047
@@ -471,18 +438,15 @@ define void @test_prefetch_offsetable_read_locality_1(ptr %a) nounwind {
;
; RV64ZICBOPZIHINTNTL-LABEL: test_prefetch_offsetable_read_locality_1:
; RV64ZICBOPZIHINTNTL: # %bb.0:
-; RV64ZICBOPZIHINTNTL-NEXT: addi a1, a0, -2048
; RV64ZICBOPZIHINTNTL-NEXT: ntl.pall
-; RV64ZICBOPZIHINTNTL-NEXT: prefetch.r 0(a1)
-; RV64ZICBOPZIHINTNTL-NEXT: addi a2, a0, 2016
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.r -2048(a0)
; RV64ZICBOPZIHINTNTL-NEXT: ntl.pall
-; RV64ZICBOPZIHINTNTL-NEXT: prefetch.r 0(a2)
-; RV64ZICBOPZIHINTNTL-NEXT: addi a2, a0, 32
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.r 2016(a0)
; RV64ZICBOPZIHINTNTL-NEXT: ntl.pall
-; RV64ZICBOPZIHINTNTL-NEXT: prefetch.r 0(a2)
-; RV64ZICBOPZIHINTNTL-NEXT: addi a2, a0, -32
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.r 32(a0)
; RV64ZICBOPZIHINTNTL-NEXT: ntl.pall
-; RV64ZICBOPZIHINTNTL-NEXT: prefetch.r 0(a2)
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.r -32(a0)
+; RV64ZICBOPZIHINTNTL-NEXT: addi a1, a0, -2048
; RV64ZICBOPZIHINTNTL-NEXT: addi a1, a1, -1
; RV64ZICBOPZIHINTNTL-NEXT: ntl.pall
; RV64ZICBOPZIHINTNTL-NEXT: prefetch.r 0(a1)
@@ -555,14 +519,11 @@ define void @test_prefetch_offsetable_write_locality_1(ptr %a) nounwind {
;
; RV32ZICBOP-LABEL: test_prefetch_offsetable_write_locality_1:
; RV32ZICBOP: # %bb.0:
+; RV32ZICBOP-NEXT: prefetch.w -2048(a0)
+; RV32ZICBOP-NEXT: prefetch.w 2016(a0)
+; RV32ZICBOP-NEXT: prefetch.w 32(a0)
+; RV32ZICBOP-NEXT: prefetch.w -32(a0)
; RV32ZICBOP-NEXT: addi a1, a0, -2048
-; RV32ZICBOP-NEXT: prefetch.w 0(a1)
-; RV32ZICBOP-NEXT: addi a2, a0, 2016
-; RV32ZICBOP-NEXT: prefetch.w 0(a2)
-; RV32ZICBOP-NEXT: addi a2, a0, 32
-; RV32ZICBOP-NEXT: prefetch.w 0(a2)
-; RV32ZICBOP-NEXT: addi a2, a0, -32
-; RV32ZICBOP-NEXT: prefetch.w 0(a2)
; RV32ZICBOP-NEXT: addi a1, a1, -1
; RV32ZICBOP-NEXT: prefetch.w 0(a1)
; RV32ZICBOP-NEXT: addi a1, a0, 2047
@@ -576,14 +537,11 @@ define void @test_prefetch_offsetable_write_locality_1(ptr %a) nounwind {
;
; RV64ZICBOP-LABEL: test_prefetch_offsetable_write_locality_1:
; RV64ZICBOP: # %bb.0:
+; RV64ZICBOP-NEXT: prefetch.w -2048(a0)
+; RV64ZICBOP-NEXT: prefetch.w 2016(a0)
+; RV64ZICBOP-NEXT: prefetch.w 32(a0)
+; RV64ZICBOP-NEXT: prefetch.w -32(a0)
; RV64ZICBOP-NEXT: addi a1, a0, -2048
-; RV64ZICBOP-NEXT: prefetch.w 0(a1)
-; RV64ZICBOP-NEXT: addi a2, a0, 2016
-; RV64ZICBOP-NEXT: prefetch.w 0(a2)
-; RV64ZICBOP-NEXT: addi a2, a0, 32
-; RV64ZICBOP-NEXT: prefetch.w 0(a2)
-; RV64ZICBOP-NEXT: addi a2, a0, -32
-; RV64ZICBOP-NEXT: prefetch.w 0(a2)
; RV64ZICBOP-NEXT: addi a1, a1, -1
; RV64ZICBOP-NEXT: prefetch.w 0(a1)
; RV64ZICBOP-NEXT: addi a1, a0, 2047
@@ -597,18 +555,15 @@ define void @test_prefetch_offsetable_write_locality_1(ptr %a) nounwind {
;
; RV64ZICBOPZIHINTNTL-LABEL: test_prefetch_offsetable_write_locality_1:
; RV64ZICBOPZIHINTNTL: # %bb.0:
-; RV64ZICBOPZIHINTNTL-NEXT: addi a1, a0, -2048
; RV64ZICBOPZIHINTNTL-NEXT: ntl.pall
-; RV64ZICBOPZIHINTNTL-NEXT: prefetch.w 0(a1)
-; RV64ZICBOPZIHINTNTL-NEXT: addi a2, a0, 2016
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.w -2048(a0)
; RV64ZICBOPZIHINTNTL-NEXT: ntl.pall
-; RV64ZICBOPZIHINTNTL-NEXT: prefetch.w 0(a2)
-; RV64ZICBOPZIHINTNTL-NEXT: addi a2, a0, 32
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.w 2016(a0)
; RV64ZICBOPZIHINTNTL-NEXT: ntl.pall
-; RV64ZICBOPZIHINTNTL-NEXT: prefetch.w 0(a2)
-; RV64ZICBOPZIHINTNTL-NEXT: addi a2, a0, -32
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.w 32(a0)
; RV64ZICBOPZIHINTNTL-NEXT: ntl.pall
-; RV64ZICBOPZIHINTNTL-NEXT: prefetch.w 0(a2)
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.w -32(a0)
+; RV64ZICBOPZIHINTNTL-NEXT: addi a1, a0, -2048
; RV64ZICBOPZIHINTNTL-NEXT: addi a1, a1, -1
; RV64ZICBOPZIHINTNTL-NEXT: ntl.pall
; RV64ZICBOPZIHINTNTL-NEXT: prefetch.w 0(a1)
@@ -681,14 +636,11 @@ define void @test_prefetch_offsetable_instruction_locality_1(ptr %a) nounwind {
;
; RV32ZICBOP-LABEL: test_prefetch_offsetable_instruction_locality_1:
; RV32ZICBOP: # %bb.0:
+; RV32ZICBOP-NEXT: prefetch.i -2048(a0)
+; RV32ZICBOP-NEXT: prefetch.i 2016(a0)
+; RV32ZICBOP-NEXT: prefetch.i 32(a0)
+; RV32ZICBOP-NEXT: prefetch.i -32(a0)
; RV32ZICBOP-NEXT: addi a1, a0, -2048
-; RV32ZICBOP-NEXT: prefetch.i 0(a1)
-; RV32ZICBOP-NEXT: addi a2, a0, 2016
-; RV32ZICBOP-NEXT: prefetch.i 0(a2)
-; RV32ZICBOP-NEXT: addi a2, a0, 32
-; RV32ZICBOP-NEXT: prefetch.i 0(a2)
-; RV32ZICBOP-NEXT: addi a2, a0, -32
-; RV32ZICBOP-NEXT: prefetch.i 0(a2)
; RV32ZICBOP-NEXT: addi a1, a1, -1
; RV32ZICBOP-NEXT: prefetch.i 0(a1)
; RV32ZICBOP-NEXT: addi a1, a0, 2047
@@ -702,14 +654,11 @@ define void @test_prefetch_offsetable_instruction_locality_1(ptr %a) nounwind {
;
; RV64ZICBOP-LABEL: test_prefetch_offsetable_instruction_locality_1:
; RV64ZICBOP: # %bb.0:
+; RV64ZICBOP-NEXT: prefetch.i -2048(a0)
+; RV64ZICBOP-NEXT: prefetch.i 2016(a0)
+; RV64ZICBOP-NEXT: prefetch.i 32(a0)
+; RV64ZICBOP-NEXT: prefetch.i -32(a0)
; RV64ZICBOP-NEXT: addi a1, a0, -2048
-; RV64ZICBOP-NEXT: prefetch.i 0(a1)
-; RV64ZICBOP-NEXT: addi a2, a0, 2016
-; RV64ZICBOP-NEXT: prefetch.i 0(a2)
-; RV64ZICBOP-NEXT: addi a2, a0, 32
-; RV64ZICBOP-NEXT: prefetch.i 0(a2)
-; RV64ZICBOP-NEXT: addi a2, a0, -32
-; RV64ZICBOP-NEXT: prefetch.i 0(a2)
; RV64ZICBOP-NEXT: addi a1, a1, -1
; RV64ZICBOP-NEXT: prefetch.i 0(a1)
; RV64ZICBOP-NEXT: addi a1, a0, 2047
@@ -723,18 +672,15 @@ define void @test_prefetch_offsetable_instruction_locality_1(ptr %a) nounwind {
;
; RV64ZICBOPZIHINTNTL-LABEL: test_prefetch_offsetable_instruction_locality_1:
; RV64ZICBOPZIHINTNTL: # %bb.0:
-; RV64ZICBOPZIHINTNTL-NEXT: addi a1, a0, -2048
; RV64ZICBOPZIHINTNTL-NEXT: ntl.pall
-; RV64ZICBOPZIHINTNTL-NEXT: prefetch.i 0(a1)
-; RV64ZICBOPZIHINTNTL-NEXT: addi a2, a0, 2016
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.i -2048(a0)
; RV64ZICBOPZIHINTNTL-NEXT: ntl.pall
-; RV64ZICBOPZIHINTNTL-NEXT: prefetch.i 0(a2)
-; RV64ZICBOPZIHINTNTL-NEXT: addi a2, a0, 32
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.i 2016(a0)
; RV64ZICBOPZIHINTNTL-NEXT: ntl.pall
-; RV64ZICBOPZIHINTNTL-NEXT: prefetch.i 0(a2)
-; RV64ZICBOPZIHINTNTL-NEXT: addi a2, a0, -32
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.i 32(a0)
; RV64ZICBOPZIHINTNTL-NEXT: ntl.pall
-; RV64ZICBOPZIHINTNTL-NEXT: prefetch.i 0(a2)
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.i -32(a0)
+; RV64ZICBOPZIHINTNTL-NEXT: addi a1, a0, -2048
; RV64ZICBOPZIHINTNTL-NEXT: addi a1, a1, -1
; RV64ZICBOPZIHINTNTL-NEXT: ntl.pall
; RV64ZICBOPZIHINTNTL-NEXT: prefetch.i 0(a1)
@@ -807,14 +753,11 @@ define void @test_prefetch_offsetable_read_locality_2(ptr %a) nounwind {
;
; RV32ZICBOP-LABEL: test_prefetch_offsetable_read_locality_2:
; RV32ZICBOP: # %bb.0:
+; RV32ZICBOP-NEXT: prefetch.r -2048(a0)
+; RV32ZICBOP-NEXT: prefetch.r 2016(a0)
+; RV32ZICBOP-NEXT: prefetch.r 32(a0)
+; RV32ZICBOP-NEXT: prefetch.r -32(a0)
; RV32ZICBOP-NEXT: addi a1, a0, -2048
-; RV32ZICBOP-NEXT: prefetch.r 0(a1)
-; RV32ZICBOP-NEXT: addi a2, a0, 2016
-; RV32ZICBOP-NEXT: prefetch.r 0(a2)
-; RV32ZICBOP-NEXT: addi a2, a0, 32
-; RV32ZICBOP-NEXT: prefetch.r 0(a2)
-; RV32ZICBOP-NEXT: addi a2, a0, -32
-; RV32ZICBOP-NEXT: prefetch.r 0(a2)
; RV32ZICBOP-NEXT: addi a1, a1, -1
; RV32ZICBOP-NEXT: prefetch.r 0(a1)
; RV32ZICBOP-NEXT: addi a1, a0, 2047
@@ -828,14 +771,11 @@ define void @test_prefetch_offsetable_read_locality_2(ptr %a) nounwind {
;
; RV64ZICBOP-LABEL: test_prefetch_offsetable_read_locality_2:
; RV64ZICBOP: # %bb.0:
+; RV64ZICBOP-NEXT: prefetch.r -2048(a0)
+; RV64ZICBOP-NEXT: prefetch.r 2016(a0)
+; RV64ZICBOP-NEXT: prefetch.r 32(a0)
+; RV64ZICBOP-NEXT: prefetch.r -32(a0)
; RV64ZICBOP-NEXT: addi a1, a0, -2048
-; RV64ZICBOP-NEXT: prefetch.r 0(a1)
-; RV64ZICBOP-NEXT: addi a2, a0, 2016
-; RV64ZICBOP-NEXT: prefetch.r 0(a2)
-; RV64ZICBOP-NEXT: addi a2, a0, 32
-; RV64ZICBOP-NEXT: prefetch.r 0(a2)
-; RV64ZICBOP-NEXT: addi a2, a0, -32
-; RV64ZICBOP-NEXT: prefetch.r 0(a2)
; RV64ZICBOP-NEXT: addi a1, a1, -1
; RV64ZICBOP-NEXT: prefetch.r 0(a1)
; RV64ZICBOP-NEXT: addi a1, a0, 2047
@@ -849,18 +789,15 @@ define void @test_prefetch_offsetable_read_locality_2(ptr %a) nounwind {
;
; RV64ZICBOPZIHINTNTL-LABEL: test_prefetch_offsetable_read_locality_2:
; RV64ZICBOPZIHINTNTL: # %bb.0:
-; RV64ZICBOPZIHINTNTL-NEXT: addi a1, a0, -2048
; RV64ZICBOPZIHINTNTL-NEXT: ntl.p1
-; RV64ZICBOPZIHINTNTL-NEXT: prefetch.r 0(a1)
-; RV64ZICBOPZIHINTNTL-NEXT: addi a2, a0, 2016
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.r -2048(a0)
; RV64ZICBOPZIHINTNTL-NEXT: ntl.p1
-; RV64ZICBOPZIHINTNTL-NEXT: prefetch.r 0(a2)
-; RV64ZICBOPZIHINTNTL-NEXT: addi a2, a0, 32
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.r 2016(a0)
; RV64ZICBOPZIHINTNTL-NEXT: ntl.p1
-; RV64ZICBOPZIHINTNTL-NEXT: prefetch.r 0(a2)
-; RV64ZICBOPZIHINTNTL-NEXT: addi a2, a0, -32
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.r 32(a0)
; RV64ZICBOPZIHINTNTL-NEXT: ntl.p1
-; RV64ZICBOPZIHINTNTL-NEXT: prefetch.r 0(a2)
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.r -32(a0)
+; RV64ZICBOPZIHINTNTL-NEXT: addi a1, a0, -2048
; RV64ZICBOPZIHINTNTL-NEXT: addi a1, a1, -1
; RV64ZICBOPZIHINTNTL-NEXT: ntl.p1
; RV64ZICBOPZIHINTNTL-NEXT: prefetch.r 0(a1)
@@ -933,14 +870,11 @@ define void @test_prefetch_offsetable_write_locality_2(ptr %a) nounwind {
;
; RV32ZICBOP-LABEL: test_prefetch_offsetable_write_locality_2:
; RV32ZICBOP: # %bb.0:
+; RV32ZICBOP-NEXT: prefetch.w -2048(a0)
+; RV32ZICBOP-NEXT: prefetch.w 2016(a0)
+; RV32ZICBOP-NEXT: prefetch.w 32(a0)
+; RV32ZICBOP-NEXT: prefetch.w -32(a0)
; RV32ZICBOP-NEXT: addi a1, a0, -2048
-; RV32ZICBOP-NEXT: prefetch.w 0(a1)
-; RV32ZICBOP-NEXT: addi a2, a0, 2016
-; RV32ZICBOP-NEXT: prefetch.w 0(a2)
-; RV32ZICBOP-NEXT: addi a2, a0, 32
-; RV32ZICBOP-NEXT: prefetch.w 0(a2)
-; RV32ZICBOP-NEXT: addi a2, a0, -32
-; RV32ZICBOP-NEXT: prefetch.w 0(a2)
; RV32ZICBOP-NEXT: addi a1, a1, -1
; RV32ZICBOP-NEXT: prefetch.w 0(a1)
; RV32ZICBOP-NEXT: addi a1, a0, 2047
@@ -954,14 +888,11 @@ define void @test_prefetch_offsetable_write_locality_2(ptr %a) nounwind {
;
; RV64ZICBOP-LABEL: test_prefetch_offsetable_write_locality_2:
; RV64ZICBOP: # %bb.0:
+; RV64ZICBOP-NEXT: prefetch.w -2048(a0)
+; RV64ZICBOP-NEXT: prefetch.w 2016(a0)
+; RV64ZICBOP-NEXT: prefetch.w 32(a0)
+; RV64ZICBOP-NEXT: prefetch.w -32(a0)
; RV64ZICBOP-NEXT: addi a1, a0, -2048
-; RV64ZICBOP-NEXT: prefetch.w 0(a1)
-; RV64ZICBOP-NEXT: addi a2, a0, 2016
-; RV64ZICBOP-NEXT: prefetch.w 0(a2)
-; RV64ZICBOP-NEXT: addi a2, a0, 32
-; RV64ZICBOP-NEXT: prefetch.w 0(a2)
-; RV64ZICBOP-NEXT: addi a2, a0, -32
-; RV64ZICBOP-NEXT: prefetch.w 0(a2)
; RV64ZICBOP-NEXT: addi a1, a1, -1
; RV64ZICBOP-NEXT: prefetch.w 0(a1)
; RV64ZICBOP-NEXT: addi a1, a0, 2047
@@ -975,18 +906,15 @@ define void @test_prefetch_offsetable_write_locality_2(ptr %a) nounwind {
;
; RV64ZICBOPZIHINTNTL-LABEL: test_prefetch_offsetable_write_locality_2:
; RV64ZICBOPZIHINTNTL: # %bb.0:
-; RV64ZICBOPZIHINTNTL-NEXT: addi a1, a0, -2048
; RV64ZICBOPZIHINTNTL-NEXT: ntl.p1
-; RV64ZICBOPZIHINTNTL-NEXT: prefetch.w 0(a1)
-; RV64ZICBOPZIHINTNTL-NEXT: addi a2, a0, 2016
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.w -2048(a0)
; RV64ZICBOPZIHINTNTL-NEXT: ntl.p1
-; RV64ZICBOPZIHINTNTL-NEXT: prefetch.w 0(a2)
-; RV64ZICBOPZIHINTNTL-NEXT: addi a2, a0, 32
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.w 2016(a0)
; RV64ZICBOPZIHINTNTL-NEXT: ntl.p1
-; RV64ZICBOPZIHINTNTL-NEXT: prefetch.w 0(a2)
-; RV64ZICBOPZIHINTNTL-NEXT: addi a2, a0, -32
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.w 32(a0)
; RV64ZICBOPZIHINTNTL-NEXT: ntl.p1
-; RV64ZICBOPZIHINTNTL-NEXT: prefetch.w 0(a2)
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.w -32(a0)
+; RV64ZICBOPZIHINTNTL-NEXT: addi a1, a0, -2048
; RV64ZICBOPZIHINTNTL-NEXT: addi a1, a1, -1
; RV64ZICBOPZIHINTNTL-NEXT: ntl.p1
; RV64ZICBOPZIHINTNTL-NEXT: prefetch.w 0(a1)
@@ -1059,14 +987,11 @@ define void @test_prefetch_offsetable_instruction_locality_2(ptr %a) nounwind {
;
; RV32ZICBOP-LABEL: test_prefetch_offsetable_instruction_locality_2:
; RV32ZICBOP: # %bb.0:
+; RV32ZICBOP-NEXT: prefetch.i -2048(a0)
+; RV32ZICBOP-NEXT: prefetch.i 2016(a0)
+; RV32ZICBOP-NEXT: prefetch.i 32(a0)
+; RV32ZICBOP-NEXT: prefetch.i -32(a0)
; RV32ZICBOP-NEXT: addi a1, a0, -2048
-; RV32ZICBOP-NEXT: prefetch.i 0(a1)
-; RV32ZICBOP-NEXT: addi a2, a0, 2016
-; RV32ZICBOP-NEXT: prefetch.i 0(a2)
-; RV32ZICBOP-NEXT: addi a2, a0, 32
-; RV32ZICBOP-NEXT: prefetch.i 0(a2)
-; RV32ZICBOP-NEXT: addi a2, a0, -32
-; RV32ZICBOP-NEXT: prefetch.i 0(a2)
; RV32ZICBOP-NEXT: addi a1, a1, -1
; RV32ZICBOP-NEXT: prefetch.i 0(a1)
; RV32ZICBOP-NEXT: addi a1, a0, 2047
@@ -1080,14 +1005,11 @@ define void @test_prefetch_offsetable_instruction_locality_2(ptr %a) nounwind {
;
; RV64ZICBOP-LABEL: test_prefetch_offsetable_instruction_locality_2:
; RV64ZICBOP: # %bb.0:
+; RV64ZICBOP-NEXT: prefetch.i -2048(a0)
+; RV64ZICBOP-NEXT: prefetch.i 2016(a0)
+; RV64ZICBOP-NEXT: prefetch.i 32(a0)
+; RV64ZICBOP-NEXT: prefetch.i -32(a0)
; RV64ZICBOP-NEXT: addi a1, a0, -2048
-; RV64ZICBOP-NEXT: prefetch.i 0(a1)
-; RV64ZICBOP-NEXT: addi a2, a0, 2016
-; RV64ZICBOP-NEXT: prefetch.i 0(a2)
-; RV64ZICBOP-NEXT: addi a2, a0, 32
-; RV64ZICBOP-NEXT: prefetch.i 0(a2)
-; RV64ZICBOP-NEXT: addi a2, a0, -32
-; RV64ZICBOP-NEXT: prefetch.i 0(a2)
; RV64ZICBOP-NEXT: addi a1, a1, -1
; RV64ZICBOP-NEXT: prefetch.i 0(a1)
; RV64ZICBOP-NEXT: addi a1, a0, 2047
@@ -1101,18 +1023,15 @@ define void @test_prefetch_offsetable_instruction_locality_2(ptr %a) nounwind {
;
; RV64ZICBOPZIHINTNTL-LABEL: test_prefetch_offsetable_instruction_locality_2:
; RV64ZICBOPZIHINTNTL: # %bb.0:
-; RV64ZICBOPZIHINTNTL-NEXT: addi a1, a0, -2048
; RV64ZICBOPZIHINTNTL-NEXT: ntl.p1
-; RV64ZICBOPZIHINTNTL-NEXT: prefetch.i 0(a1)
-; RV64ZICBOPZIHINTNTL-NEXT: addi a2, a0, 2016
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.i -2048(a0)
; RV64ZICBOPZIHINTNTL-NEXT: ntl.p1
-; RV64ZICBOPZIHINTNTL-NEXT: prefetch.i 0(a2)
-; RV64ZICBOPZIHINTNTL-NEXT: addi a2, a0, 32
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.i 2016(a0)
; RV64ZICBOPZIHINTNTL-NEXT: ntl.p1
-; RV64ZICBOPZIHINTNTL-NEXT: prefetch.i 0(a2)
-; RV64ZICBOPZIHINTNTL-NEXT: addi a2, a0, -32
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.i 32(a0)
; RV64ZICBOPZIHINTNTL-NEXT: ntl.p1
-; RV64ZICBOPZIHINTNTL-NEXT: prefetch.i 0(a2)
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.i -32(a0)
+; RV64ZICBOPZIHINTNTL-NEXT: addi a1, a0, -2048
; RV64ZICBOPZIHINTNTL-NEXT: addi a1, a1, -1
; RV64ZICBOPZIHINTNTL-NEXT: ntl.p1
; RV64ZICBOPZIHINTNTL-NEXT: prefetch.i 0(a1)
@@ -1184,14 +1103,11 @@ define void @test_prefetch_offsetable_read_locality_3(ptr %a) nounwind {
;
; RV32ZICBOP-LABEL: test_prefetch_offsetable_read_locality_3:
; RV32ZICBOP: # %bb.0:
+; RV32ZICBOP-NEXT: prefetch.r -2048(a0)
+; RV32ZICBOP-NEXT: prefetch.r 2016(a0)
+; RV32ZICBOP-NEXT: prefetch.r 32(a0)
+; RV32ZICBOP-NEXT: prefetch.r -32(a0)
; RV32ZICBOP-NEXT: addi a1, a0, -2048
-; RV32ZICBOP-NEXT: prefetch.r 0(a1)
-; RV32ZICBOP-NEXT: addi a2, a0, 2016
-; RV32ZICBOP-NEXT: prefetch.r 0(a2)
-; RV32ZICBOP-NEXT: addi a2, a0, 32
-; RV32ZICBOP-NEXT: prefetch.r 0(a2)
-; RV32ZICBOP-NEXT: addi a2, a0, -32
-; RV32ZICBOP-NEXT: prefetch.r 0(a2)
; RV32ZICBOP-NEXT: addi a1, a1, -1
; RV32ZICBOP-NEXT: prefetch.r 0(a1)
; RV32ZICBOP-NEXT: addi a1, a0, 2047
@@ -1205,14 +1121,11 @@ define void @test_prefetch_offsetable_read_locality_3(ptr %a) nounwind {
;
; RV64ZICBOP-LABEL: test_prefetch_offsetable_read_locality_3:
; RV64ZICBOP: # %bb.0:
+; RV64ZICBOP-NEXT: prefetch.r -2048(a0)
+; RV64ZICBOP-NEXT: prefetch.r 2016(a0)
+; RV64ZICBOP-NEXT: prefetch.r 32(a0)
+; RV64ZICBOP-NEXT: prefetch.r -32(a0)
; RV64ZICBOP-NEXT: addi a1, a0, -2048
-; RV64ZICBOP-NEXT: prefetch.r 0(a1)
-; RV64ZICBOP-NEXT: addi a2, a0, 2016
-; RV64ZICBOP-NEXT: prefetch.r 0(a2)
-; RV64ZICBOP-NEXT: addi a2, a0, 32
-; RV64ZICBOP-NEXT: prefetch.r 0(a2)
-; RV64ZICBOP-NEXT: addi a2, a0, -32
-; RV64ZICBOP-NEXT: prefetch.r 0(a2)
; RV64ZICBOP-NEXT: addi a1, a1, -1
; RV64ZICBOP-NEXT: prefetch.r 0(a1)
; RV64ZICBOP-NEXT: addi a1, a0, 2047
@@ -1226,14 +1139,11 @@ define void @test_prefetch_offsetable_read_locality_3(ptr %a) nounwind {
;
; RV64ZICBOPZIHINTNTL-LABEL: test_prefetch_offsetable_read_locality_3:
; RV64ZICBOPZIHINTNTL: # %bb.0:
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.r -2048(a0)
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.r 2016(a0)
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.r 32(a0)
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.r -32(a0)
; RV64ZICBOPZIHINTNTL-NEXT: addi a1, a0, -2048
-; RV64ZICBOPZIHINTNTL-NEXT: prefetch.r 0(a1)
-; RV64ZICBOPZIHINTNTL-NEXT: addi a2, a0, 2016
-; RV64ZICBOPZIHINTNTL-NEXT: prefetch.r 0(a2)
-; RV64ZICBOPZIHINTNTL-NEXT: addi a2, a0, 32
-; RV64ZICBOPZIHINTNTL-NEXT: prefetch.r 0(a2)
-; RV64ZICBOPZIHINTNTL-NEXT: addi a2, a0, -32
-; RV64ZICBOPZIHINTNTL-NEXT: prefetch.r 0(a2)
; RV64ZICBOPZIHINTNTL-NEXT: addi a1, a1, -1
; RV64ZICBOPZIHINTNTL-NEXT: prefetch.r 0(a1)
; RV64ZICBOPZIHINTNTL-NEXT: addi a1, a0, 2047
@@ -1301,14 +1211,11 @@ define void @test_prefetch_offsetable_write_locality_3(ptr %a) nounwind {
;
; RV32ZICBOP-LABEL: test_prefetch_offsetable_write_locality_3:
; RV32ZICBOP: # %bb.0:
+; RV32ZICBOP-NEXT: prefetch.w -2048(a0)
+; RV32ZICBOP-NEXT: prefetch.w 2016(a0)
+; RV32ZICBOP-NEXT: prefetch.w 32(a0)
+; RV32ZICBOP-NEXT: prefetch.w -32(a0)
; RV32ZICBOP-NEXT: addi a1, a0, -2048
-; RV32ZICBOP-NEXT: prefetch.w 0(a1)
-; RV32ZICBOP-NEXT: addi a2, a0, 2016
-; RV32ZICBOP-NEXT: prefetch.w 0(a2)
-; RV32ZICBOP-NEXT: addi a2, a0, 32
-; RV32ZICBOP-NEXT: prefetch.w 0(a2)
-; RV32ZICBOP-NEXT: addi a2, a0, -32
-; RV32ZICBOP-NEXT: prefetch.w 0(a2)
; RV32ZICBOP-NEXT: addi a1, a1, -1
; RV32ZICBOP-NEXT: prefetch.w 0(a1)
; RV32ZICBOP-NEXT: addi a1, a0, 2047
@@ -1322,14 +1229,11 @@ define void @test_prefetch_offsetable_write_locality_3(ptr %a) nounwind {
;
; RV64ZICBOP-LABEL: test_prefetch_offsetable_write_locality_3:
; RV64ZICBOP: # %bb.0:
+; RV64ZICBOP-NEXT: prefetch.w -2048(a0)
+; RV64ZICBOP-NEXT: prefetch.w 2016(a0)
+; RV64ZICBOP-NEXT: prefetch.w 32(a0)
+; RV64ZICBOP-NEXT: prefetch.w -32(a0)
; RV64ZICBOP-NEXT: addi a1, a0, -2048
-; RV64ZICBOP-NEXT: prefetch.w 0(a1)
-; RV64ZICBOP-NEXT: addi a2, a0, 2016
-; RV64ZICBOP-NEXT: prefetch.w 0(a2)
-; RV64ZICBOP-NEXT: addi a2, a0, 32
-; RV64ZICBOP-NEXT: prefetch.w 0(a2)
-; RV64ZICBOP-NEXT: addi a2, a0, -32
-; RV64ZICBOP-NEXT: prefetch.w 0(a2)
; RV64ZICBOP-NEXT: addi a1, a1, -1
; RV64ZICBOP-NEXT: prefetch.w 0(a1)
; RV64ZICBOP-NEXT: addi a1, a0, 2047
@@ -1343,14 +1247,11 @@ define void @test_prefetch_offsetable_write_locality_3(ptr %a) nounwind {
;
; RV64ZICBOPZIHINTNTL-LABEL: test_prefetch_offsetable_write_locality_3:
; RV64ZICBOPZIHINTNTL: # %bb.0:
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.w -2048(a0)
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.w 2016(a0)
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.w 32(a0)
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.w -32(a0)
; RV64ZICBOPZIHINTNTL-NEXT: addi a1, a0, -2048
-; RV64ZICBOPZIHINTNTL-NEXT: prefetch.w 0(a1)
-; RV64ZICBOPZIHINTNTL-NEXT: addi a2, a0, 2016
-; RV64ZICBOPZIHINTNTL-NEXT: prefetch.w 0(a2)
-; RV64ZICBOPZIHINTNTL-NEXT: addi a2, a0, 32
-; RV64ZICBOPZIHINTNTL-NEXT: prefetch.w 0(a2)
-; RV64ZICBOPZIHINTNTL-NEXT: addi a2, a0, -32
-; RV64ZICBOPZIHINTNTL-NEXT: prefetch.w 0(a2)
; RV64ZICBOPZIHINTNTL-NEXT: addi a1, a1, -1
; RV64ZICBOPZIHINTNTL-NEXT: prefetch.w 0(a1)
; RV64ZICBOPZIHINTNTL-NEXT: addi a1, a0, 2047
@@ -1418,14 +1319,11 @@ define void @test_prefetch_offsetable_instruction_locality_3(ptr %a) nounwind {
;
; RV32ZICBOP-LABEL: test_prefetch_offsetable_instruction_locality_3:
; RV32ZICBOP: # %bb.0:
+; RV32ZICBOP-NEXT: prefetch.i -2048(a0)
+; RV32ZICBOP-NEXT: prefetch.i 2016(a0)
+; RV32ZICBOP-NEXT: prefetch.i 32(a0)
+; RV32ZICBOP-NEXT: prefetch.i -32(a0)
; RV32ZICBOP-NEXT: addi a1, a0, -2048
-; RV32ZICBOP-NEXT: prefetch.i 0(a1)
-; RV32ZICBOP-NEXT: addi a2, a0, 2016
-; RV32ZICBOP-NEXT: prefetch.i 0(a2)
-; RV32ZICBOP-NEXT: addi a2, a0, 32
-; RV32ZICBOP-NEXT: prefetch.i 0(a2)
-; RV32ZICBOP-NEXT: addi a2, a0, -32
-; RV32ZICBOP-NEXT: prefetch.i 0(a2)
; RV32ZICBOP-NEXT: addi a1, a1, -1
; RV32ZICBOP-NEXT: prefetch.i 0(a1)
; RV32ZICBOP-NEXT: addi a1, a0, 2047
@@ -1439,14 +1337,11 @@ define void @test_prefetch_offsetable_instruction_locality_3(ptr %a) nounwind {
;
; RV64ZICBOP-LABEL: test_prefetch_offsetable_instruction_locality_3:
; RV64ZICBOP: # %bb.0:
+; RV64ZICBOP-NEXT: prefetch.i -2048(a0)
+; RV64ZICBOP-NEXT: prefetch.i 2016(a0)
+; RV64ZICBOP-NEXT: prefetch.i 32(a0)
+; RV64ZICBOP-NEXT: prefetch.i -32(a0)
; RV64ZICBOP-NEXT: addi a1, a0, -2048
-; RV64ZICBOP-NEXT: prefetch.i 0(a1)
-; RV64ZICBOP-NEXT: addi a2, a0, 2016
-; RV64ZICBOP-NEXT: prefetch.i 0(a2)
-; RV64ZICBOP-NEXT: addi a2, a0, 32
-; RV64ZICBOP-NEXT: prefetch.i 0(a2)
-; RV64ZICBOP-NEXT: addi a2, a0, -32
-; RV64ZICBOP-NEXT: prefetch.i 0(a2)
; RV64ZICBOP-NEXT: addi a1, a1, -1
; RV64ZICBOP-NEXT: prefetch.i 0(a1)
; RV64ZICBOP-NEXT: addi a1, a0, 2047
@@ -1460,14 +1355,11 @@ define void @test_prefetch_offsetable_instruction_locality_3(ptr %a) nounwind {
;
; RV64ZICBOPZIHINTNTL-LABEL: test_prefetch_offsetable_instruction_locality_3:
; RV64ZICBOPZIHINTNTL: # %bb.0:
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.i -2048(a0)
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.i 2016(a0)
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.i 32(a0)
+; RV64ZICBOPZIHINTNTL-NEXT: prefetch.i -32(a0)
; RV64ZICBOPZIHINTNTL-NEXT: addi a1, a0, -2048
-; RV64ZICBOPZIHINTNTL-NEXT: prefetch.i 0(a1)
-; RV64ZICBOPZIHINTNTL-NEXT: addi a2, a0, 2016
-; RV64ZICBOPZIHINTNTL-NEXT: prefetch.i 0(a2)
-; RV64ZICBOPZIHINTNTL-NEXT: addi a2, a0, 32
-; RV64ZICBOPZIHINTNTL-NEXT: prefetch.i 0(a2)
-; RV64ZICBOPZIHINTNTL-NEXT: addi a2, a0, -32
-; RV64ZICBOPZIHINTNTL-NEXT: prefetch.i 0(a2)
; RV64ZICBOPZIHINTNTL-NEXT: addi a1, a1, -1
; RV64ZICBOPZIHINTNTL-NEXT: prefetch.i 0(a1)
; RV64ZICBOPZIHINTNTL-NEXT: addi a1, a0, 2047
More information about the llvm-commits mailing list