[Mlir-commits] [mlir] aad27bf - Add non-temporal support for LLVM masked loads (#104598)
llvmlistbot at llvm.org
llvmlistbot at llvm.org
Sat Aug 17 01:03:10 PDT 2024
Author: Giuseppe Rossini
Date: 2024-08-17T09:03:06+01:00
New Revision: aad27bf534b59645f47a92f072af798687b1dd0d
URL: https://github.com/llvm/llvm-project/commit/aad27bf534b59645f47a92f072af798687b1dd0d
DIFF: https://github.com/llvm/llvm-project/commit/aad27bf534b59645f47a92f072af798687b1dd0d.diff
LOG: Add non-temporal support for LLVM masked loads (#104598)
This PR adds non-temporal support to masked loads as a `UnitAttr`
attribute. Non-temporal load is an important feature for masked
loads, making the intrinsic usable from high-level compilers.
Added:
Modified:
mlir/include/mlir/Dialect/LLVMIR/LLVMIntrinsicOps.td
mlir/test/Target/LLVMIR/Import/intrinsic.ll
mlir/test/Target/LLVMIR/llvmir-intrinsics.mlir
Removed:
################################################################################
diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMIntrinsicOps.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMIntrinsicOps.td
index 2e1635e590cad6..86983e95fdd33d 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/LLVMIntrinsicOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMIntrinsicOps.td
@@ -842,22 +842,27 @@ def LLVM_GetActiveLaneMaskOp
/// Create a call to Masked Load intrinsic.
def LLVM_MaskedLoadOp : LLVM_OneResultIntrOp<"masked.load"> {
let arguments = (ins LLVM_AnyPointer:$data, LLVM_VectorOf<I1>:$mask,
- Variadic<LLVM_AnyVector>:$pass_thru, I32Attr:$alignment);
+ Variadic<LLVM_AnyVector>:$pass_thru, I32Attr:$alignment,
+ UnitAttr:$nontemporal);
let results = (outs LLVM_AnyVector:$res);
let assemblyFormat =
"operands attr-dict `:` functional-type(operands, results)";
string llvmBuilder = [{
- $res = $pass_thru.empty() ? builder.CreateMaskedLoad(
+ auto *inst = $pass_thru.empty() ? builder.CreateMaskedLoad(
$_resultType, $data, llvm::Align($alignment), $mask) :
builder.CreateMaskedLoad(
$_resultType, $data, llvm::Align($alignment), $mask, $pass_thru[0]);
- }];
+ $res = inst;
+ }] #setNonTemporalMetadataCode;
string mlirBuilder = [{
+ auto *intrinInst = dyn_cast<llvm::IntrinsicInst>(inst);
+ bool nontemporal = intrinInst->hasMetadata(llvm::LLVMContext::MD_nontemporal);
$res = $_builder.create<LLVM::MaskedLoadOp>($_location,
- $_resultType, $data, $mask, $pass_thru, $_int_attr($alignment));
+ $_resultType, $data, $mask, $pass_thru, $_int_attr($alignment),
+ nontemporal ? $_builder.getUnitAttr() : nullptr);
}];
- list<int> llvmArgIndices = [0, 2, 3, 1];
+ list<int> llvmArgIndices = [0, 2, 3, 1, -1];
}
/// Create a call to Masked Store intrinsic.
diff --git a/mlir/test/Target/LLVMIR/Import/intrinsic.ll b/mlir/test/Target/LLVMIR/Import/intrinsic.ll
index 9a5528002ef5e9..0fa82cef0a0f5a 100644
--- a/mlir/test/Target/LLVMIR/Import/intrinsic.ll
+++ b/mlir/test/Target/LLVMIR/Import/intrinsic.ll
@@ -409,6 +409,8 @@ define void @masked_load_store_intrinsics(ptr %vec, <7 x i1> %mask) {
%1 = call <7 x float> @llvm.masked.load.v7f32.p0(ptr %vec, i32 1, <7 x i1> %mask, <7 x float> undef)
; CHECK: %[[VAL2:.+]] = llvm.intr.masked.load %[[VEC]], %[[MASK]], %[[VAL1]] {alignment = 4 : i32}
%2 = call <7 x float> @llvm.masked.load.v7f32.p0(ptr %vec, i32 4, <7 x i1> %mask, <7 x float> %1)
+ ; CHECK: %[[VAL3:.+]] = llvm.intr.masked.load %[[VEC]], %[[MASK]], %[[VAL1]] {alignment = 4 : i32, nontemporal}
+ %3 = call <7 x float> @llvm.masked.load.v7f32.p0(ptr %vec, i32 4, <7 x i1> %mask, <7 x float> %1), !nontemporal !{i32 1}
; CHECK: llvm.intr.masked.store %[[VAL2]], %[[VEC]], %[[MASK]] {alignment = 8 : i32}
; CHECK-SAME: vector<7xf32>, vector<7xi1> into !llvm.ptr
call void @llvm.masked.store.v7f32.p0(<7 x float> %2, ptr %vec, i32 8, <7 x i1> %mask)
diff --git a/mlir/test/Target/LLVMIR/llvmir-intrinsics.mlir b/mlir/test/Target/LLVMIR/llvmir-intrinsics.mlir
index 7878aa5ee46d4f..e2eadf14fc97e9 100644
--- a/mlir/test/Target/LLVMIR/llvmir-intrinsics.mlir
+++ b/mlir/test/Target/LLVMIR/llvmir-intrinsics.mlir
@@ -417,8 +417,11 @@ llvm.func @masked_load_store_intrinsics(%A: !llvm.ptr, %mask: vector<7xi1>) {
// CHECK: call <7 x float> @llvm.masked.load.v7f32.p0(ptr %{{.*}}, i32 1, <7 x i1> %{{.*}}, <7 x float> poison)
%a = llvm.intr.masked.load %A, %mask { alignment = 1: i32} :
(!llvm.ptr, vector<7xi1>) -> vector<7xf32>
+ // CHECK: call <7 x float> @llvm.masked.load.v7f32.p0(ptr %{{.*}}, i32 1, <7 x i1> %{{.*}}, <7 x float> poison), !nontemporal !1
+ %b = llvm.intr.masked.load %A, %mask { alignment = 1: i32, nontemporal} :
+ (!llvm.ptr, vector<7xi1>) -> vector<7xf32>
// CHECK: call <7 x float> @llvm.masked.load.v7f32.p0(ptr %{{.*}}, i32 1, <7 x i1> %{{.*}}, <7 x float> %{{.*}})
- %b = llvm.intr.masked.load %A, %mask, %a { alignment = 1: i32} :
+ %c = llvm.intr.masked.load %A, %mask, %a { alignment = 1: i32} :
(!llvm.ptr, vector<7xi1>, vector<7xf32>) -> vector<7xf32>
// CHECK: call void @llvm.masked.store.v7f32.p0(<7 x float> %{{.*}}, ptr %0, i32 {{.*}}, <7 x i1> %{{.*}})
llvm.intr.masked.store %b, %A, %mask { alignment = 1: i32} :
More information about the Mlir-commits
mailing list