[Mlir-commits] [mlir] 10c04f4 - [mlir:GPU][NFC] Update GPU API to use prefixed accessors
Author: River Riddle
Date: 2022-09-30T15:27:10-07:00
New Revision: 10c04f464138012f0930882465eff90b74d8fd1d
URL: https://github.com/llvm/llvm-project/commit/10c04f464138012f0930882465eff90b74d8fd1d
DIFF: https://github.com/llvm/llvm-project/commit/10c04f464138012f0930882465eff90b74d8fd1d.diff
LOG: [mlir:GPU][NFC] Update GPU API to use prefixed accessors
This doesn't flip the switch for prefixed accessor generation yet; that will be
done in a follow-up.
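
For readers unfamiliar with the accessor-prefixing migration, the following is a
hypothetical, standalone C++ sketch (not actual ODS-generated code; the class and
member names are illustrative only) of why both accessor spellings can coexist
during the transition: the generated op class can expose the legacy raw-named
accessor alongside the new get-prefixed one, so call sites like the ones in this
commit can be updated first and the raw spelling dropped later.

// Hypothetical sketch of the accessor-prefixing transition. Not real
// ODS output; FakeMemcpyOp and Value are invented for illustration.
#include <iostream>
#include <string>

struct Value {
  std::string name;
};

class FakeMemcpyOp {
  Value srcOperand{"%src"}, dstOperand{"%dst"};

public:
  // New prefixed accessors: the spelling this commit migrates call sites to.
  Value getSrc() const { return srcOperand; }
  Value getDst() const { return dstOperand; }

  // Legacy raw-named accessors, kept only until the generation switch flips.
  Value src() const { return getSrc(); }
  Value dst() const { return getDst(); }
};

int main() {
  FakeMemcpyOp op;
  // Updated call sites use the prefixed form; the old form still compiles
  // until the legacy accessors are removed in a follow-up.
  std::cout << op.getSrc().name << " -> " << op.getDst().name << "\n";
  return 0;
}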
Added:
Modified:
mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp
mlir/lib/Conversion/GPUCommon/GPUToLLVMConversion.cpp
mlir/lib/Conversion/GPUCommon/IndexIntrinsicsOpLowering.h
mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp
mlir/lib/Conversion/GPUToNVVM/WmmaOpsToNvvm.cpp
mlir/lib/Conversion/GPUToSPIRV/GPUToSPIRV.cpp
mlir/lib/Conversion/SCFToGPU/SCFToGPU.cpp
mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
mlir/lib/Dialect/GPU/IR/InferIntRangeInterfaceImpls.cpp
mlir/lib/Dialect/GPU/Transforms/AllReduceLowering.cpp
mlir/lib/Dialect/GPU/Transforms/AsyncRegionRewriter.cpp
mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp
mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp
Removed:
################################################################################
diff --git a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
index 673be1bb41804..43d4c7ef9efa1 100644
--- a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
+++ b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
@@ -932,7 +932,7 @@ def GPU_AllocOp : GPU_Op<"alloc", [
Optional<GPU_AsyncToken>:$asyncToken);
let extraClassDeclaration = [{
- MemRefType getType() { return memref().getType().cast<MemRefType>(); }
+ MemRefType getType() { return getMemref().getType().cast<MemRefType>(); }
}];
let assemblyFormat = [{
@@ -1209,7 +1209,7 @@ def GPU_SubgroupMmaConstantMatrixOp : GPU_Op<"subgroup_mma_constant_matrix",
let extraClassDeclaration = [{
gpu::MMAMatrixType getType() {
- return res().getType().cast<gpu::MMAMatrixType>();
+ return getRes().getType().cast<gpu::MMAMatrixType>();
}
}];
@@ -1270,7 +1270,7 @@ def GPU_SubgroupMmaElementwiseOp : GPU_Op<"subgroup_mma_elementwise",
let extraClassDeclaration = [{
gpu::MMAMatrixType getType() {
- return res().getType().cast<gpu::MMAMatrixType>();
+ return getRes().getType().cast<gpu::MMAMatrixType>();
}
}];
diff --git a/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp b/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp
index 459180b9d9e4c..ca30af169ffd2 100644
--- a/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp
+++ b/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp
@@ -204,7 +204,7 @@ LogicalResult GPUPrintfOpToHIPLowering::matchAndRewrite(
getOrDefineFunction(moduleOp, loc, rewriter, "__ockl_printf_begin",
LLVM::LLVMFunctionType::get(llvmI64, {llvmI64}));
LLVM::LLVMFuncOp ocklAppendArgs;
- if (!adaptor.args().empty()) {
+ if (!adaptor.getArgs().empty()) {
ocklAppendArgs = getOrDefineFunction(
moduleOp, loc, rewriter, "__ockl_printf_append_args",
LLVM::LLVMFunctionType::get(
@@ -230,7 +230,7 @@ LogicalResult GPUPrintfOpToHIPLowering::matchAndRewrite(
(formatStringPrefix + Twine(stringNumber++)).toStringRef(stringConstName);
} while (moduleOp.lookupSymbol(stringConstName));
- llvm::SmallString<20> formatString(adaptor.format());
+ llvm::SmallString<20> formatString(adaptor.getFormat());
formatString.push_back('\0'); // Null terminate for C
size_t formatStringSize = formatString.size_in_bytes();
@@ -258,12 +258,12 @@ LogicalResult GPUPrintfOpToHIPLowering::matchAndRewrite(
auto appendFormatCall = rewriter.create<LLVM::CallOp>(
loc, ocklAppendStringN,
ValueRange{printfDesc, stringStart, stringLen,
- adaptor.args().empty() ? oneI32 : zeroI32});
+ adaptor.getArgs().empty() ? oneI32 : zeroI32});
printfDesc = appendFormatCall.getResult();
// __ockl_printf_append_args takes 7 values per append call
constexpr size_t argsPerAppend = 7;
- size_t nArgs = adaptor.args().size();
+ size_t nArgs = adaptor.getArgs().size();
for (size_t group = 0; group < nArgs; group += argsPerAppend) {
size_t bound = std::min(group + argsPerAppend, nArgs);
size_t numArgsThisCall = bound - group;
@@ -273,7 +273,7 @@ LogicalResult GPUPrintfOpToHIPLowering::matchAndRewrite(
arguments.push_back(
rewriter.create<LLVM::ConstantOp>(loc, llvmI32, numArgsThisCall));
for (size_t i = group; i < bound; ++i) {
- Value arg = adaptor.args()[i];
+ Value arg = adaptor.getArgs()[i];
if (auto floatType = arg.getType().dyn_cast<FloatType>()) {
if (!floatType.isF64())
arg = rewriter.create<LLVM::FPExtOp>(
@@ -325,7 +325,7 @@ LogicalResult GPUPrintfOpToLLVMCallLowering::matchAndRewrite(
(formatStringPrefix + Twine(stringNumber++)).toStringRef(stringConstName);
} while (moduleOp.lookupSymbol(stringConstName));
- llvm::SmallString<20> formatString(adaptor.format());
+ llvm::SmallString<20> formatString(adaptor.getFormat());
formatString.push_back('\0'); // Null terminate for C
auto globalType =
LLVM::LLVMArrayType::get(llvmI8, formatString.size_in_bytes());
@@ -345,7 +345,7 @@ LogicalResult GPUPrintfOpToLLVMCallLowering::matchAndRewrite(
loc, i8Ptr, globalPtr, ArrayRef<LLVM::GEPArg>{0, 0});
// Construct arguments and function call
- auto argsRange = adaptor.args();
+ auto argsRange = adaptor.getArgs();
SmallVector<Value, 4> printfArgs;
printfArgs.reserve(argsRange.size() + 1);
printfArgs.push_back(stringStart);
diff --git a/mlir/lib/Conversion/GPUCommon/GPUToLLVMConversion.cpp b/mlir/lib/Conversion/GPUCommon/GPUToLLVMConversion.cpp
index 91936496c8028..3fde6abe294fc 100644
--- a/mlir/lib/Conversion/GPUCommon/GPUToLLVMConversion.cpp
+++ b/mlir/lib/Conversion/GPUCommon/GPUToLLVMConversion.cpp
@@ -448,7 +448,7 @@ LogicalResult ConvertHostRegisterOpToGpuRuntimeCallPattern::matchAndRewrite(
Location loc = op->getLoc();
- auto memRefType = hostRegisterOp.value().getType();
+ auto memRefType = hostRegisterOp.getValue().getType();
auto elementType = memRefType.cast<UnrankedMemRefType>().getElementType();
auto elementSize = getSizeInBytes(loc, elementType, rewriter);
@@ -478,13 +478,13 @@ LogicalResult ConvertAllocOpToGpuRuntimeCallPattern::matchAndRewrite(
SmallVector<Value, 4> shape;
SmallVector<Value, 4> strides;
Value sizeBytes;
- getMemRefDescriptorSizes(loc, memRefType, adaptor.dynamicSizes(), rewriter,
+ getMemRefDescriptorSizes(loc, memRefType, adaptor.getDynamicSizes(), rewriter,
shape, strides, sizeBytes);
// Allocate the underlying buffer and store a pointer to it in the MemRef
// descriptor.
Type elementPtrType = this->getElementPtrType(memRefType);
- auto stream = adaptor.asyncDependencies().front();
+ auto stream = adaptor.getAsyncDependencies().front();
Value allocatedPtr =
allocCallBuilder.create(loc, rewriter, {sizeBytes, stream}).getResult();
allocatedPtr =
@@ -512,9 +512,9 @@ LogicalResult ConvertDeallocOpToGpuRuntimeCallPattern::matchAndRewrite(
Location loc = deallocOp.getLoc();
Value pointer =
- MemRefDescriptor(adaptor.memref()).allocatedPtr(rewriter, loc);
+ MemRefDescriptor(adaptor.getMemref()).allocatedPtr(rewriter, loc);
auto casted = rewriter.create<LLVM::BitcastOp>(loc, llvmPointerType, pointer);
- Value stream = adaptor.asyncDependencies().front();
+ Value stream = adaptor.getAsyncDependencies().front();
deallocCallBuilder.create(loc, rewriter, {casted, stream});
rewriter.replaceOp(deallocOp, {stream});
@@ -571,7 +571,7 @@ static bool isDefinedByCallTo(Value value, StringRef functionName) {
LogicalResult ConvertWaitOpToGpuRuntimeCallPattern::matchAndRewrite(
gpu::WaitOp waitOp, OpAdaptor adaptor,
ConversionPatternRewriter &rewriter) const {
- if (waitOp.asyncToken())
+ if (waitOp.getAsyncToken())
return rewriter.notifyMatchFailure(waitOp, "Cannot convert async op.");
Location loc = waitOp.getLoc();
@@ -601,7 +601,7 @@ LogicalResult ConvertWaitOpToGpuRuntimeCallPattern::matchAndRewrite(
LogicalResult ConvertWaitAsyncOpToGpuRuntimeCallPattern::matchAndRewrite(
gpu::WaitOp waitOp, OpAdaptor adaptor,
ConversionPatternRewriter &rewriter) const {
- if (!waitOp.asyncToken())
+ if (!waitOp.getAsyncToken())
return rewriter.notifyMatchFailure(waitOp, "Can only convert async op.");
Location loc = waitOp.getLoc();
@@ -609,7 +609,7 @@ LogicalResult ConvertWaitAsyncOpToGpuRuntimeCallPattern::matchAndRewrite(
auto insertionPoint = rewriter.saveInsertionPoint();
SmallVector<Value, 1> events;
for (auto pair :
- llvm::zip(waitOp.asyncDependencies(), adaptor.getOperands())) {
+ llvm::zip(waitOp.getAsyncDependencies(), adaptor.getOperands())) {
auto operand = std::get<1>(pair);
if (isDefinedByCallTo(operand, streamCreateCallBuilder.functionName)) {
// The converted operand's definition created a stream. Insert an event
@@ -747,14 +747,14 @@ LogicalResult ConvertLaunchFuncOpToGpuRuntimeCallPattern::matchAndRewrite(
if (failed(areAllLLVMTypes(launchOp, adaptor.getOperands(), rewriter)))
return failure();
- if (launchOp.asyncDependencies().size() > 1)
+ if (launchOp.getAsyncDependencies().size() > 1)
return rewriter.notifyMatchFailure(
launchOp, "Cannot convert with more than one async dependency.");
// Fail when the synchronous version of the op has async dependencies. The
// lowering destroys the stream, and we do not want to check that there is no
// use of the stream after this op.
- if (!launchOp.asyncToken() && !launchOp.asyncDependencies().empty())
+ if (!launchOp.getAsyncToken() && !launchOp.getAsyncDependencies().empty())
return rewriter.notifyMatchFailure(
launchOp, "Cannot convert non-async op with async dependencies.");
@@ -790,23 +790,23 @@ LogicalResult ConvertLaunchFuncOpToGpuRuntimeCallPattern::matchAndRewrite(
loc, rewriter, {module.getResult(), kernelName});
Value zero = rewriter.create<LLVM::ConstantOp>(loc, llvmInt32Type, 0);
Value stream =
- adaptor.asyncDependencies().empty()
+ adaptor.getAsyncDependencies().empty()
? streamCreateCallBuilder.create(loc, rewriter, {}).getResult()
- : adaptor.asyncDependencies().front();
+ : adaptor.getAsyncDependencies().front();
// Create array of pointers to kernel arguments.
auto kernelParams = generateParamsArray(launchOp, adaptor, rewriter);
auto nullpointer = rewriter.create<LLVM::NullOp>(loc, llvmPointerPointerType);
- Value dynamicSharedMemorySize = launchOp.dynamicSharedMemorySize()
- ? launchOp.dynamicSharedMemorySize()
+ Value dynamicSharedMemorySize = launchOp.getDynamicSharedMemorySize()
+ ? launchOp.getDynamicSharedMemorySize()
: zero;
launchKernelCallBuilder.create(
loc, rewriter,
- {function.getResult(), adaptor.gridSizeX(), adaptor.gridSizeY(),
- adaptor.gridSizeZ(), adaptor.blockSizeX(), adaptor.blockSizeY(),
- adaptor.blockSizeZ(), dynamicSharedMemorySize, stream, kernelParams,
+ {function.getResult(), adaptor.getGridSizeX(), adaptor.getGridSizeY(),
+ adaptor.getGridSizeZ(), adaptor.getBlockSizeX(), adaptor.getBlockSizeY(),
+ adaptor.getBlockSizeZ(), dynamicSharedMemorySize, stream, kernelParams,
/*extra=*/nullpointer});
- if (launchOp.asyncToken()) {
+ if (launchOp.getAsyncToken()) {
// Async launch: make dependent ops use the same stream.
rewriter.replaceOp(launchOp, {stream});
} else {
@@ -825,7 +825,7 @@ LogicalResult ConvertLaunchFuncOpToGpuRuntimeCallPattern::matchAndRewrite(
LogicalResult ConvertMemcpyOpToGpuRuntimeCallPattern::matchAndRewrite(
gpu::MemcpyOp memcpyOp, OpAdaptor adaptor,
ConversionPatternRewriter &rewriter) const {
- auto memRefType = memcpyOp.src().getType().cast<MemRefType>();
+ auto memRefType = memcpyOp.getSrc().getType().cast<MemRefType>();
if (failed(areAllLLVMTypes(memcpyOp, adaptor.getOperands(), rewriter)) ||
!isConvertibleAndHasIdentityMaps(memRefType) ||
@@ -834,7 +834,7 @@ LogicalResult ConvertMemcpyOpToGpuRuntimeCallPattern::matchAndRewrite(
auto loc = memcpyOp.getLoc();
- MemRefDescriptor srcDesc(adaptor.src());
+ MemRefDescriptor srcDesc(adaptor.getSrc());
Value numElements = getNumElements(rewriter, loc, memRefType, srcDesc);
Type elementPtrType = getElementPtrType(memRefType);
@@ -848,9 +848,9 @@ LogicalResult ConvertMemcpyOpToGpuRuntimeCallPattern::matchAndRewrite(
loc, llvmPointerType, srcDesc.alignedPtr(rewriter, loc));
auto dst = rewriter.create<LLVM::BitcastOp>(
loc, llvmPointerType,
- MemRefDescriptor(adaptor.dst()).alignedPtr(rewriter, loc));
+ MemRefDescriptor(adaptor.getDst()).alignedPtr(rewriter, loc));
- auto stream = adaptor.asyncDependencies().front();
+ auto stream = adaptor.getAsyncDependencies().front();
memcpyCallBuilder.create(loc, rewriter, {dst, src, sizeBytes, stream});
rewriter.replaceOp(memcpyOp, {stream});
@@ -861,7 +861,7 @@ LogicalResult ConvertMemcpyOpToGpuRuntimeCallPattern::matchAndRewrite(
LogicalResult ConvertMemsetOpToGpuRuntimeCallPattern::matchAndRewrite(
gpu::MemsetOp memsetOp, OpAdaptor adaptor,
ConversionPatternRewriter &rewriter) const {
- auto memRefType = memsetOp.dst().getType().cast<MemRefType>();
+ auto memRefType = memsetOp.getDst().getType().cast<MemRefType>();
if (failed(areAllLLVMTypes(memsetOp, adaptor.getOperands(), rewriter)) ||
!isConvertibleAndHasIdentityMaps(memRefType) ||
@@ -870,21 +870,21 @@ LogicalResult ConvertMemsetOpToGpuRuntimeCallPattern::matchAndRewrite(
auto loc = memsetOp.getLoc();
- Type valueType = adaptor.value().getType();
+ Type valueType = adaptor.getValue().getType();
if (!valueType.isIntOrFloat() || valueType.getIntOrFloatBitWidth() != 32) {
return rewriter.notifyMatchFailure(memsetOp,
"value must be a 32 bit scalar");
}
- MemRefDescriptor dstDesc(adaptor.dst());
+ MemRefDescriptor dstDesc(adaptor.getDst());
Value numElements = getNumElements(rewriter, loc, memRefType, dstDesc);
auto value =
- rewriter.create<LLVM::BitcastOp>(loc, llvmInt32Type, adaptor.value());
+ rewriter.create<LLVM::BitcastOp>(loc, llvmInt32Type, adaptor.getValue());
auto dst = rewriter.create<LLVM::BitcastOp>(
loc, llvmPointerType, dstDesc.alignedPtr(rewriter, loc));
- auto stream = adaptor.asyncDependencies().front();
+ auto stream = adaptor.getAsyncDependencies().front();
memsetCallBuilder.create(loc, rewriter, {dst, value, numElements, stream});
rewriter.replaceOp(memsetOp, {stream});
@@ -895,7 +895,7 @@ LogicalResult ConvertSetDefaultDeviceOpToGpuRuntimeCallPattern::matchAndRewrite(
gpu::SetDefaultDeviceOp op, OpAdaptor adaptor,
ConversionPatternRewriter &rewriter) const {
Location loc = op.getLoc();
- setDefaultDeviceCallBuilder.create(loc, rewriter, {adaptor.devIndex()});
+ setDefaultDeviceCallBuilder.create(loc, rewriter, {adaptor.getDevIndex()});
rewriter.replaceOp(op, {});
return success();
}
diff --git a/mlir/lib/Conversion/GPUCommon/IndexIntrinsicsOpLowering.h b/mlir/lib/Conversion/GPUCommon/IndexIntrinsicsOpLowering.h
index b8712d6db2beb..f26b63ea8c648 100644
--- a/mlir/lib/Conversion/GPUCommon/IndexIntrinsicsOpLowering.h
+++ b/mlir/lib/Conversion/GPUCommon/IndexIntrinsicsOpLowering.h
@@ -36,7 +36,7 @@ struct GPUIndexIntrinsicOpLowering : public ConvertOpToLLVMPattern<Op> {
auto loc = op->getLoc();
MLIRContext *context = rewriter.getContext();
Value newOp;
- switch (op.dimension()) {
+ switch (op.getDimension()) {
case gpu::Dimension::x:
newOp = rewriter.create<XOp>(loc, IntegerType::get(context, 32));
break;
diff --git a/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp b/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp
index 496d84e24ac96..e5bc16ce54e11 100644
--- a/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp
+++ b/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp
@@ -87,7 +87,7 @@ struct GPUShuffleOpLowering : public ConvertOpToLLVMPattern<gpu::ShuffleOp> {
ConversionPatternRewriter &rewriter) const override {
Location loc = op->getLoc();
- auto valueTy = adaptor.value().getType();
+ auto valueTy = adaptor.getValue().getType();
auto int32Type = IntegerType::get(rewriter.getContext(), 32);
auto predTy = IntegerType::get(rewriter.getContext(), 1);
auto resultTy = LLVM::LLVMStructType::getLiteral(rewriter.getContext(),
@@ -97,24 +97,24 @@ struct GPUShuffleOpLowering : public ConvertOpToLLVMPattern<gpu::ShuffleOp> {
Value minusOne = rewriter.create<LLVM::ConstantOp>(loc, int32Type, -1);
Value thirtyTwo = rewriter.create<LLVM::ConstantOp>(loc, int32Type, 32);
Value numLeadInactiveLane = rewriter.create<LLVM::SubOp>(
- loc, int32Type, thirtyTwo, adaptor.width());
+ loc, int32Type, thirtyTwo, adaptor.getWidth());
// Bit mask of active lanes: `(-1) >> (32 - activeWidth)`.
Value activeMask = rewriter.create<LLVM::LShrOp>(loc, int32Type, minusOne,
numLeadInactiveLane);
Value maskAndClamp;
- if (op.mode() == gpu::ShuffleMode::UP) {
+ if (op.getMode() == gpu::ShuffleMode::UP) {
// Clamp lane: `32 - activeWidth`
maskAndClamp = numLeadInactiveLane;
} else {
// Clamp lane: `activeWidth - 1`
maskAndClamp =
- rewriter.create<LLVM::SubOp>(loc, int32Type, adaptor.width(), one);
+ rewriter.create<LLVM::SubOp>(loc, int32Type, adaptor.getWidth(), one);
}
auto returnValueAndIsValidAttr = rewriter.getUnitAttr();
Value shfl = rewriter.create<NVVM::ShflOp>(
- loc, resultTy, activeMask, adaptor.value(), adaptor.offset(),
- maskAndClamp, convertShflKind(op.mode()), returnValueAndIsValidAttr);
+ loc, resultTy, activeMask, adaptor.getValue(), adaptor.getOffset(),
+ maskAndClamp, convertShflKind(op.getMode()), returnValueAndIsValidAttr);
Value shflValue = rewriter.create<LLVM::ExtractValueOp>(loc, shfl, 0);
Value isActiveSrcLane = rewriter.create<LLVM::ExtractValueOp>(loc, shfl, 1);
diff --git a/mlir/lib/Conversion/GPUToNVVM/WmmaOpsToNvvm.cpp b/mlir/lib/Conversion/GPUToNVVM/WmmaOpsToNvvm.cpp
index 711de63443419..8ecbdf240c773 100644
--- a/mlir/lib/Conversion/GPUToNVVM/WmmaOpsToNvvm.cpp
+++ b/mlir/lib/Conversion/GPUToNVVM/WmmaOpsToNvvm.cpp
@@ -80,7 +80,7 @@ struct WmmaLoadOpToNVVMLowering
// Get the shape of the MMAMatrix type being returned. The shape will
// choose which intrinsic this op will be lowered to.
gpu::MMAMatrixType retType =
- subgroupMmaLoadMatrixOp.res().getType().cast<gpu::MMAMatrixType>();
+ subgroupMmaLoadMatrixOp.getRes().getType().cast<gpu::MMAMatrixType>();
ArrayRef<int64_t> retTypeShape = retType.getShape();
int64_t m = 0;
int64_t n = 0;
@@ -112,12 +112,13 @@ struct WmmaLoadOpToNVVMLowering
// Create nvvm.mma_load op according to the operand types.
Value dataPtr = getStridedElementPtr(
- loc, subgroupMmaLoadMatrixOp.srcMemref().getType().cast<MemRefType>(),
- adaptor.srcMemref(), adaptor.indices(), rewriter);
+ loc,
+ subgroupMmaLoadMatrixOp.getSrcMemref().getType().cast<MemRefType>(),
+ adaptor.getSrcMemref(), adaptor.getIndices(), rewriter);
Value leadingDim = rewriter.create<LLVM::ConstantOp>(
loc, rewriter.getI32Type(),
- subgroupMmaLoadMatrixOp.leadDimensionAttr());
+ subgroupMmaLoadMatrixOp.getLeadDimensionAttr());
rewriter.replaceOpWithNewOp<NVVM::WMMALoadOp>(
op, resType, dataPtr, leadingDim, m, n, k, layout, eltype, frag);
return success();
@@ -147,7 +148,7 @@ struct WmmaStoreOpToNVVMLowering
// Get the shape of the MMAMatrix type being stored. The shape will
// choose which intrinsic this op will be lowered to.
gpu::MMAMatrixType srcType =
- subgroupMmaStoreMatrixOp.src().getType().cast<gpu::MMAMatrixType>();
+ subgroupMmaStoreMatrixOp.getSrc().getType().cast<gpu::MMAMatrixType>();
ArrayRef<int64_t> srcTypeShape = srcType.getShape();
NVVM::MMALayout layout = NVVM::MMALayout::row;
NVVM::MMATypes eltype = getElementType(srcType);
@@ -157,19 +158,20 @@ struct WmmaStoreOpToNVVMLowering
if (NVVM::WMMAStoreOp::getIntrinsicID(m, n, k, layout, eltype) == 0)
return rewriter.notifyMatchFailure(op, kInvalidCaseStr);
- auto matrixType = adaptor.src().getType().cast<LLVM::LLVMStructType>();
+ auto matrixType = adaptor.getSrc().getType().cast<LLVM::LLVMStructType>();
for (unsigned i = 0, e = matrixType.getBody().size(); i < e; ++i) {
Value toUse =
- rewriter.create<LLVM::ExtractValueOp>(loc, adaptor.src(), i);
+ rewriter.create<LLVM::ExtractValueOp>(loc, adaptor.getSrc(), i);
storeOpOperands.push_back(toUse);
}
Value dataPtr = getStridedElementPtr(
- loc, subgroupMmaStoreMatrixOp.dstMemref().getType().cast<MemRefType>(),
- adaptor.dstMemref(), adaptor.indices(), rewriter);
+ loc,
+ subgroupMmaStoreMatrixOp.getDstMemref().getType().cast<MemRefType>(),
+ adaptor.getDstMemref(), adaptor.getIndices(), rewriter);
Value leadingDim = rewriter.create<LLVM::ConstantOp>(
loc, rewriter.getI32Type(),
- subgroupMmaStoreMatrixOp.leadDimensionAttr());
+ subgroupMmaStoreMatrixOp.getLeadDimensionAttr());
rewriter.replaceOpWithNewOp<NVVM::WMMAStoreOp>(
op, dataPtr, m, n, k, layout, eltype, storeOpOperands, leadingDim);
return success();
@@ -210,10 +212,10 @@ struct WmmaMmaOpToNVVMLowering
// Get the shapes of the MMAMatrix type being used. The shapes will
// choose which intrinsic this op will be lowered to.
gpu::MMAMatrixType aType =
- subgroupMmaComputeOp.opA().getType().cast<gpu::MMAMatrixType>();
+ subgroupMmaComputeOp.getOpA().getType().cast<gpu::MMAMatrixType>();
ArrayRef<int64_t> aTypeShape = aType.getShape();
gpu::MMAMatrixType cType =
- subgroupMmaComputeOp.opC().getType().cast<gpu::MMAMatrixType>();
+ subgroupMmaComputeOp.getOpC().getType().cast<gpu::MMAMatrixType>();
ArrayRef<int64_t> cTypeShape = cType.getShape();
int64_t m = cTypeShape[0];
int64_t n = cTypeShape[1];
@@ -225,12 +227,12 @@ struct WmmaMmaOpToNVVMLowering
destType) == 0)
return rewriter.notifyMatchFailure(op, kInvalidCaseStr);
- unpackOp(adaptor.opA());
- unpackOp(adaptor.opB());
- unpackOp(adaptor.opC());
+ unpackOp(adaptor.getOpA());
+ unpackOp(adaptor.getOpB());
+ unpackOp(adaptor.getOpC());
rewriter.replaceOpWithNewOp<NVVM::WMMAMmaOp>(
- op, adaptor.opC().getType(), m, n, k, layout, layout, sourceType,
+ op, adaptor.getOpC().getType(), m, n, k, layout, layout, sourceType,
destType, unpackedOps);
return success();
}
@@ -337,8 +339,9 @@ struct WmmaElementwiseOpToNVVMLowering
extractedOperands.push_back(rewriter.create<LLVM::ExtractValueOp>(
loc, adaptor.getOperands()[opIdx], i));
}
- Value element = createScalarOp(
- rewriter, loc, subgroupMmaElementwiseOp.opType(), extractedOperands);
+ Value element =
+ createScalarOp(rewriter, loc, subgroupMmaElementwiseOp.getOpType(),
+ extractedOperands);
matrixStruct =
rewriter.create<LLVM::InsertValueOp>(loc, matrixStruct, element, i);
}
diff --git a/mlir/lib/Conversion/GPUToSPIRV/GPUToSPIRV.cpp b/mlir/lib/Conversion/GPUToSPIRV/GPUToSPIRV.cpp
index faf9a0baafa2c..1b51161964d80 100644
--- a/mlir/lib/Conversion/GPUToSPIRV/GPUToSPIRV.cpp
+++ b/mlir/lib/Conversion/GPUToSPIRV/GPUToSPIRV.cpp
@@ -150,7 +150,7 @@ LogicalResult LaunchConfigConversion<SourceOp, builtin>::matchAndRewrite(
spirv::getBuiltinVariableValue(op, builtin, indexType, rewriter);
rewriter.replaceOpWithNewOp<spirv::CompositeExtractOp>(
op, indexType, spirvBuiltin,
- rewriter.getI32ArrayAttr({static_cast<int32_t>(op.dimension())}));
+ rewriter.getI32ArrayAttr({static_cast<int32_t>(op.getDimension())}));
return success();
}
@@ -176,7 +176,7 @@ LogicalResult WorkGroupSizeConversion::matchAndRewrite(
return failure();
auto val = workGroupSizeAttr
- .getValues<int32_t>()[static_cast<int32_t>(op.dimension())];
+ .getValues<int32_t>()[static_cast<int32_t>(op.getDimension())];
auto convertedType =
getTypeConverter()->convertType(op.getResult().getType());
if (!convertedType)
@@ -389,7 +389,7 @@ LogicalResult GPUShuffleConversion::matchAndRewrite(
unsigned subgroupSize =
targetEnv.getAttr().getResourceLimits().getSubgroupSize();
IntegerAttr widthAttr;
- if (!matchPattern(shuffleOp.width(), m_Constant(&widthAttr)) ||
+ if (!matchPattern(shuffleOp.getWidth(), m_Constant(&widthAttr)) ||
widthAttr.getValue().getZExtValue() != subgroupSize)
return rewriter.notifyMatchFailure(
shuffleOp, "shuffle width and target subgroup size mismatch");
@@ -400,10 +400,10 @@ LogicalResult GPUShuffleConversion::matchAndRewrite(
auto scope = rewriter.getAttr<spirv::ScopeAttr>(spirv::Scope::Subgroup);
Value result;
- switch (shuffleOp.mode()) {
+ switch (shuffleOp.getMode()) {
case gpu::ShuffleMode::XOR:
result = rewriter.create<spirv::GroupNonUniformShuffleXorOp>(
- loc, scope, adaptor.value(), adaptor.offset());
+ loc, scope, adaptor.getValue(), adaptor.getOffset());
break;
default:
return rewriter.notifyMatchFailure(shuffleOp, "unimplemented shuffle mode");
diff --git a/mlir/lib/Conversion/SCFToGPU/SCFToGPU.cpp b/mlir/lib/Conversion/SCFToGPU/SCFToGPU.cpp
index bc5c354165822..73a115477cda3 100644
--- a/mlir/lib/Conversion/SCFToGPU/SCFToGPU.cpp
+++ b/mlir/lib/Conversion/SCFToGPU/SCFToGPU.cpp
@@ -248,15 +248,15 @@ void AffineLoopToGpuConverter::createLaunch(AffineForOp rootForOp,
terminator.erase();
builder.setInsertionPointToEnd(innermostForOp.getBody());
builder.create<gpu::TerminatorOp>(terminatorLoc, llvm::None);
- launchOp.body().front().getOperations().splice(
- launchOp.body().front().begin(),
+ launchOp.getBody().front().getOperations().splice(
+ launchOp.getBody().front().begin(),
innermostForOp.getBody()->getOperations());
// Remap the loop iterators to use block/thread identifiers instead. Loops
// may iterate from LB with step S whereas GPU thread/block ids always iterate
// from 0 to N with step 1. Therefore, loop induction variables are replaced
// with (gpu-thread/block-id * S) + LB.
- builder.setInsertionPointToStart(&launchOp.body().front());
+ builder.setInsertionPointToStart(&launchOp.getBody().front());
auto *lbArgumentIt = lbs.begin();
auto *stepArgumentIt = steps.begin();
for (const auto &en : llvm::enumerate(ivs)) {
@@ -447,7 +447,7 @@ static LogicalResult processParallelLoop(
if (isMappedToProcessor(processor)) {
// Use the corresponding thread/grid index as replacement for the loop iv.
Value operand =
- launchOp.body().getArgument(getLaunchOpArgumentNum(processor));
+ launchOp.getBody().getArgument(getLaunchOpArgumentNum(processor));
// Take the indexmap and add the lower bound and step computations in.
// This computes operand * step + lowerBound.
// Use an affine map here so that it composes nicely with the provided
@@ -610,9 +610,9 @@ ParallelToGpuLaunchLowering::matchAndRewrite(ParallelOp parallelOp,
gpu::LaunchOp launchOp = rewriter.create<gpu::LaunchOp>(
parallelOp.getLoc(), constantOne, constantOne, constantOne, constantOne,
constantOne, constantOne);
- rewriter.setInsertionPointToEnd(&launchOp.body().front());
+ rewriter.setInsertionPointToEnd(&launchOp.getBody().front());
rewriter.create<gpu::TerminatorOp>(loc);
- rewriter.setInsertionPointToStart(&launchOp.body().front());
+ rewriter.setInsertionPointToStart(&launchOp.getBody().front());
BlockAndValueMapping cloningMap;
llvm::DenseMap<gpu::Processor, Value> launchBounds;
diff --git a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
index 12b6ee5835d02..83ee9bbfa384b 100644
--- a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
+++ b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
@@ -225,14 +225,14 @@ LogicalResult GPUDialect::verifyOperationAttribute(Operation *op,
<< "' is undefined";
// Check that `launch_func` refers to a well-formed kernel function.
- Operation *kernelFunc = module.lookupSymbol(launchOp.kernelAttr());
+ Operation *kernelFunc = module.lookupSymbol(launchOp.getKernelAttr());
if (!kernelFunc)
return launchOp.emitOpError("kernel function '")
- << launchOp.kernel() << "' is undefined";
+ << launchOp.getKernel() << "' is undefined";
auto kernelConvertedFunction = dyn_cast<FunctionOpInterface>(kernelFunc);
if (!kernelConvertedFunction) {
InFlightDiagnostic diag = launchOp.emitOpError()
- << "referenced kernel '" << launchOp.kernel()
+ << "referenced kernel '" << launchOp.getKernel()
<< "' is not a function";
diag.attachNote(kernelFunc->getLoc()) << "see the kernel definition here";
return diag;
@@ -310,17 +310,17 @@ static void printAsyncDependencies(OpAsmPrinter &printer, Operation *op,
//===----------------------------------------------------------------------===//
LogicalResult gpu::AllReduceOp::verifyRegions() {
- if (body().empty() != op().has_value())
+ if (getBody().empty() != getOp().has_value())
return emitError("expected either an op attribute or a non-empty body");
- if (!body().empty()) {
- if (body().getNumArguments() != 2)
+ if (!getBody().empty()) {
+ if (getBody().getNumArguments() != 2)
return emitError("expected two region arguments");
- for (auto argument : body().getArguments()) {
+ for (auto argument : getBody().getArguments()) {
if (argument.getType() != getType())
return emitError("incorrect region argument type");
}
unsigned yieldCount = 0;
- for (Block &block : body()) {
+ for (Block &block : getBody()) {
if (auto yield = dyn_cast<gpu::YieldOp>(block.getTerminator())) {
if (yield.getNumOperands() != 1)
return emitError("expected one gpu.yield operand");
@@ -332,7 +332,7 @@ LogicalResult gpu::AllReduceOp::verifyRegions() {
if (yieldCount == 0)
return emitError("expected gpu.yield op in region");
} else {
- gpu::AllReduceOperation opName = *op();
+ gpu::AllReduceOperation opName = *getOp();
if ((opName == gpu::AllReduceOperation::AND ||
opName == gpu::AllReduceOperation::OR ||
opName == gpu::AllReduceOperation::XOR) &&
@@ -391,16 +391,16 @@ void gpu::addAsyncDependency(Operation *op, Value token) {
void LaunchOp::build(OpBuilder &builder, OperationState &result,
Value gridSizeX, Value gridSizeY, Value gridSizeZ,
- Value blockSizeX, Value blockSizeY, Value blockSizeZ,
- Value dynamicSharedMemorySize, Type asyncTokenType,
- ValueRange asyncDependencies) {
+ Value getBlockSizeX, Value getBlockSizeY,
+ Value getBlockSizeZ, Value dynamicSharedMemorySize,
+ Type asyncTokenType, ValueRange asyncDependencies) {
result.addOperands(asyncDependencies);
if (asyncTokenType)
result.types.push_back(builder.getType<AsyncTokenType>());
// Add grid and block sizes as op operands, followed by the data operands.
- result.addOperands(
- {gridSizeX, gridSizeY, gridSizeZ, blockSizeX, blockSizeY, blockSizeZ});
+ result.addOperands({gridSizeX, gridSizeY, gridSizeZ, getBlockSizeX,
+ getBlockSizeY, getBlockSizeZ});
if (dynamicSharedMemorySize)
result.addOperands(dynamicSharedMemorySize);
@@ -420,36 +420,36 @@ void LaunchOp::build(OpBuilder &builder, OperationState &result,
}
KernelDim3 LaunchOp::getBlockIds() {
- assert(!body().empty() && "LaunchOp body must not be empty.");
- auto args = body().getArguments();
+ assert(!getBody().empty() && "LaunchOp body must not be empty.");
+ auto args = getBody().getArguments();
return KernelDim3{args[0], args[1], args[2]};
}
KernelDim3 LaunchOp::getThreadIds() {
- assert(!body().empty() && "LaunchOp body must not be empty.");
- auto args = body().getArguments();
+ assert(!getBody().empty() && "LaunchOp body must not be empty.");
+ auto args = getBody().getArguments();
return KernelDim3{args[3], args[4], args[5]};
}
KernelDim3 LaunchOp::getGridSize() {
- assert(!body().empty() && "LaunchOp body must not be empty.");
- auto args = body().getArguments();
+ assert(!getBody().empty() && "LaunchOp body must not be empty.");
+ auto args = getBody().getArguments();
return KernelDim3{args[6], args[7], args[8]};
}
KernelDim3 LaunchOp::getBlockSize() {
- assert(!body().empty() && "LaunchOp body must not be empty.");
- auto args = body().getArguments();
+ assert(!getBody().empty() && "LaunchOp body must not be empty.");
+ auto args = getBody().getArguments();
return KernelDim3{args[9], args[10], args[11]};
}
KernelDim3 LaunchOp::getGridSizeOperandValues() {
- auto operands = getOperands().drop_front(asyncDependencies().size());
+ auto operands = getOperands().drop_front(getAsyncDependencies().size());
return KernelDim3{operands[0], operands[1], operands[2]};
}
KernelDim3 LaunchOp::getBlockSizeOperandValues() {
- auto operands = getOperands().drop_front(asyncDependencies().size());
+ auto operands = getOperands().drop_front(getAsyncDependencies().size());
return KernelDim3{operands[3], operands[4], operands[5]};
}
@@ -457,16 +457,17 @@ LogicalResult LaunchOp::verifyRegions() {
// Kernel launch takes kNumConfigOperands leading operands for grid/block
// sizes and transforms them into kNumConfigRegionAttributes region arguments
// for block/thread identifiers and grid/block sizes.
- if (!body().empty()) {
- if (body().getNumArguments() !=
+ if (!getBody().empty()) {
+ if (getBody().getNumArguments() !=
LaunchOp::kNumConfigOperands + getNumOperands() -
- (dynamicSharedMemorySize() ? 1 : 0) - asyncDependencies().size())
+ (getDynamicSharedMemorySize() ? 1 : 0) -
+ getAsyncDependencies().size())
return emitOpError("unexpected number of region arguments");
}
// Block terminators without successors are expected to exit the kernel region
// and must be `gpu.terminator`.
- for (Block &block : body()) {
+ for (Block &block : getBody()) {
if (block.empty())
continue;
if (block.back().getNumSuccessors() != 0)
@@ -481,7 +482,7 @@ LogicalResult LaunchOp::verifyRegions() {
}
}
- if (getNumResults() == 0 && asyncToken())
+ if (getNumResults() == 0 && getAsyncToken())
return emitOpError("needs to be named when async keyword is specified");
return success();
@@ -500,10 +501,10 @@ static void printSizeAssignment(OpAsmPrinter &p, KernelDim3 size,
}
void LaunchOp::print(OpAsmPrinter &p) {
- if (asyncToken()) {
+ if (getAsyncToken()) {
p << " async";
- if (!asyncDependencies().empty())
- p << " [" << asyncDependencies() << ']';
+ if (!getAsyncDependencies().empty())
+ p << " [" << getAsyncDependencies() << ']';
}
// Print the launch configuration.
p << ' ' << getBlocksKeyword();
@@ -512,12 +513,12 @@ void LaunchOp::print(OpAsmPrinter &p) {
p << ' ' << getThreadsKeyword();
printSizeAssignment(p, getBlockSize(), getBlockSizeOperandValues(),
getThreadIds());
- if (dynamicSharedMemorySize())
+ if (getDynamicSharedMemorySize())
p << ' ' << getDynamicSharedMemorySizeKeyword() << ' '
- << dynamicSharedMemorySize();
+ << getDynamicSharedMemorySize();
p << ' ';
- p.printRegion(body(), /*printEntryBlockArgs=*/false);
+ p.printRegion(getBody(), /*printEntryBlockArgs=*/false);
p.printOptionalAttrDict((*this)->getAttrs(), /*elidedAttrs=*/{
LaunchOp::getOperandSegmentSizeAttr()});
}
@@ -657,19 +658,19 @@ struct FoldLaunchArguments : public OpRewritePattern<LaunchOp> {
if (!simplified) {
// Create a zero value the first time.
OpBuilder::InsertionGuard guard(rewriter);
- rewriter.setInsertionPointToStart(&op.body().front());
+ rewriter.setInsertionPointToStart(&op.getBody().front());
zero =
rewriter.create<arith::ConstantIndexOp>(op.getLoc(), /*value=*/0);
}
id.replaceAllUsesWith(zero);
simplified = true;
};
- constPropIdUses(op.getBlockIds().x, op.gridSizeX());
- constPropIdUses(op.getBlockIds().y, op.gridSizeY());
- constPropIdUses(op.getBlockIds().z, op.gridSizeZ());
- constPropIdUses(op.getThreadIds().x, op.blockSizeX());
- constPropIdUses(op.getThreadIds().y, op.blockSizeY());
- constPropIdUses(op.getThreadIds().z, op.blockSizeZ());
+ constPropIdUses(op.getBlockIds().x, op.getGridSizeX());
+ constPropIdUses(op.getBlockIds().y, op.getGridSizeY());
+ constPropIdUses(op.getBlockIds().z, op.getGridSizeZ());
+ constPropIdUses(op.getThreadIds().x, op.getBlockSizeX());
+ constPropIdUses(op.getThreadIds().y, op.getBlockSizeY());
+ constPropIdUses(op.getThreadIds().z, op.getBlockSizeZ());
return success(simplified);
}
@@ -686,7 +687,7 @@ void LaunchOp::getCanonicalizationPatterns(RewritePatternSet &rewrites,
void LaunchFuncOp::build(OpBuilder &builder, OperationState &result,
GPUFuncOp kernelFunc, KernelDim3 gridSize,
- KernelDim3 blockSize, Value dynamicSharedMemorySize,
+ KernelDim3 getBlockSize, Value dynamicSharedMemorySize,
ValueRange kernelOperands, Type asyncTokenType,
ValueRange asyncDependencies) {
result.addOperands(asyncDependencies);
@@ -694,8 +695,8 @@ void LaunchFuncOp::build(OpBuilder &builder, OperationState &result,
result.types.push_back(builder.getType<AsyncTokenType>());
// Add grid and block sizes as op operands, followed by the data operands.
- result.addOperands({gridSize.x, gridSize.y, gridSize.z, blockSize.x,
- blockSize.y, blockSize.z});
+ result.addOperands({gridSize.x, gridSize.y, gridSize.z, getBlockSize.x,
+ getBlockSize.y, getBlockSize.z});
if (dynamicSharedMemorySize)
result.addOperands(dynamicSharedMemorySize);
result.addOperands(kernelOperands);
@@ -713,24 +714,28 @@ void LaunchFuncOp::build(OpBuilder &builder, OperationState &result,
}
StringAttr LaunchFuncOp::getKernelModuleName() {
- return kernel().getRootReference();
+ return getKernel().getRootReference();
}
-StringAttr LaunchFuncOp::getKernelName() { return kernel().getLeafReference(); }
+StringAttr LaunchFuncOp::getKernelName() {
+ return getKernel().getLeafReference();
+}
unsigned LaunchFuncOp::getNumKernelOperands() {
- return kernelOperands().size();
+ return getKernelOperands().size();
}
-Value LaunchFuncOp::getKernelOperand(unsigned i) { return kernelOperands()[i]; }
+Value LaunchFuncOp::getKernelOperand(unsigned i) {
+ return getKernelOperands()[i];
+}
KernelDim3 LaunchFuncOp::getGridSizeOperandValues() {
- auto operands = getOperands().drop_front(asyncDependencies().size());
+ auto operands = getOperands().drop_front(getAsyncDependencies().size());
return KernelDim3{operands[0], operands[1], operands[2]};
}
KernelDim3 LaunchFuncOp::getBlockSizeOperandValues() {
- auto operands = getOperands().drop_front(asyncDependencies().size());
+ auto operands = getOperands().drop_front(getAsyncDependencies().size());
return KernelDim3{operands[3], operands[4], operands[5]};
}
@@ -1091,8 +1096,8 @@ void GPUModuleOp::print(OpAsmPrinter &p) {
//===----------------------------------------------------------------------===//
LogicalResult MemcpyOp::verify() {
- auto srcType = src().getType();
- auto dstType = dst().getType();
+ auto srcType = getSrc().getType();
+ auto dstType = getDst().getType();
if (getElementTypeOrSelf(srcType) != getElementTypeOrSelf(dstType))
return emitOpError("arguments have incompatible element type");
@@ -1112,7 +1117,7 @@ struct EraseTrivialCopyOp : public OpRewritePattern<MemcpyOp> {
LogicalResult matchAndRewrite(MemcpyOp op,
PatternRewriter &rewriter) const override {
- Value dest = op.dst();
+ Value dest = op.getDst();
Operation *destDefOp = dest.getDefiningOp();
// `dest` must be defined by an op having Allocate memory effect in order to
// perform the folding.
@@ -1129,11 +1134,11 @@ struct EraseTrivialCopyOp : public OpRewritePattern<MemcpyOp> {
// We can perform the folding if and only if op has a single async
// dependency and produces an async token as result, or if it does not have
// any async dependency and does not produce any async token result.
- if (op.asyncDependencies().size() > 1 ||
- ((op.asyncDependencies().empty() && op.asyncToken()) ||
- (!op.asyncDependencies().empty() && !op.asyncToken())))
+ if (op.getAsyncDependencies().size() > 1 ||
+ ((op.getAsyncDependencies().empty() && op.getAsyncToken()) ||
+ (!op.getAsyncDependencies().empty() && !op.getAsyncToken())))
return failure();
- rewriter.replaceOp(op, op.asyncDependencies());
+ rewriter.replaceOp(op, op.getAsyncDependencies());
return success();
}
};
@@ -1161,8 +1166,8 @@ static bool isLastMemrefDimUnitStride(MemRefType type) {
}
LogicalResult SubgroupMmaLoadMatrixOp::verify() {
- auto srcType = srcMemref().getType();
- auto resType = res().getType();
+ auto srcType = getSrcMemref().getType();
+ auto resType = getRes().getType();
auto resMatrixType = resType.cast<gpu::MMAMatrixType>();
auto operand = resMatrixType.getOperand();
auto srcMemrefType = srcType.cast<MemRefType>();
@@ -1190,8 +1195,8 @@ LogicalResult SubgroupMmaLoadMatrixOp::verify() {
//===----------------------------------------------------------------------===//
LogicalResult SubgroupMmaStoreMatrixOp::verify() {
- auto srcType = src().getType();
- auto dstType = dstMemref().getType();
+ auto srcType = getSrc().getType();
+ auto dstType = getDstMemref().getType();
auto srcMatrixType = srcType.cast<gpu::MMAMatrixType>();
auto dstMemrefType = dstType.cast<MemRefType>();
auto dstMemSpace = dstMemrefType.getMemorySpaceAsInt();
@@ -1219,9 +1224,9 @@ LogicalResult SubgroupMmaStoreMatrixOp::verify() {
LogicalResult SubgroupMmaComputeOp::verify() {
enum OperandMap { A, B, C };
SmallVector<MMAMatrixType, 3> opTypes;
- opTypes.push_back(opA().getType().cast<MMAMatrixType>());
- opTypes.push_back(opB().getType().cast<MMAMatrixType>());
- opTypes.push_back(opC().getType().cast<MMAMatrixType>());
+ opTypes.push_back(getOpA().getType().cast<MMAMatrixType>());
+ opTypes.push_back(getOpB().getType().cast<MMAMatrixType>());
+ opTypes.push_back(getOpC().getType().cast<MMAMatrixType>());
if (!opTypes[A].getOperand().equals("AOp") ||
!opTypes[B].getOperand().equals("BOp") ||
@@ -1284,7 +1289,7 @@ struct EraseRedundantGpuWaitOpPairs : public OpRewritePattern<WaitOp> {
auto waitOp = value.getDefiningOp<WaitOp>();
return waitOp && waitOp->getNumOperands() == 0;
};
- if (llvm::none_of(op.asyncDependencies(), predicate))
+ if (llvm::none_of(op.getAsyncDependencies(), predicate))
return failure();
SmallVector<Value> validOperands;
for (Value operand : op->getOperands()) {
@@ -1312,17 +1317,18 @@ struct SimplifyGpuWaitOp : public OpRewritePattern<WaitOp> {
PatternRewriter &rewriter) const final {
// Erase gpu.wait ops that neither have any async dependencies nor return
// any async token.
- if (op.asyncDependencies().empty() && !op.asyncToken()) {
+ if (op.getAsyncDependencies().empty() && !op.getAsyncToken()) {
rewriter.eraseOp(op);
return success();
}
// Replace uses of %t1 = gpu.wait async [%t0] ops with %t0 and erase the op.
- if (llvm::hasSingleElement(op.asyncDependencies()) && op.asyncToken()) {
- rewriter.replaceOp(op, op.asyncDependencies());
+ if (llvm::hasSingleElement(op.getAsyncDependencies()) &&
+ op.getAsyncToken()) {
+ rewriter.replaceOp(op, op.getAsyncDependencies());
return success();
}
// Erase %t = gpu.wait async ... ops, where %t has no uses.
- if (op.asyncToken() && op.asyncToken().use_empty()) {
+ if (op.getAsyncToken() && op.getAsyncToken().use_empty()) {
rewriter.eraseOp(op);
return success();
}
@@ -1342,9 +1348,9 @@ void WaitOp::getCanonicalizationPatterns(RewritePatternSet &results,
//===----------------------------------------------------------------------===//
LogicalResult AllocOp::verify() {
- auto memRefType = memref().getType().cast<MemRefType>();
+ auto memRefType = getMemref().getType().cast<MemRefType>();
- if (static_cast<int64_t>(dynamicSizes().size()) !=
+ if (static_cast<int64_t>(getDynamicSizes().size()) !=
memRefType.getNumDynamicDims())
return emitOpError("dimension operand count does not equal memref "
"dynamic dimension count");
@@ -1352,7 +1358,7 @@ LogicalResult AllocOp::verify() {
unsigned numSymbols = 0;
if (!memRefType.getLayout().isIdentity())
numSymbols = memRefType.getLayout().getAffineMap().getNumSymbols();
- if (symbolOperands().size() != numSymbols) {
+ if (getSymbolOperands().size() != numSymbols) {
return emitOpError(
"symbol operand count does not equal memref symbol count");
}
@@ -1381,7 +1387,7 @@ struct SimplifyDimOfAllocOp : public OpRewritePattern<memref::DimOp> {
if (!alloc)
return failure();
- Value substituteOp = *(alloc.dynamicSizes().begin() +
+ Value substituteOp = *(alloc.getDynamicSizes().begin() +
memrefType.getDynamicDimIndex(index.value()));
rewriter.replaceOp(dimOp, substituteOp);
return success();
diff --git a/mlir/lib/Dialect/GPU/IR/InferIntRangeInterfaceImpls.cpp b/mlir/lib/Dialect/GPU/IR/InferIntRangeInterfaceImpls.cpp
index 5d90fdd59022d..3df44a29296ba 100644
--- a/mlir/lib/Dialect/GPU/IR/InferIntRangeInterfaceImpls.cpp
+++ b/mlir/lib/Dialect/GPU/IR/InferIntRangeInterfaceImpls.cpp
@@ -83,7 +83,7 @@ void LaunchOp::inferResultRanges(ArrayRef<ConstantIntRanges> argRanges,
setResultRange(idxResult, idxRange);
};
- argRanges = argRanges.drop_front(asyncDependencies().size());
+ argRanges = argRanges.drop_front(getAsyncDependencies().size());
KernelDim3 gridDims = getGridSize();
KernelDim3 blockIds = getBlockIds();
setRange(argRanges[0], gridDims.x, blockIds.x);
diff --git a/mlir/lib/Dialect/GPU/Transforms/AllReduceLowering.cpp b/mlir/lib/Dialect/GPU/Transforms/AllReduceLowering.cpp
index 89334663ed749..32bf6e3f82157 100644
--- a/mlir/lib/Dialect/GPU/Transforms/AllReduceLowering.cpp
+++ b/mlir/lib/Dialect/GPU/Transforms/AllReduceLowering.cpp
@@ -31,7 +31,7 @@ struct GpuAllReduceRewriter {
GpuAllReduceRewriter(gpu::GPUFuncOp funcOp, gpu::AllReduceOp reduceOp,
PatternRewriter &rewriter)
: funcOp(funcOp), reduceOp(reduceOp), rewriter(rewriter),
- loc(reduceOp.getLoc()), valueType(reduceOp.value().getType()),
+ loc(reduceOp.getLoc()), valueType(reduceOp.getValue().getType()),
indexType(IndexType::get(reduceOp.getContext())),
int32Type(IntegerType::get(reduceOp.getContext(), /*width=*/32)) {}
@@ -100,8 +100,8 @@ struct GpuAllReduceRewriter {
assert(accumFactory && "failed to create accumulator factory");
// Reduce elements within each subgroup to produce the intermediate results.
- Value subgroupReduce = createSubgroupReduce(activeWidth, laneId,
- reduceOp.value(), accumFactory);
+ Value subgroupReduce = createSubgroupReduce(
+ activeWidth, laneId, reduceOp.getValue(), accumFactory);
// Add workgroup buffer to parent function for intermediate result.
Value buffer = createWorkgroupBuffer();
@@ -168,10 +168,10 @@ struct GpuAllReduceRewriter {
/// Returns an accumulator factory using either the op attribute or the body
/// region.
AccumulatorFactory getFactory() {
- auto &body = reduceOp.body();
+ auto &body = reduceOp.getBody();
if (!body.empty())
return getFactory(body);
- auto opAttr = reduceOp.op();
+ auto opAttr = reduceOp.getOp();
if (opAttr)
return getFactory(*opAttr);
return AccumulatorFactory();
diff --git a/mlir/lib/Dialect/GPU/Transforms/AsyncRegionRewriter.cpp b/mlir/lib/Dialect/GPU/Transforms/AsyncRegionRewriter.cpp
index ab0aa7cc2ed25..0c669c724c03e 100644
--- a/mlir/lib/Dialect/GPU/Transforms/AsyncRegionRewriter.cpp
+++ b/mlir/lib/Dialect/GPU/Transforms/AsyncRegionRewriter.cpp
@@ -77,7 +77,7 @@ struct GpuAsyncRegionPass::ThreadTokenCallback {
if (auto waitOp = llvm::dyn_cast<gpu::WaitOp>(op)) {
if (currentToken)
waitOp.addAsyncDependency(currentToken);
- currentToken = waitOp.asyncToken();
+ currentToken = waitOp.getAsyncToken();
return success();
}
builder.setInsertionPoint(op);
@@ -132,7 +132,8 @@ struct GpuAsyncRegionPass::ThreadTokenCallback {
}
Value createWaitOp(Location loc, Type resultType, ValueRange operands) {
- return builder.create<gpu::WaitOp>(loc, resultType, operands).asyncToken();
+ return builder.create<gpu::WaitOp>(loc, resultType, operands)
+ .getAsyncToken();
}
OpBuilder builder;
@@ -194,7 +195,7 @@ struct GpuAsyncRegionPass::DeferWaitCallback {
// async.execute's region is currently restricted to one block.
for (auto &op : llvm::reverse(executeOp.getBody()->without_terminator())) {
if (auto waitOp = dyn_cast<gpu::WaitOp>(op)) {
- if (!waitOp.asyncToken())
+ if (!waitOp.getAsyncToken())
worklist.push_back(waitOp);
return;
}
diff --git a/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp b/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp
index 03fe4bebf0a33..1fe9e74d38af8 100644
--- a/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp
+++ b/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp
@@ -117,7 +117,7 @@ LogicalResult mlir::sinkOperationsIntoLaunchOp(
gpu::LaunchOp launchOp,
llvm::function_ref<bool(Operation *)> isSinkingBeneficiary) {
assert(isSinkingBeneficiary);
- Region &launchOpBody = launchOp.body();
+ Region &launchOpBody = launchOp.getBody();
// Identify uses from values defined outside of the scope of the launch
// operation.
@@ -142,7 +142,7 @@ LogicalResult mlir::sinkOperationsIntoLaunchOp(
// Only replace uses within the launch op.
for (auto pair : llvm::zip(op->getResults(), clonedOp->getResults()))
replaceAllUsesInRegionWith(std::get<0>(pair), std::get<1>(pair),
- launchOp.body());
+ launchOp.getBody());
}
return success();
}
@@ -156,7 +156,7 @@ static gpu::GPUFuncOp outlineKernelFuncImpl(gpu::LaunchOp launchOp,
// Create a builder with no insertion point, insertion will happen separately
// due to symbol table manipulation.
OpBuilder builder(launchOp.getContext());
- Region &launchOpBody = launchOp.body();
+ Region &launchOpBody = launchOp.getBody();
// Identify uses from values defined outside of the scope of the launch
// operation.
@@ -177,7 +177,7 @@ static gpu::GPUFuncOp outlineKernelFuncImpl(gpu::LaunchOp launchOp,
// Map the arguments corresponding to the launch parameters like blockIdx,
// threadIdx, etc.
- Region &outlinedFuncBody = outlinedFunc.body();
+ Region &outlinedFuncBody = outlinedFunc.getBody();
injectGpuIndexOperations(loc, outlinedFuncBody, launchOpBody, map);
// Map arguments from gpu.launch region to the arguments of the gpu.func
@@ -231,12 +231,13 @@ static void convertToLaunchFuncOp(gpu::LaunchOp launchOp,
OpBuilder builder(launchOp);
// The launch op has an optional dynamic shared memory size. If it doesn't
// exist, we use zero.
- Value asyncToken = launchOp.asyncToken();
+ Value asyncToken = launchOp.getAsyncToken();
auto launchFunc = builder.create<gpu::LaunchFuncOp>(
launchOp.getLoc(), kernelFunc, launchOp.getGridSizeOperandValues(),
- launchOp.getBlockSizeOperandValues(), launchOp.dynamicSharedMemorySize(),
- operands, asyncToken ? asyncToken.getType() : nullptr,
- launchOp.asyncDependencies());
+ launchOp.getBlockSizeOperandValues(),
+ launchOp.getDynamicSharedMemorySize(), operands,
+ asyncToken ? asyncToken.getType() : nullptr,
+ launchOp.getAsyncDependencies());
launchOp.replaceAllUsesWith(launchFunc);
launchOp.erase();
}
diff --git a/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp b/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp
index 2993d9c00e58c..7f7459fe6c3e8 100644
--- a/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp
+++ b/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp
@@ -1325,17 +1325,20 @@ static LogicalResult alterGpuLaunch(SimpleRewriter &rewriter,
};
if (gridDimX.has_value())
- gpuLaunch.gridSizeXMutable().assign(createConstValue(gridDimX.value()));
+ gpuLaunch.getGridSizeXMutable().assign(createConstValue(gridDimX.value()));
if (gridDimY.has_value())
- gpuLaunch.gridSizeYMutable().assign(createConstValue(gridDimY.value()));
+ gpuLaunch.getGridSizeYMutable().assign(createConstValue(gridDimY.value()));
if (gridDimZ.has_value())
- gpuLaunch.gridSizeZMutable().assign(createConstValue(gridDimZ.value()));
+ gpuLaunch.getGridSizeZMutable().assign(createConstValue(gridDimZ.value()));
if (blockDimX.has_value())
- gpuLaunch.blockSizeXMutable().assign(createConstValue(blockDimX.value()));
+ gpuLaunch.getBlockSizeXMutable().assign(
+ createConstValue(blockDimX.value()));
if (blockDimY.has_value())
- gpuLaunch.blockSizeYMutable().assign(createConstValue(blockDimY.value()));
+ gpuLaunch.getBlockSizeYMutable().assign(
+ createConstValue(blockDimY.value()));
if (blockDimZ.has_value())
- gpuLaunch.blockSizeZMutable().assign(createConstValue(blockDimZ.value()));
+ gpuLaunch.getBlockSizeZMutable().assign(
+ createConstValue(blockDimZ.value()));
return success();
}
@@ -1480,7 +1483,7 @@ createGpuLaunch(RewriterBase &rewriter, Location loc,
blockDimZ.has_value() ? createConstant(blockDimZ.value()) : one;
auto launchOp = rewriter.create<gpu::LaunchOp>(
loc, gridSizeX, gridSizeY, gridSizeZ, blockSizeX, blockSizeY, blockSizeZ);
- rewriter.setInsertionPointToEnd(&launchOp.body().front());
+ rewriter.setInsertionPointToEnd(&launchOp.getBody().front());
rewriter.create<gpu::TerminatorOp>(loc);
return launchOp;
}
@@ -1530,7 +1533,7 @@ transform::MapNestedForeachThreadToGpuBlocks::applyToOne(
if (failed(maybeGpuLaunch))
return DiagnosedSilenceableFailure(reportUnknownTransformError(target));
gpuLaunch = *maybeGpuLaunch;
- rewriter.setInsertionPointToStart(&gpuLaunch.body().front());
+ rewriter.setInsertionPointToStart(&gpuLaunch.getBody().front());
Operation *newForeachThreadOp = rewriter.clone(*topLevelForeachThreadOp);
rewriter.eraseOp(topLevelForeachThreadOp);
topLevelForeachThreadOp =