[clang] [llvm] [mlir] Revert "[AMDGPU] Rework the clamp support for WMMA instructions" (PR #174674)
via cfe-commits
cfe-commits at lists.llvm.org
Tue Jan 6 16:49:06 PST 2026
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: None (dyung)
<details>
<summary>Changes</summary>
Reverts llvm/llvm-project#<!-- -->174310
This change is causing two cross-project-test failures in the following buildbot build: https://lab.llvm.org/buildbot/#/builders/174/builds/29695
---
Patch is 87.53 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/174674.diff
18 Files Affected:
- (modified) clang/include/clang/Basic/BuiltinsAMDGPU.def (+2-2)
- (modified) clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp (-14)
- (modified) clang/lib/Sema/SemaAMDGPU.cpp (-34)
- (modified) clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-wmma-w32.cl (+2-24)
- (modified) clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-wmma-w32-param.cl (-6)
- (modified) llvm/include/llvm/IR/IntrinsicsAMDGPU.td (+4-23)
- (modified) llvm/lib/IR/AutoUpgrade.cpp (-57)
- (modified) llvm/lib/Target/AMDGPU/SIISelLowering.cpp (+5-7)
- (modified) llvm/lib/Target/AMDGPU/VOP3PInstructions.td (+9-9)
- (modified) llvm/test/Analysis/UniformityAnalysis/AMDGPU/intrinsics.ll (+6-6)
- (removed) llvm/test/Bitcode/amdgpu-wmma-clamp-upgrade.ll (-25)
- (modified) llvm/test/CodeGen/AMDGPU/wmma-coececution-valu-hazards.mir (+26-26)
- (modified) llvm/test/CodeGen/AMDGPU/wmma-hazards-gfx1250-w32.mir (+20-20)
- (modified) llvm/test/MC/AMDGPU/gfx1250_asm_wmma_w32.s (-10)
- (modified) llvm/test/MC/AMDGPU/gfx1250_asm_wmma_w32_err.s (+6)
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_wmma_w32.txt (-6)
- (modified) mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td (+4-5)
- (modified) mlir/test/Target/LLVMIR/rocdl.mlir (+6-6)
``````````diff
diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index 7faf73b7628fe..24b79c3b69b67 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -857,7 +857,7 @@ TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x64_fp8_fp8, "V8hV8iV8iIsV8hIbIb",
TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x64_fp8_bf8, "V8hV8iV8iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x64_bf8_fp8, "V8hV8iV8iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x64_bf8_bf8, "V8hV8iV8iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32")
-TARGET_BUILTIN(__builtin_amdgcn_wmma_i32_16x16x64_iu8, "V8iIbV8iIbV8iV8iIbIb.", "nc", "gfx1250-insts,wavefrontsize32")
+TARGET_BUILTIN(__builtin_amdgcn_wmma_i32_16x16x64_iu8, "V8iIbV8iIbV8iV8iIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x128_fp8_fp8, "V8hV16iV16iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x128_fp8_bf8, "V8hV16iV16iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x128_bf8_fp8, "V8hV16iV16iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32")
@@ -885,7 +885,7 @@ TARGET_BUILTIN(__builtin_amdgcn_swmmac_f16_16x16x128_fp8_fp8, "V8hV8iV16iV8hiIbI
TARGET_BUILTIN(__builtin_amdgcn_swmmac_f16_16x16x128_fp8_bf8, "V8hV8iV16iV8hiIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_swmmac_f16_16x16x128_bf8_fp8, "V8hV8iV16iV8hiIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_swmmac_f16_16x16x128_bf8_bf8, "V8hV8iV16iV8hiIbIb", "nc", "gfx1250-insts,wavefrontsize32")
-TARGET_BUILTIN(__builtin_amdgcn_swmmac_i32_16x16x128_iu8, "V8iIbV8iIbV16iV8iiIbIb.", "nc", "gfx1250-insts,wavefrontsize32")
+TARGET_BUILTIN(__builtin_amdgcn_swmmac_i32_16x16x128_iu8, "V8iIbV8iIbV16iV8iiIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x64_f16, "V8fIbV16hIbV32hV8fiIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_swmmac_f16_16x16x64_f16, "V8hIbV16hIbV32hV8hiIbIb", "nc", "gfx1250-insts,wavefrontsize32")
diff --git a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
index a8a5bc348f00c..eabdc370da6b4 100644
--- a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
@@ -1665,20 +1665,6 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
if (AppendFalseForOpselArg)
Args.push_back(Builder.getFalse());
- // Handle the optional clamp argument of the following two builtins.
- if (BuiltinID == AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x64_iu8) {
- if (Args.size() == 7)
- Args.push_back(Builder.getFalse());
- assert(Args.size() == 8 && "Expected 8 arguments");
- Args[7] = Builder.CreateZExtOrTrunc(Args[7], Builder.getInt1Ty());
- } else if (BuiltinID ==
- AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x128_iu8) {
- if (Args.size() == 8)
- Args.push_back(Builder.getFalse());
- assert(Args.size() == 9 && "Expected 9 arguments");
- Args[8] = Builder.CreateZExtOrTrunc(Args[8], Builder.getInt1Ty());
- }
-
SmallVector<llvm::Type *, 6> ArgTypes;
if (NeedReturnType)
ArgTypes.push_back(ConvertType(E->getType()));
diff --git a/clang/lib/Sema/SemaAMDGPU.cpp b/clang/lib/Sema/SemaAMDGPU.cpp
index 9d154c65c932e..cece22092bb14 100644
--- a/clang/lib/Sema/SemaAMDGPU.cpp
+++ b/clang/lib/Sema/SemaAMDGPU.cpp
@@ -255,40 +255,6 @@ bool SemaAMDGPU::CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID,
(SemaRef.BuiltinConstantArg(TheCall, ArgCount, Result)) ||
(SemaRef.BuiltinConstantArg(TheCall, (ArgCount - 1), Result));
}
- case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x64_iu8:
- case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x128_iu8: {
- if (BuiltinID == AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x64_iu8) {
- if (SemaRef.checkArgCountRange(TheCall, 7, 8))
- return true;
- if (TheCall->getNumArgs() == 7)
- return false;
- } else if (BuiltinID ==
- AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x128_iu8) {
- if (SemaRef.checkArgCountRange(TheCall, 8, 9))
- return true;
- if (TheCall->getNumArgs() == 8)
- return false;
- }
- // Check if the last argument (clamp operand) is a constant and is
- // convertible to bool.
- Expr *ClampArg = TheCall->getArg(TheCall->getNumArgs() - 1);
- // 1) Ensure clamp argument is a constant expression
- llvm::APSInt ClampValue;
- if (!SemaRef.VerifyIntegerConstantExpression(ClampArg, &ClampValue)
- .isUsable())
- return true;
- // 2) Check if the argument can be converted to bool type
- if (!SemaRef.Context.hasSameType(ClampArg->getType(),
- SemaRef.Context.BoolTy)) {
- // Try to convert to bool
- QualType BoolTy = SemaRef.Context.BoolTy;
- ExprResult ClampExpr(ClampArg);
- SemaRef.CheckSingleAssignmentConstraints(BoolTy, ClampExpr);
- if (ClampExpr.isInvalid())
- return true;
- }
- return false;
- }
default:
return false;
}
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-wmma-w32.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-wmma-w32.cl
index a463ba7ab41c3..afad4bb15b528 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-wmma-w32.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-wmma-w32.cl
@@ -148,7 +148,7 @@ void test_amdgcn_wmma_f16_16x16x64_bf8_bf8(global v8h* out, v8i a, v8i b, v8h c)
// CHECK-GFX1250-LABEL: @test_amdgcn_wmma_i32_16x16x64_iu8(
// CHECK-GFX1250-NEXT: entry:
-// CHECK-GFX1250-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x64.iu8.v8i32.v8i32(i1 false, <8 x i32> [[A:%.*]], i1 false, <8 x i32> [[B:%.*]], <8 x i32> [[C:%.*]], i1 false, i1 true, i1 false)
+// CHECK-GFX1250-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x64.iu8.v8i32.v8i32(i1 false, <8 x i32> [[A:%.*]], i1 false, <8 x i32> [[B:%.*]], <8 x i32> [[C:%.*]], i1 false, i1 true)
// CHECK-GFX1250-NEXT: store <8 x i32> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 32, !tbaa [[TBAA8]]
// CHECK-GFX1250-NEXT: ret void
//
@@ -157,17 +157,6 @@ void test_amdgcn_wmma_i32_16x16x64_iu8(global v8i* out, v8i a, v8i b, v8i c)
*out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(0, a, 0, b, c, false, true);
}
-// CHECK-GFX1250-LABEL: @test_amdgcn_wmma_i32_16x16x64_iu8_clamp(
-// CHECK-GFX1250-NEXT: entry:
-// CHECK-GFX1250-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x64.iu8.v8i32.v8i32(i1 false, <8 x i32> [[A:%.*]], i1 false, <8 x i32> [[B:%.*]], <8 x i32> [[C:%.*]], i1 false, i1 true, i1 true)
-// CHECK-GFX1250-NEXT: store <8 x i32> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 32, !tbaa [[TBAA8]]
-// CHECK-GFX1250-NEXT: ret void
-//
-void test_amdgcn_wmma_i32_16x16x64_iu8_clamp(global v8i* out, v8i a, v8i b, v8i c)
-{
- *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(0, a, 0, b, c, false, true, 1);
-}
-
// CHECK-GFX1250-LABEL: @test_amdgcn_wmma_f32_16x16x128_f8f6f4(
// CHECK-GFX1250-NEXT: entry:
// CHECK-GFX1250-NEXT: [[TMP0:%.*]] = shufflevector <16 x i32> [[B:%.*]], <16 x i32> poison, <12 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
@@ -470,7 +459,7 @@ void test_amdgcn_swmmac_f16_16x16x128_bf8_bf8(global v8h* out, v8i a, v16i b, v8
// CHECK-GFX1250-LABEL: @test_amdgcn_swmmac_i32_16x16x128_iu8(
// CHECK-GFX1250-NEXT: entry:
-// CHECK-GFX1250-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.amdgcn.swmmac.i32.16x16x128.iu8.v8i32.v8i32.v16i32.i32(i1 false, <8 x i32> [[A:%.*]], i1 false, <16 x i32> [[B:%.*]], <8 x i32> [[C:%.*]], i32 [[INDEX:%.*]], i1 false, i1 true, i1 false)
+// CHECK-GFX1250-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.amdgcn.swmmac.i32.16x16x128.iu8.v8i32.v8i32.v16i32.i32(i1 false, <8 x i32> [[A:%.*]], i1 false, <16 x i32> [[B:%.*]], <8 x i32> [[C:%.*]], i32 [[INDEX:%.*]], i1 false, i1 true)
// CHECK-GFX1250-NEXT: store <8 x i32> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 32, !tbaa [[TBAA8]]
// CHECK-GFX1250-NEXT: ret void
//
@@ -479,17 +468,6 @@ void test_amdgcn_swmmac_i32_16x16x128_iu8(global v8i* out, v8i a, v16i b, v8i c,
*out = __builtin_amdgcn_swmmac_i32_16x16x128_iu8(0, a, 0, b, c, index, false, true);
}
-// CHECK-GFX1250-LABEL: @test_amdgcn_swmmac_i32_16x16x128_iu8_clamp(
-// CHECK-GFX1250-NEXT: entry:
-// CHECK-GFX1250-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.amdgcn.swmmac.i32.16x16x128.iu8.v8i32.v8i32.v16i32.i32(i1 false, <8 x i32> [[A:%.*]], i1 false, <16 x i32> [[B:%.*]], <8 x i32> [[C:%.*]], i32 [[INDEX:%.*]], i1 false, i1 true, i1 true)
-// CHECK-GFX1250-NEXT: store <8 x i32> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 32, !tbaa [[TBAA8]]
-// CHECK-GFX1250-NEXT: ret void
-//
-void test_amdgcn_swmmac_i32_16x16x128_iu8_clamp(global v8i* out, v8i a, v16i b, v8i c, int index)
-{
- *out = __builtin_amdgcn_swmmac_i32_16x16x128_iu8(0, a, 0, b, c, index, false, true, 1);
-}
-
// CHECK-GFX1250-LABEL: @test_amdgcn_swmmac_f32_16x16x64_f16(
// CHECK-GFX1250-NEXT: entry:
// CHECK-GFX1250-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.swmmac.f32.16x16x64.f16.v8f32.v16f16.v32f16.i32(i1 false, <16 x half> [[A:%.*]], i1 false, <32 x half> [[B:%.*]], <8 x float> [[C:%.*]], i32 [[INDEX:%.*]], i1 false, i1 true)
diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-wmma-w32-param.cl b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-wmma-w32-param.cl
index e3e0cc3f596c7..49ef2e571740c 100644
--- a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-wmma-w32-param.cl
+++ b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-wmma-w32-param.cl
@@ -112,9 +112,6 @@ void test_amdgcn_wmma_i32_16x16x64_iu8(global v8i* out, v8i a, v8i b, v8i c, int
*out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(0, a, mod, b, c, false, false); // expected-error {{'__builtin_amdgcn_wmma_i32_16x16x64_iu8' must be a constant integer}}
*out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(0, a, 0, b, c, mod, false); // expected-error {{'__builtin_amdgcn_wmma_i32_16x16x64_iu8' must be a constant integer}}
*out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(0, a, 0, b, c, false, mod); // expected-error {{'__builtin_amdgcn_wmma_i32_16x16x64_iu8' must be a constant integer}}
- *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(0, a, 0, b, c, false, true, 32.0f); // expected-error {{integer constant expression must have integer type, not 'double'}}
- *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(0, a, 0, b, c, false, true, mod); // expected-error {{expression is not an integer constant expression}}
- *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(0, a, 0, b, c, false, true, true, 32.0f); // expected-error {{too many arguments to function call, expected at most 8, have 9}}
}
void test_amdgcn_wmma_f32_16x16x128_f8f6f4(global v8f* out, v16i a, v16i b, v8f c, int mod)
@@ -289,9 +286,6 @@ void test_amdgcn_swmmac_i32_16x16x128_iu8(global v8i* out, v8i a, v16i b, v8i c,
*out = __builtin_amdgcn_swmmac_i32_16x16x128_iu8(0, a, mod, b, c, index, false, false); // expected-error {{'__builtin_amdgcn_swmmac_i32_16x16x128_iu8' must be a constant integer}}
*out = __builtin_amdgcn_swmmac_i32_16x16x128_iu8(0, a, 0, b, c, index, mod, false); // expected-error {{'__builtin_amdgcn_swmmac_i32_16x16x128_iu8' must be a constant integer}}
*out = __builtin_amdgcn_swmmac_i32_16x16x128_iu8(0, a, 0, b, c, index, false, mod); // expected-error {{'__builtin_amdgcn_swmmac_i32_16x16x128_iu8' must be a constant integer}}
- *out = __builtin_amdgcn_swmmac_i32_16x16x128_iu8(0, a, 0, b, c, index, false, true, 32.0f); // expected-error {{integer constant expression must have integer type, not 'double'}}
- *out = __builtin_amdgcn_swmmac_i32_16x16x128_iu8(0, a, 0, b, c, index, false, true, mod); // expected-error {{expression is not an integer constant expression}}
- *out = __builtin_amdgcn_swmmac_i32_16x16x128_iu8(0, a, 0, b, c, index, false, true, true, 32.0f); // expected-error {{too many arguments to function call, expected at most 9, have 10}}
}
void test_amdgcn_swmmac_f32_16x16x64_f16(global v8f* out, v16h a, v32h b, v8f c, int index, int mod)
diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index a8eba9ed126b7..f2650f678deaf 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -3912,7 +3912,7 @@ def int_amdgcn_global_store_async_from_lds_b128 :
ClangBuiltin<"__builtin_amdgcn_global_store_async_from_lds_b128">, AMDGPUAsyncGlobalStoreFromLDS;
// WMMA intrinsics.
-class AMDGPUWmmaIntrinsicModsABClamp<LLVMType AB, LLVMType CD> :
+class AMDGPUWmmaIntrinsicModsAB<LLVMType AB, LLVMType CD> :
Intrinsic<
[CD], // %D
[
@@ -3923,9 +3923,8 @@ class AMDGPUWmmaIntrinsicModsABClamp<LLVMType AB, LLVMType CD> :
LLVMMatchType<0>, // %C
llvm_i1_ty, // matrix_a_reuse
llvm_i1_ty, // matrix_b_reuse
- llvm_i1_ty, // %clamp
],
- [IntrNoMem, IntrConvergent, ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<5>>, ImmArg<ArgIndex<6>>, ImmArg<ArgIndex<7>>,
+ [IntrNoMem, IntrConvergent, ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<5>>, ImmArg<ArgIndex<6>>,
IntrWillReturn, IntrNoCallback, IntrNoFree, IntrNoCreateUndefOrPoison]
>;
@@ -4090,7 +4089,7 @@ def int_amdgcn_wmma_f32_16x16x128_fp8_fp8 : AMDGPUWmmaIntrinsicModsC<llvm_anyint
def int_amdgcn_wmma_f32_16x16x128_fp8_bf8 : AMDGPUWmmaIntrinsicModsC<llvm_anyint_ty, llvm_anyfloat_ty>;
def int_amdgcn_wmma_f32_16x16x128_bf8_fp8 : AMDGPUWmmaIntrinsicModsC<llvm_anyint_ty, llvm_anyfloat_ty>;
def int_amdgcn_wmma_f32_16x16x128_bf8_bf8 : AMDGPUWmmaIntrinsicModsC<llvm_anyint_ty, llvm_anyfloat_ty>;
-def int_amdgcn_wmma_i32_16x16x64_iu8 : AMDGPUWmmaIntrinsicModsABClamp<llvm_anyint_ty, llvm_anyint_ty>;
+def int_amdgcn_wmma_i32_16x16x64_iu8 : AMDGPUWmmaIntrinsicModsAB<llvm_anyint_ty, llvm_anyint_ty>;
def int_amdgcn_wmma_f32_16x16x128_f8f6f4 : AMDGPUWmmaIntrinsicModsC_MatrixFMT;
def int_amdgcn_wmma_scale_f32_16x16x128_f8f6f4 : AMDGPUWmmaScaleIntrinsicModsC<llvm_i32_ty>;
def int_amdgcn_wmma_scale16_f32_16x16x128_f8f6f4 : AMDGPUWmmaScaleIntrinsicModsC<llvm_i64_ty>;
@@ -4116,24 +4115,6 @@ class AMDGPUSWmmacIntrinsicABIdx<LLVMType A, LLVMType B, LLVMType CD, LLVMType I
ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<6>>, ImmArg<ArgIndex<7>>]
>;
-class AMDGPUSWmmacIntrinsicABIdxClamp<LLVMType A, LLVMType B, LLVMType CD, LLVMType Index> :
- Intrinsic<
- [CD], // %D
- [
- llvm_i1_ty, // %A_mod: 0 - none, 1 - neg
- A, // %A
- llvm_i1_ty, // %B_mod: 0 - none, 1 - neg
- B, // %B
- LLVMMatchType<0>, // %C
- Index, // %Sparsity index for A
- llvm_i1_ty, // matrix_a_reuse
- llvm_i1_ty, // matrix_b_reuse
- llvm_i1_ty, // %clamp
- ],
- [IntrNoMem, IntrConvergent, IntrWillReturn, IntrNoCreateUndefOrPoison,
- ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<6>>, ImmArg<ArgIndex<7>>, ImmArg<ArgIndex<8>>]
->;
-
defset list<Intrinsic> AMDGPUSWMMACIntrinsicsGFX1250 = {
def int_amdgcn_swmmac_f32_16x16x64_f16 : AMDGPUSWmmacIntrinsicABIdx<llvm_anyfloat_ty, llvm_anyfloat_ty, llvm_anyfloat_ty, llvm_anyint_ty>;
def int_amdgcn_swmmac_f32_16x16x64_bf16 : AMDGPUSWmmacIntrinsicABIdx<llvm_anyfloat_ty, llvm_anyfloat_ty, llvm_anyfloat_ty, llvm_anyint_ty>;
@@ -4148,7 +4129,7 @@ def int_amdgcn_swmmac_f16_16x16x128_fp8_fp8 : AMDGPUSWmmacIntrinsicIdxReuse<llvm
def int_amdgcn_swmmac_f16_16x16x128_fp8_bf8 : AMDGPUSWmmacIntrinsicIdxReuse<llvm_anyint_ty, llvm_anyint_ty, llvm_anyfloat_ty, llvm_anyint_ty>;
def int_amdgcn_swmmac_f16_16x16x128_bf8_fp8 : AMDGPUSWmmacIntrinsicIdxReuse<llvm_anyint_ty, llvm_anyint_ty, llvm_anyfloat_ty, llvm_anyint_ty>;
def int_amdgcn_swmmac_f16_16x16x128_bf8_bf8 : AMDGPUSWmmacIntrinsicIdxReuse<llvm_anyint_ty, llvm_anyint_ty, llvm_anyfloat_ty, llvm_anyint_ty>;
-def int_amdgcn_swmmac_i32_16x16x128_iu8 : AMDGPUSWmmacIntrinsicABIdxClamp<llvm_anyint_ty, llvm_anyint_ty, llvm_anyint_ty, llvm_anyint_ty>;
+def int_amdgcn_swmmac_i32_16x16x128_iu8 : AMDGPUSWmmacIntrinsicABIdx<llvm_anyint_ty, llvm_anyint_ty, llvm_anyint_ty, llvm_anyint_ty>;
}
diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp
index d8b54c396df28..cbb7b6ee4f3f5 100644
--- a/llvm/lib/IR/AutoUpgrade.cpp
+++ b/llvm/lib/IR/AutoUpgrade.cpp
@@ -32,7 +32,6 @@
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAArch64.h"
-#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/IntrinsicsNVPTX.h"
#include "llvm/IR/IntrinsicsRISCV.h"
@@ -1285,18 +1284,6 @@ static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn,
break; // No other 'amdgcn.atomic.*'
}
- // Legacy wmma iu intrinsics without the optional clamp operand.
- if (F->getIntrinsicID() == Intrinsic::amdgcn_wmma_i32_16x16x64_iu8 &&
- F->arg_size() == 7) {
- NewFn = nullptr;
- return true;
- }
- if (F->getIntrinsicID() == Intrinsic::amdgcn_swmmac_i32_16x16x128_iu8 &&
- F->arg_size() == 8) {
- NewFn = nullptr;
- return true;
- }
-
if (Name.consume_front("ds.") || Name.consume_front("global.atomic.") ||
Name.consume_front("flat.atomic.")) {
if (Name.starts_with("fadd") ||
@@ -4633,50 +4620,6 @@ static Value *upgradeARMIntrinsicCall(StringRef Name, CallBase *CI, Function *F,
//
static Value *upgradeAMDGCNIntrinsicCall(StringRef Name, CallBase *CI,
Function *F, IRBuilder<> &Builder) {
- // Legacy WMMA iu intrinsics missed the optional clamp operand. Append clamp=0
- // for compatibility.
- auto UpgradeLegacyWMMAIUIntrinsicCall =
- [](Function *F, CallBase *CI, IRBuilder<> &Builder,
- ArrayRef<Type *> OverloadTys) -> Value * {
- // Prepare arguments, append clamp=0 for compatibility
- SmallVector<Value *, 10> Args(CI->args().begin(), CI->args().end());
- Args.push_back(Builder.getFalse());
-
- // Insert the declaration for the right overload types
- Function *NewDecl = Intrinsic::getOrInsertDeclaration(
- F->getParent(), F->getIntrinsicID(), OverloadTys);
-
- // Copy operand bundles if any
- SmallVector<OperandBundleDef, 1> Bundles;
- CI->getOperandBundlesAsDefs(Bundles);
-
- // Create the new call and copy calling properties
- auto *NewCall = cast<CallInst>(Builder.CreateCall(NewDecl, Args, Bundles));
- NewCall->setTailCallKind(cast<CallInst>(CI)->getTailCallKind());
- NewCall->setCallingConv(CI->getCallingConv());
- NewCall->setAttributes(CI->getAttributes());
- NewCall->setDebugLoc(CI->getDebugLoc());
- NewCall->copyMetadata(*CI);
- return NewCall;
- };
-
- if (F->getIntrinsicID() == Intrinsic::amdgcn_wmma_i32_16x16x64_iu8) {
- assert(CI->arg_size() == 7 && "Legacy int_amdgcn_wmma_i32_16x16x64_iu8 "
- "intrinsic should have 7 arguments");
- Type *T1 = CI->getArgOperand(4)->getType();
- Type *T2 = CI->getArgOperand(1)->getType();
- return UpgradeLegacyWMMAIUIntrinsicCall(F, CI, Builder, {T1, T2});
- }
- if (F->getIntrinsicID() == Intrinsic::amdgcn_swmmac_i32_16x16x128_iu8) {
- assert(CI->arg_size() == 8 && "Legacy int_amdgcn_swmmac_i32_16x16x128_iu8 "
- "intrinsic should have 8 arguments");
- Type *T1 = CI->getArgOperand(4)->getType();
- Type *T2 = CI->getArgOperand(1)->getType();
- Type *T3 = CI->getArgOperand(3)->getType();
- Type *T4 = CI->getArgOperand(5)->getType();
- return UpgradeLegacyWMMAIUIntrinsicCall(F, CI, Builder, {T1, T2, T3, T4});
- }
-
AtomicRMWInst::BinOp RMWOp =
StringSwitch<AtomicRMWInst::BinOp>(Name)
.StartsWith("ds.fadd", AtomicRMWInst::FAdd)
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 139e2d101a077.....
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/174674
More information about the cfe-commits mailing list