[PATCH] D111823: [AMDGPU] Fix width check for signed mul24 generation.
Abinav Puthan Purayil via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Thu Oct 14 10:23:39 PDT 2021
abinavpp created this revision.
abinavpp added reviewers: arsenm, foad, rampitec.
Herald added subscribers: kerbowa, hiraditya, t-tye, tpr, dstuttard, yaxunl, nhaehnle, jvesely, kzhuravl.
abinavpp requested review of this revision.
Herald added subscribers: llvm-commits, wdng.
Herald added a project: LLVM.
This change fixes a case in which the highest set bit of the original
result is at bit 31, so that sign-extending the 32-bit mul24 result would
incorrectly make the final value negative.
Repository:
rG LLVM Github Monorepo
https://reviews.llvm.org/D111823
Files:
llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-mul24.ll
Index: llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-mul24.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-mul24.ll
+++ llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-mul24.ll
@@ -239,6 +239,39 @@
ret i64 %mul
}
+define i64 @smul24_i64_3(i64 %lhs, i64 %rhs) {
+; SI-LABEL: @smul24_i64_3(
+; SI-NEXT: [[LHS_TRUNC:%.*]] = trunc i64 [[LHS:%.*]] to i16
+; SI-NEXT: [[LHS24:%.*]] = sext i16 [[LHS_TRUNC]] to i64
+; SI-NEXT: [[RHS_TRUNC:%.*]] = trunc i64 [[RHS:%.*]] to i17
+; SI-NEXT: [[RHS24:%.*]] = sext i17 [[RHS_TRUNC]] to i64
+; SI-NEXT: [[MUL:%.*]] = mul i64 [[LHS24]], [[RHS24]]
+; SI-NEXT: ret i64 [[MUL]]
+;
+; VI-LABEL: @smul24_i64_3(
+; VI-NEXT: [[LHS_TRUNC:%.*]] = trunc i64 [[LHS:%.*]] to i16
+; VI-NEXT: [[LHS24:%.*]] = sext i16 [[LHS_TRUNC]] to i64
+; VI-NEXT: [[RHS_TRUNC:%.*]] = trunc i64 [[RHS:%.*]] to i17
+; VI-NEXT: [[RHS24:%.*]] = sext i17 [[RHS_TRUNC]] to i64
+; VI-NEXT: [[MUL:%.*]] = mul i64 [[LHS24]], [[RHS24]]
+; VI-NEXT: ret i64 [[MUL]]
+;
+; DISABLED-LABEL: @smul24_i64_3(
+; DISABLED-NEXT: [[LHS_TRUNC:%.*]] = trunc i64 [[LHS:%.*]] to i16
+; DISABLED-NEXT: [[LHS24:%.*]] = sext i16 [[LHS_TRUNC]] to i64
+; DISABLED-NEXT: [[RHS_TRUNC:%.*]] = trunc i64 [[RHS:%.*]] to i17
+; DISABLED-NEXT: [[RHS24:%.*]] = sext i17 [[RHS_TRUNC]] to i64
+; DISABLED-NEXT: [[MUL:%.*]] = mul i64 [[LHS24]], [[RHS24]]
+; DISABLED-NEXT: ret i64 [[MUL]]
+;
+ %lhs.trunc = trunc i64 %lhs to i16
+ %lhs24 = sext i16 %lhs.trunc to i64
+ %rhs.trunc = trunc i64 %rhs to i17
+ %rhs24 = sext i17 %rhs.trunc to i64
+ %mul = mul i64 %lhs24, %rhs24
+ ret i64 %mul
+}
+
define i64 @umul24_i64(i64 %lhs, i64 %rhs) {
; SI-LABEL: @umul24_i64(
; SI-NEXT: [[LHS24:%.*]] = and i64 [[LHS:%.*]], 16777215
Index: llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
@@ -509,10 +509,10 @@
Intrinsic::ID IntrID = Intrinsic::not_intrinsic;
- // TODO: Should this try to match mulhi24?
if (ST->hasMulU24() && isU24(LHS, Size) && isU24(RHS, Size)) {
- // The 24-bit mul intrinsics yields the low-order 32 bits. The result's bit
- // width should not exceed 32 if `Size` > 32.
+ // The mul24 instruction yields the low-order 32 bits. If the original
+ // result and the destination is wider than 32 bits, the mul24 would
+ // truncate the result.
if (Size > 32 &&
numBitsUnsigned(LHS, Size) + numBitsUnsigned(RHS, Size) > 32) {
return false;
@@ -520,7 +520,10 @@
IntrID = Intrinsic::amdgcn_mul_u24;
} else if (ST->hasMulI24() && isI24(LHS, Size) && isI24(RHS, Size)) {
- if (Size > 32 && numBitsSigned(LHS, Size) + numBitsSigned(RHS, Size) > 31) {
+ // The original result is positive if the width is wider than 32 and the
+ // highest set bit of the original result is at bit 31. Generating mul24 and
+ // sign-extending it would yield a negative value.
+ if (Size > 32 && numBitsSigned(LHS, Size) + numBitsSigned(RHS, Size) > 30) {
return false;
}
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D111823.379759.patch
Type: text/x-patch
Size: 3225 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20211014/5f47f5f1/attachment.bin>
More information about the llvm-commits
mailing list