[llvm] [AMDGPU] Narrow 64 bit math to 32 bit if profitable (PR #130577)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 10 06:23:33 PDT 2025
https://github.com/Shoreshen updated https://github.com/llvm/llvm-project/pull/130577
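The new fold, gated by the target cost model, rewrites a 64-bit add/sub/mul whose operands are both masked down to at most 31 bits into the corresponding 32-bit operation followed by a zero-extend of the result. This is profitable on AMDGPU, where 64-bit integer arithmetic is expanded into multiple 32-bit instructions. A minimal sketch of the add case (the @before/@after function names are illustrative, not part of the patch):

define i64 @before(i64 %a, i64 %b) {
  %m0  = and i64 %a, 2147483647    ; each operand fits in 31 bits
  %m1  = and i64 %b, 2147483647
  %add = add i64 %m0, %m1
  ret i64 %add
}

define i64 @after(i64 %a, i64 %b) {
  %m0    = and i64 %a, 2147483647
  %m1    = and i64 %b, 2147483647
  %t0    = trunc i64 %m0 to i32
  %t1    = trunc i64 %m1 to i32
  %add32 = add i32 %t0, %t1        ; arithmetic is done in 32 bits
  %res   = zext i32 %add32 to i64  ; and widened back to the original type
  ret i64 %res
}

Because each masked operand is at most 0x7FFFFFFF, the 32-bit sum cannot wrap, so for the add case the zero-extended result matches the original 64-bit sum.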
>From fc7a5090fe0e6dfcc12894a57df8f43be6a0317e Mon Sep 17 00:00:00 2001
From: shore <372660931 at qq.com>
Date: Mon, 10 Mar 2025 18:47:45 +0800
Subject: [PATCH 1/2] Narrow 64 bit math to 32 bit if profitable
---
.../AggressiveInstCombine.cpp | 44 +++++++++++++++++++
1 file changed, 44 insertions(+)
diff --git a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
index 6b0f568864fd5..73bd75f37cc71 100644
--- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
+++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
@@ -1224,6 +1224,49 @@ static bool foldLibCalls(Instruction &I, TargetTransformInfo &TTI,
return false;
}
+// Try to replace an i64 add/sub/mul whose operands are both masked down to
+// at most 31 bits with the same operation on i32 plus a zero-extend of the
+// result, when the target reports the narrow operation as cheaper.
+static bool tryNarrowMathIfNoOverflow(Instruction &I,
+                                      TargetTransformInfo &TTI) {
+  unsigned Opc = I.getOpcode();
+  if (Opc != Instruction::Add && Opc != Instruction::Sub &&
+      Opc != Instruction::Mul)
+    return false;
+
+  LLVMContext &Ctx = I.getContext();
+  Type *I64Ty = Type::getInt64Ty(Ctx);
+  Type *I32Ty = Type::getInt32Ty(Ctx);
+  if (I.getType() != I64Ty || !TTI.isTruncateFree(I64Ty, I32Ty))
+    return false;
+
+  // Only narrow when the 32-bit operation is cheaper than the 64-bit one by
+  // more than the cost of the zero-extend of the result (the truncates are
+  // free on this target).
+  InstructionCost CostOp64 =
+      TTI.getArithmeticInstrCost(Opc, I64Ty, TTI::TCK_RecipThroughput);
+  InstructionCost CostOp32 =
+      TTI.getArithmeticInstrCost(Opc, I32Ty, TTI::TCK_RecipThroughput);
+  InstructionCost CostZExt = TTI.getCastInstrCost(
+      Instruction::ZExt, I64Ty, I32Ty, TTI.getCastContextHint(&I),
+      TTI::TCK_RecipThroughput);
+  if (CostOp64 - CostOp32 <= CostZExt)
+    return false;
+
+  // Both operands must be masked by a constant no larger than 0x7FFFFFFF, so
+  // their upper 33 bits are known to be zero.
+  uint64_t AndConst0, AndConst1;
+  if (!match(I.getOperand(0), m_c_And(m_Value(), m_ConstantInt(AndConst0))) ||
+      AndConst0 > 0x7FFFFFFF ||
+      !match(I.getOperand(1), m_c_And(m_Value(), m_ConstantInt(AndConst1))) ||
+      AndConst1 > 0x7FFFFFFF)
+    return false;
+
+  // Perform the operation in 32 bits and widen the result back to i64.
+  IRBuilder<> Builder(&I);
+  Value *Trunc0 = Builder.CreateTrunc(I.getOperand(0), I32Ty);
+  Value *Trunc1 = Builder.CreateTrunc(I.getOperand(1), I32Ty);
+  Value *Arith32 = Builder.CreateBinOp(
+      static_cast<Instruction::BinaryOps>(Opc), Trunc0, Trunc1);
+  Value *ZExt64 = Builder.CreateZExt(Arith32, I64Ty);
+  I.replaceAllUsesWith(ZExt64);
+  I.eraseFromParent();
+  return true;
+}
+
/// This is the entry point for folds that could be implemented in regular
/// InstCombine, but they are separated because they are not expected to
/// occur frequently and/or have more than a constant-length pattern match.
@@ -1256,6 +1299,7 @@ static bool foldUnusualPatterns(Function &F, DominatorTree &DT,
+      // tryNarrowMathIfNoOverflow may erase `I`; when it fires, record the
+      // change and skip the remaining folds for this instruction.
+      if (tryNarrowMathIfNoOverflow(I, TTI)) {
+        MadeChange = true;
+        continue;
+      }
       // needs to be called at the end of this sequence, otherwise we may make
       // bugs.
       MadeChange |= foldLibCalls(I, TTI, TLI, AC, DT, DL, MadeCFGChange);
}
}
>From 0fe9dbc148420023d709ba467d064cc59d22c72a Mon Sep 17 00:00:00 2001
From: shore <372660931 at qq.com>
Date: Mon, 10 Mar 2025 21:23:10 +0800
Subject: [PATCH 2/2] add tests
---
.../narrow_math_for_and.ll | 97 +++++++++++++++++++
1 file changed, 97 insertions(+)
create mode 100644 llvm/test/Transforms/AggressiveInstCombine/narrow_math_for_and.ll
diff --git a/llvm/test/Transforms/AggressiveInstCombine/narrow_math_for_and.ll b/llvm/test/Transforms/AggressiveInstCombine/narrow_math_for_and.ll
new file mode 100644
index 0000000000000..43e90f77e32f2
--- /dev/null
+++ b/llvm/test/Transforms/AggressiveInstCombine/narrow_math_for_and.ll
@@ -0,0 +1,97 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -passes=aggressive-instcombine < %s | FileCheck %s
+
+
+define i64 @narrow_add(i64 noundef %a, i64 noundef %b) {
+; CHECK-LABEL: define i64 @narrow_add(
+; CHECK-SAME: i64 noundef [[A:%.*]], i64 noundef [[B:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[ZEXT0:%.*]] = and i64 [[A]], 2147483647
+; CHECK-NEXT: [[ZEXT1:%.*]] = and i64 [[B]], 2147483647
+; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[ZEXT0]] to i32
+; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[ZEXT1]] to i32
+; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
+; CHECK-NEXT: ret i64 [[TMP4]]
+;
+ %zext0 = and i64 %a, 2147483647
+ %zext1 = and i64 %b, 2147483647
+ %add = add i64 %zext0, %zext1
+ ret i64 %add
+}
+
+define i64 @narrow_mul(i64 noundef %a, i64 noundef %b) {
+; CHECK-LABEL: define i64 @narrow_mul(
+; CHECK-SAME: i64 noundef [[A:%.*]], i64 noundef [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ZEXT0:%.*]] = and i64 [[A]], 2147483647
+; CHECK-NEXT: [[ZEXT1:%.*]] = and i64 [[B]], 2147483647
+; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[ZEXT0]] to i32
+; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[ZEXT1]] to i32
+; CHECK-NEXT: [[TMP3:%.*]] = mul i32 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
+; CHECK-NEXT: ret i64 [[TMP4]]
+;
+ %zext0 = and i64 %a, 2147483647
+ %zext1 = and i64 %b, 2147483647
+ %mul = mul i64 %zext0, %zext1
+ ret i64 %mul
+}
+
+define i64 @narrow_sub(i64 noundef %a, i64 noundef %b) {
+; CHECK-LABEL: define i64 @narrow_sub(
+; CHECK-SAME: i64 noundef [[A:%.*]], i64 noundef [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ZEXT0:%.*]] = and i64 [[A]], 2147483647
+; CHECK-NEXT: [[ZEXT1:%.*]] = and i64 [[B]], 2147483647
+; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[ZEXT0]] to i32
+; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[ZEXT1]] to i32
+; CHECK-NEXT: [[TMP3:%.*]] = sub i32 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
+; CHECK-NEXT: ret i64 [[TMP4]]
+;
+ %zext0 = and i64 %a, 2147483647
+ %zext1 = and i64 %b, 2147483647
+ %sub = sub i64 %zext0, %zext1
+ ret i64 %sub
+}
+
+
+define i64 @no_narrow_add(i64 noundef %a, i64 noundef %b) {
+; CHECK-LABEL: define i64 @no_narrow_add(
+; CHECK-SAME: i64 noundef [[A:%.*]], i64 noundef [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ZEXT0:%.*]] = and i64 [[A]], 2147483648
+; CHECK-NEXT: [[ZEXT1:%.*]] = and i64 [[B]], 2147483648
+; CHECK-NEXT: [[ADD:%.*]] = add i64 [[ZEXT0]], [[ZEXT1]]
+; CHECK-NEXT: ret i64 [[ADD]]
+;
+ %zext0 = and i64 %a, 2147483648
+ %zext1 = and i64 %b, 2147483648
+ %add = add i64 %zext0, %zext1
+ ret i64 %add
+}
+
+define i64 @no_narrow_mul(i64 noundef %a, i64 noundef %b) {
+; CHECK-LABEL: define i64 @no_narrow_mul(
+; CHECK-SAME: i64 noundef [[A:%.*]], i64 noundef [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ZEXT0:%.*]] = and i64 [[A]], 2147483648
+; CHECK-NEXT: [[ZEXT1:%.*]] = and i64 [[B]], 2147483648
+; CHECK-NEXT: [[MUL:%.*]] = mul i64 [[ZEXT0]], [[ZEXT1]]
+; CHECK-NEXT: ret i64 [[MUL]]
+;
+ %zext0 = and i64 %a, 2147483648
+ %zext1 = and i64 %b, 2147483648
+ %mul = mul i64 %zext0, %zext1
+ ret i64 %mul
+}
+
+define i64 @no_narrow_sub(i64 noundef %a, i64 noundef %b) {
+; CHECK-LABEL: define i64 @no_narrow_sub(
+; CHECK-SAME: i64 noundef [[A:%.*]], i64 noundef [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ZEXT0:%.*]] = and i64 [[A]], 2147483648
+; CHECK-NEXT: [[ZEXT1:%.*]] = and i64 [[B]], 2147483648
+; CHECK-NEXT: [[SUB:%.*]] = sub i64 [[ZEXT0]], [[ZEXT1]]
+; CHECK-NEXT: ret i64 [[SUB]]
+;
+ %zext0 = and i64 %a, 2147483648
+ %zext1 = and i64 %b, 2147483648
+ %sub = sub i64 %zext0, %zext1
+ ret i64 %sub
+}