[llvm] [AMDGPU][AggressiveInstCombine] Narrow 64 bit math to 32 bit if profitable (PR #130577)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Mar 11 21:00:52 PDT 2025
https://github.com/Shoreshen updated https://github.com/llvm/llvm-project/pull/130577
>From fc7a5090fe0e6dfcc12894a57df8f43be6a0317e Mon Sep 17 00:00:00 2001
From: shore <372660931 at qq.com>
Date: Mon, 10 Mar 2025 18:47:45 +0800
Subject: [PATCH 1/8] Narrow 64 bit math to 32 bit if profitable
---
.../AggressiveInstCombine.cpp | 44 +++++++++++++++++++
1 file changed, 44 insertions(+)
diff --git a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
index 6b0f568864fd5..73bd75f37cc71 100644
--- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
+++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
@@ -1224,6 +1224,49 @@ static bool foldLibCalls(Instruction &I, TargetTransformInfo &TTI,
return false;
}
+static bool tryNarrowMathIfNoOverflow(Instruction &I,
+ TargetTransformInfo &TTI) {
+ unsigned opc = I.getOpcode();
+ if (opc != Instruction::Add && opc != Instruction::Sub &&
+ opc != Instruction::Mul) {
+ return false;
+ }
+ LLVMContext &ctx = I.getContext();
+ Type *i64type = Type::getInt64Ty(ctx);
+ Type *i32type = Type::getInt32Ty(ctx);
+
+ if (I.getType() != i64type || !TTI.isTruncateFree(i64type, i32type)) {
+ return false;
+ }
+ InstructionCost costOp64 =
+ TTI.getArithmeticInstrCost(opc, i64type, TTI::TCK_RecipThroughput);
+ InstructionCost costOp32 =
+ TTI.getArithmeticInstrCost(opc, i32type, TTI::TCK_RecipThroughput);
+ InstructionCost costZext64 = TTI.getCastInstrCost(
+ Instruction::ZExt, i64type, i32type, TTI.getCastContextHint(&I),
+ TTI::TCK_RecipThroughput);
+ if ((costOp64 - costOp32) <= costZext64) {
+ return false;
+ }
+ uint64_t AndConst0, AndConst1;
+ Value *X;
+ if ((match(I.getOperand(0), m_And(m_Value(X), m_ConstantInt(AndConst0))) ||
+ match(I.getOperand(0), m_And(m_ConstantInt(AndConst0), m_Value(X)))) &&
+ AndConst0 <= 2147483647 &&
+ (match(I.getOperand(1), m_And(m_Value(X), m_ConstantInt(AndConst1))) ||
+ match(I.getOperand(1), m_And(m_ConstantInt(AndConst1), m_Value(X)))) &&
+ AndConst1 <= 2147483647) {
+ IRBuilder<> Builder(&I);
+ Value *trun0 = Builder.CreateTrunc(I.getOperand(0), i32type);
+ Value *trun1 = Builder.CreateTrunc(I.getOperand(1), i32type);
+ Value *arith32 = Builder.CreateAdd(trun0, trun1);
+ Value *zext64 = Builder.CreateZExt(arith32, i64type);
+ I.replaceAllUsesWith(zext64);
+ I.eraseFromParent();
+ }
+ return false;
+}
+
/// This is the entry point for folds that could be implemented in regular
/// InstCombine, but they are separated because they are not expected to
/// occur frequently and/or have more than a constant-length pattern match.
@@ -1256,6 +1299,7 @@ static bool foldUnusualPatterns(Function &F, DominatorTree &DT,
// needs to be called at the end of this sequence, otherwise we may make
// bugs.
MadeChange |= foldLibCalls(I, TTI, TLI, AC, DT, DL, MadeCFGChange);
+ MadeChange |= tryNarrowMathIfNoOverflow(I, TTI);
}
}
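
A minimal standalone C++ sketch of the overflow argument behind this fold (illustration only, not part of the patch): when both i64 operands are masked with a constant no larger than 0x7fffffff, each value fits in 31 bits, so a 32-bit add cannot wrap and zero-extending its result reproduces the 64-bit sum.

#include <cassert>
#include <cstdint>

// Both masked operands are at most 2^31 - 1, so their sum is at most
// 2^32 - 2; the narrowed 32-bit add plus zero-extension therefore matches
// the original 64-bit add for any inputs.
int main() {
  uint64_t A = 0xffffffffffffffffULL;
  uint64_t B = 0x123456789abcdef0ULL;
  uint64_t Masked0 = A & 0x7fffffffULL;
  uint64_t Masked1 = B & 0x7fffffffULL;
  uint64_t Wide = Masked0 + Masked1;                                  // original i64 add
  uint64_t Narrow = uint64_t(uint32_t(Masked0) + uint32_t(Masked1));  // trunc, add i32, zext
  assert(Wide == Narrow);
  return 0;
}
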
>From 0fe9dbc148420023d709ba467d064cc59d22c72a Mon Sep 17 00:00:00 2001
From: shore <372660931 at qq.com>
Date: Mon, 10 Mar 2025 21:23:10 +0800
Subject: [PATCH 2/8] add tests
---
.../narrow_math_for_and.ll | 97 +++++++++++++++++++
1 file changed, 97 insertions(+)
create mode 100644 llvm/test/Transforms/AggressiveInstCombine/narrow_math_for_and.ll
diff --git a/llvm/test/Transforms/AggressiveInstCombine/narrow_math_for_and.ll b/llvm/test/Transforms/AggressiveInstCombine/narrow_math_for_and.ll
new file mode 100644
index 0000000000000..43e90f77e32f2
--- /dev/null
+++ b/llvm/test/Transforms/AggressiveInstCombine/narrow_math_for_and.ll
@@ -0,0 +1,97 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -passes=aggressive-instcombine < %s | FileCheck %s
+
+
+define i64 @narrow_add(i64 noundef %a, i64 noundef %b) {
+; CHECK-LABEL: define i64 @narrow_add(
+; CHECK-SAME: i64 noundef [[A:%.*]], i64 noundef [[B:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[ZEXT0:%.*]] = and i64 [[A]], 2147483647
+; CHECK-NEXT: [[ZEXT1:%.*]] = and i64 [[B]], 2147483647
+; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[ZEXT0]] to i32
+; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[ZEXT1]] to i32
+; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
+; CHECK-NEXT: ret i64 [[TMP4]]
+;
+ %zext0 = and i64 %a, 2147483647
+ %zext1 = and i64 %b, 2147483647
+ %add = add i64 %zext0, %zext1
+ ret i64 %add
+}
+
+define i64 @narrow_mul(i64 noundef %a, i64 noundef %b) {
+; CHECK-LABEL: define i64 @narrow_mul(
+; CHECK-SAME: i64 noundef [[A:%.*]], i64 noundef [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ZEXT0:%.*]] = and i64 [[A]], 2147483647
+; CHECK-NEXT: [[ZEXT1:%.*]] = and i64 [[B]], 2147483647
+; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[ZEXT0]] to i32
+; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[ZEXT1]] to i32
+; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
+; CHECK-NEXT: ret i64 [[TMP4]]
+;
+ %zext0 = and i64 %a, 2147483647
+ %zext1 = and i64 %b, 2147483647
+ %mul = mul i64 %zext0, %zext1
+ ret i64 %mul
+}
+
+define i64 @narrow_sub(i64 noundef %a, i64 noundef %b) {
+; CHECK-LABEL: define i64 @narrow_sub(
+; CHECK-SAME: i64 noundef [[A:%.*]], i64 noundef [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ZEXT0:%.*]] = and i64 [[A]], 2147483647
+; CHECK-NEXT: [[ZEXT1:%.*]] = and i64 [[B]], 2147483647
+; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[ZEXT0]] to i32
+; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[ZEXT1]] to i32
+; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
+; CHECK-NEXT: ret i64 [[TMP4]]
+;
+ %zext0 = and i64 %a, 2147483647
+ %zext1 = and i64 %b, 2147483647
+ %sub = sub i64 %zext0, %zext1
+ ret i64 %sub
+}
+
+
+define i64 @no_narrow_add(i64 noundef %a, i64 noundef %b) {
+; CHECK-LABEL: define i64 @no_narrow_add(
+; CHECK-SAME: i64 noundef [[A:%.*]], i64 noundef [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ZEXT0:%.*]] = and i64 [[A]], 2147483648
+; CHECK-NEXT: [[ZEXT1:%.*]] = and i64 [[B]], 2147483648
+; CHECK-NEXT: [[ADD:%.*]] = add i64 [[ZEXT0]], [[ZEXT1]]
+; CHECK-NEXT: ret i64 [[ADD]]
+;
+ %zext0 = and i64 %a, 2147483648
+ %zext1 = and i64 %b, 2147483648
+ %add = add i64 %zext0, %zext1
+ ret i64 %add
+}
+
+define i64 @no_narrow_mul(i64 noundef %a, i64 noundef %b) {
+; CHECK-LABEL: define i64 @no_narrow_mul(
+; CHECK-SAME: i64 noundef [[A:%.*]], i64 noundef [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ZEXT0:%.*]] = and i64 [[A]], 2147483648
+; CHECK-NEXT: [[ZEXT1:%.*]] = and i64 [[B]], 2147483648
+; CHECK-NEXT: [[MUL:%.*]] = mul i64 [[ZEXT0]], [[ZEXT1]]
+; CHECK-NEXT: ret i64 [[MUL]]
+;
+ %zext0 = and i64 %a, 2147483648
+ %zext1 = and i64 %b, 2147483648
+ %mul = mul i64 %zext0, %zext1
+ ret i64 %mul
+}
+
+define i64 @no_narrow_sub(i64 noundef %a, i64 noundef %b) {
+; CHECK-LABEL: define i64 @no_narrow_sub(
+; CHECK-SAME: i64 noundef [[A:%.*]], i64 noundef [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ZEXT0:%.*]] = and i64 [[A]], 2147483648
+; CHECK-NEXT: [[ZEXT1:%.*]] = and i64 [[B]], 2147483648
+; CHECK-NEXT: [[SUB:%.*]] = sub i64 [[ZEXT0]], [[ZEXT1]]
+; CHECK-NEXT: ret i64 [[SUB]]
+;
+ %zext0 = and i64 %a, 2147483648
+ %zext1 = and i64 %b, 2147483648
+ %sub = sub i64 %zext0, %zext1
+ ret i64 %sub
+}
>From 9df0718d3a454b4d3e2930d12be3583069fedb7a Mon Sep 17 00:00:00 2001
From: shore <372660931 at qq.com>
Date: Mon, 10 Mar 2025 23:57:09 +0800
Subject: [PATCH 3/8] fix mul, remove sub
---
.../AggressiveInstCombine.cpp | 28 ++++++++++++++++---
llvm/test/lit.cfg.py | 2 +-
2 files changed, 25 insertions(+), 5 deletions(-)
diff --git a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
index 73bd75f37cc71..56e97c4d64952 100644
--- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
+++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
@@ -1224,11 +1224,32 @@ static bool foldLibCalls(Instruction &I, TargetTransformInfo &TTI,
return false;
}
+static bool isSafeToNarrow(unsigned opc, uint64_t num1, uint64_t num2) {
+ if (num1 > 0xffffffff || num2 > 0xffffffff) {
+ // If `num > 0xffffffff`, then `%and = and i64 %a, num` may have some of its
+ // high 32 bits set, so truncating it could lose information.
+ return false;
+ }
+ switch (opc) {
+ // If `%and = and i64 %a, num` where num <= 0xffffffff, then `%and` is
+ // non-negative and bounded by num. Since add and mul are both increasing on
+ // non-negative integers and each `%ai <= numi`, `(num1 op num2) <= 0xffffffff`
+ // implies `%a1 op %a2 <= 0xffffffff`.
+ case Instruction::Add:
+ return (num1 + num2) <= 0xffffffff;
+ case Instruction::Mul:
+ return (num1 * num2) <= 0xffffffff;
+ break;
+ }
+
+ return false;
+}
+
static bool tryNarrowMathIfNoOverflow(Instruction &I,
TargetTransformInfo &TTI) {
unsigned opc = I.getOpcode();
- if (opc != Instruction::Add && opc != Instruction::Sub &&
- opc != Instruction::Mul) {
+ if (opc != Instruction::Add && opc != Instruction::Mul) {
return false;
}
LLVMContext &ctx = I.getContext();
@@ -1252,10 +1273,9 @@ static bool tryNarrowMathIfNoOverflow(Instruction &I,
Value *X;
if ((match(I.getOperand(0), m_And(m_Value(X), m_ConstantInt(AndConst0))) ||
match(I.getOperand(0), m_And(m_ConstantInt(AndConst0), m_Value(X)))) &&
- AndConst0 <= 2147483647 &&
(match(I.getOperand(1), m_And(m_Value(X), m_ConstantInt(AndConst1))) ||
match(I.getOperand(1), m_And(m_ConstantInt(AndConst1), m_Value(X)))) &&
- AndConst1 <= 2147483647) {
+ isSafeToNarrow(opc, AndConst0, AndConst1)) {
IRBuilder<> Builder(&I);
Value *trun0 = Builder.CreateTrunc(I.getOperand(0), i32type);
Value *trun1 = Builder.CreateTrunc(I.getOperand(1), i32type);
diff --git a/llvm/test/lit.cfg.py b/llvm/test/lit.cfg.py
index aad7a088551b2..50921879cd1f2 100644
--- a/llvm/test/lit.cfg.py
+++ b/llvm/test/lit.cfg.py
@@ -466,7 +466,7 @@ def have_cxx_shared_library():
print("could not exec llvm-readobj")
return False
- readobj_out = readobj_cmd.stdout.read().decode("ascii")
+ readobj_out = readobj_cmd.stdout.read().decode("utf-8")
readobj_cmd.wait()
regex = re.compile(r"(libc\+\+|libstdc\+\+|msvcp).*\.(so|dylib|dll)")
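
A standalone sketch of the per-opcode safety predicate added in this patch (illustration only; the names below are made up for the example): a value masked by a constant that fits in 32 bits is bounded by that constant, so the add/mul result is bounded by the same operation applied to the two mask constants, and comparing that bound against 0xffffffff is enough to rule out overflow of the narrowed 32-bit result.

#include <cstdint>

// Illustrative analogue of the safety check, not the patch code itself.
enum class NarrowOp { Add, Mul };

constexpr bool isSafeToNarrowMasked(NarrowOp Op, uint64_t Mask0, uint64_t Mask1) {
  if (Mask0 > 0xffffffffULL || Mask1 > 0xffffffffULL)
    return false; // the masked value may still have high bits set
  if (Op == NarrowOp::Add)
    return Mask0 + Mask1 <= 0xffffffffULL; // both <= 2^32 - 1, so no uint64_t wrap
  return Mask0 * Mask1 <= 0xffffffffULL;   // product stays below 2^64, so no uint64_t wrap
}

// Examples mirroring the masks used in narrow_math_for_and.ll.
static_assert(isSafeToNarrowMasked(NarrowOp::Add, 0x7fffffff, 0x7fffffff), "");
static_assert(isSafeToNarrowMasked(NarrowOp::Mul, 0x7fffffff, 2), "");
static_assert(!isSafeToNarrowMasked(NarrowOp::Add, 0xffffffff, 1), "");
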
>From a5084d29e809d09bf7c88629b28836a531e15004 Mon Sep 17 00:00:00 2001
From: shore <372660931 at qq.com>
Date: Mon, 10 Mar 2025 23:57:53 +0800
Subject: [PATCH 4/8] fix lit.cfg.py
---
llvm/test/lit.cfg.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/test/lit.cfg.py b/llvm/test/lit.cfg.py
index 50921879cd1f2..aad7a088551b2 100644
--- a/llvm/test/lit.cfg.py
+++ b/llvm/test/lit.cfg.py
@@ -466,7 +466,7 @@ def have_cxx_shared_library():
print("could not exec llvm-readobj")
return False
- readobj_out = readobj_cmd.stdout.read().decode("utf-8")
+ readobj_out = readobj_cmd.stdout.read().decode("ascii")
readobj_cmd.wait()
regex = re.compile(r"(libc\+\+|libstdc\+\+|msvcp).*\.(so|dylib|dll)")
>From 2e2d190bb817b757778360b831832c34d1b2bfa0 Mon Sep 17 00:00:00 2001
From: shore <372660931 at qq.com>
Date: Tue, 11 Mar 2025 01:36:36 +0800
Subject: [PATCH 5/8] fix test
---
.../narrow_math_for_and.ll | 66 ++++++++++++-------
llvm/test/lit.cfg.py | 2 +-
2 files changed, 42 insertions(+), 26 deletions(-)
diff --git a/llvm/test/Transforms/AggressiveInstCombine/narrow_math_for_and.ll b/llvm/test/Transforms/AggressiveInstCombine/narrow_math_for_and.ll
index 43e90f77e32f2..cdee5c20733ef 100644
--- a/llvm/test/Transforms/AggressiveInstCombine/narrow_math_for_and.ll
+++ b/llvm/test/Transforms/AggressiveInstCombine/narrow_math_for_and.ll
@@ -19,11 +19,28 @@ define i64 @narrow_add(i64 noundef %a, i64 noundef %b) {
ret i64 %add
}
+define i64 @narrow_add_1(i64 noundef %a, i64 noundef %b) {
+; CHECK-LABEL: define i64 @narrow_add_1(
+; CHECK-SAME: i64 noundef [[A:%.*]], i64 noundef [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ZEXT0:%.*]] = and i64 [[A]], 2147483647
+; CHECK-NEXT: [[ZEXT1:%.*]] = and i64 [[B]], 2147483648
+; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[ZEXT0]] to i32
+; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[ZEXT1]] to i32
+; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
+; CHECK-NEXT: ret i64 [[TMP4]]
+;
+ %zext0 = and i64 %a, 2147483647
+ %zext1 = and i64 %b, 2147483648
+ %add = add i64 %zext0, %zext1
+ ret i64 %add
+}
+
define i64 @narrow_mul(i64 noundef %a, i64 noundef %b) {
; CHECK-LABEL: define i64 @narrow_mul(
; CHECK-SAME: i64 noundef [[A:%.*]], i64 noundef [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[ZEXT0:%.*]] = and i64 [[A]], 2147483647
-; CHECK-NEXT: [[ZEXT1:%.*]] = and i64 [[B]], 2147483647
+; CHECK-NEXT: [[ZEXT1:%.*]] = and i64 [[B]], 0
; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[ZEXT0]] to i32
; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[ZEXT1]] to i32
; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[TMP1]], [[TMP2]]
@@ -31,16 +48,16 @@ define i64 @narrow_mul(i64 noundef %a, i64 noundef %b) {
; CHECK-NEXT: ret i64 [[TMP4]]
;
%zext0 = and i64 %a, 2147483647
- %zext1 = and i64 %b, 2147483647
+ %zext1 = and i64 %b, 0
%mul = mul i64 %zext0, %zext1
ret i64 %mul
}
-define i64 @narrow_sub(i64 noundef %a, i64 noundef %b) {
-; CHECK-LABEL: define i64 @narrow_sub(
+define i64 @narrow_mul_1(i64 noundef %a, i64 noundef %b) {
+; CHECK-LABEL: define i64 @narrow_mul_1(
; CHECK-SAME: i64 noundef [[A:%.*]], i64 noundef [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[ZEXT0:%.*]] = and i64 [[A]], 2147483647
-; CHECK-NEXT: [[ZEXT1:%.*]] = and i64 [[B]], 2147483647
+; CHECK-NEXT: [[ZEXT1:%.*]] = and i64 [[B]], 2
; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[ZEXT0]] to i32
; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[ZEXT1]] to i32
; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[TMP1]], [[TMP2]]
@@ -48,12 +65,11 @@ define i64 @narrow_sub(i64 noundef %a, i64 noundef %b) {
; CHECK-NEXT: ret i64 [[TMP4]]
;
%zext0 = and i64 %a, 2147483647
- %zext1 = and i64 %b, 2147483647
- %sub = sub i64 %zext0, %zext1
- ret i64 %sub
+ %zext1 = and i64 %b, 2
+ %mul = mul i64 %zext0, %zext1
+ ret i64 %mul
}
-
define i64 @no_narrow_add(i64 noundef %a, i64 noundef %b) {
; CHECK-LABEL: define i64 @no_narrow_add(
; CHECK-SAME: i64 noundef [[A:%.*]], i64 noundef [[B:%.*]]) #[[ATTR0]] {
@@ -68,30 +84,30 @@ define i64 @no_narrow_add(i64 noundef %a, i64 noundef %b) {
ret i64 %add
}
+define i64 @no_narrow_add_1(i64 noundef %a, i64 noundef %b) {
+; CHECK-LABEL: define i64 @no_narrow_add_1(
+; CHECK-SAME: i64 noundef [[A:%.*]], i64 noundef [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ZEXT0:%.*]] = and i64 [[A]], 4294967295
+; CHECK-NEXT: [[ZEXT1:%.*]] = and i64 [[B]], 1
+; CHECK-NEXT: [[ADD:%.*]] = add i64 [[ZEXT0]], [[ZEXT1]]
+; CHECK-NEXT: ret i64 [[ADD]]
+;
+ %zext0 = and i64 %a, 4294967295
+ %zext1 = and i64 %b, 1
+ %add = add i64 %zext0, %zext1
+ ret i64 %add
+}
+
define i64 @no_narrow_mul(i64 noundef %a, i64 noundef %b) {
; CHECK-LABEL: define i64 @no_narrow_mul(
; CHECK-SAME: i64 noundef [[A:%.*]], i64 noundef [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[ZEXT0:%.*]] = and i64 [[A]], 2147483648
-; CHECK-NEXT: [[ZEXT1:%.*]] = and i64 [[B]], 2147483648
+; CHECK-NEXT: [[ZEXT1:%.*]] = and i64 [[B]], 2
; CHECK-NEXT: [[MUL:%.*]] = mul i64 [[ZEXT0]], [[ZEXT1]]
; CHECK-NEXT: ret i64 [[MUL]]
;
%zext0 = and i64 %a, 2147483648
- %zext1 = and i64 %b, 2147483648
+ %zext1 = and i64 %b, 2
%mul = mul i64 %zext0, %zext1
ret i64 %mul
}
-
-define i64 @no_narrow_sub(i64 noundef %a, i64 noundef %b) {
-; CHECK-LABEL: define i64 @no_narrow_sub(
-; CHECK-SAME: i64 noundef [[A:%.*]], i64 noundef [[B:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[ZEXT0:%.*]] = and i64 [[A]], 2147483648
-; CHECK-NEXT: [[ZEXT1:%.*]] = and i64 [[B]], 2147483648
-; CHECK-NEXT: [[SUB:%.*]] = sub i64 [[ZEXT0]], [[ZEXT1]]
-; CHECK-NEXT: ret i64 [[SUB]]
-;
- %zext0 = and i64 %a, 2147483648
- %zext1 = and i64 %b, 2147483648
- %sub = sub i64 %zext0, %zext1
- ret i64 %sub
-}
diff --git a/llvm/test/lit.cfg.py b/llvm/test/lit.cfg.py
index aad7a088551b2..50921879cd1f2 100644
--- a/llvm/test/lit.cfg.py
+++ b/llvm/test/lit.cfg.py
@@ -466,7 +466,7 @@ def have_cxx_shared_library():
print("could not exec llvm-readobj")
return False
- readobj_out = readobj_cmd.stdout.read().decode("ascii")
+ readobj_out = readobj_cmd.stdout.read().decode("utf-8")
readobj_cmd.wait()
regex = re.compile(r"(libc\+\+|libstdc\+\+|msvcp).*\.(so|dylib|dll)")
>From 2063614767f690acd22c54c9706ad9a2e5d20099 Mon Sep 17 00:00:00 2001
From: shore <372660931 at qq.com>
Date: Tue, 11 Mar 2025 10:22:20 +0800
Subject: [PATCH 6/8] fix variable name
---
.../AggressiveInstCombine.cpp | 30 +++++++++----------
llvm/test/lit.cfg.py | 2 +-
2 files changed, 16 insertions(+), 16 deletions(-)
diff --git a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
index 56e97c4d64952..b1f13956a2940 100644
--- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
+++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
@@ -1253,20 +1253,20 @@ static bool tryNarrowMathIfNoOverflow(Instruction &I,
return false;
}
LLVMContext &ctx = I.getContext();
- Type *i64type = Type::getInt64Ty(ctx);
- Type *i32type = Type::getInt32Ty(ctx);
+ Type *I64Type = Type::getInt64Ty(ctx);
+ Type *I32Type = Type::getInt32Ty(ctx);
- if (I.getType() != i64type || !TTI.isTruncateFree(i64type, i32type)) {
+ if (I.getType() != I64Type || !TTI.isTruncateFree(I64Type, I32Type)) {
return false;
}
- InstructionCost costOp64 =
- TTI.getArithmeticInstrCost(opc, i64type, TTI::TCK_RecipThroughput);
- InstructionCost costOp32 =
- TTI.getArithmeticInstrCost(opc, i32type, TTI::TCK_RecipThroughput);
- InstructionCost costZext64 = TTI.getCastInstrCost(
- Instruction::ZExt, i64type, i32type, TTI.getCastContextHint(&I),
+ InstructionCost CostOp64 =
+ TTI.getArithmeticInstrCost(opc, I64Type, TTI::TCK_RecipThroughput);
+ InstructionCost CostOp32 =
+ TTI.getArithmeticInstrCost(opc, I32Type, TTI::TCK_RecipThroughput);
+ InstructionCost CostZext64 = TTI.getCastInstrCost(
+ Instruction::ZExt, I64Type, I32Type, TTI.getCastContextHint(&I),
TTI::TCK_RecipThroughput);
- if ((costOp64 - costOp32) <= costZext64) {
+ if ((CostOp64 - CostOp32) <= CostZext64) {
return false;
}
uint64_t AndConst0, AndConst1;
@@ -1277,11 +1277,11 @@ static bool tryNarrowMathIfNoOverflow(Instruction &I,
match(I.getOperand(1), m_And(m_ConstantInt(AndConst1), m_Value(X)))) &&
isSafeToNarrow(opc, AndConst0, AndConst1)) {
IRBuilder<> Builder(&I);
- Value *trun0 = Builder.CreateTrunc(I.getOperand(0), i32type);
- Value *trun1 = Builder.CreateTrunc(I.getOperand(1), i32type);
- Value *arith32 = Builder.CreateAdd(trun0, trun1);
- Value *zext64 = Builder.CreateZExt(arith32, i64type);
- I.replaceAllUsesWith(zext64);
+ Value *Trun0 = Builder.CreateTrunc(I.getOperand(0), I32Type);
+ Value *Trun1 = Builder.CreateTrunc(I.getOperand(1), I32Type);
+ Value *Arith32 = Builder.CreateAdd(Trun0, Trun1);
+ Value *Zext64 = Builder.CreateZExt(Arith32, I64Type);
+ I.replaceAllUsesWith(Zext64);
I.eraseFromParent();
}
return false;
diff --git a/llvm/test/lit.cfg.py b/llvm/test/lit.cfg.py
index 50921879cd1f2..aad7a088551b2 100644
--- a/llvm/test/lit.cfg.py
+++ b/llvm/test/lit.cfg.py
@@ -466,7 +466,7 @@ def have_cxx_shared_library():
print("could not exec llvm-readobj")
return False
- readobj_out = readobj_cmd.stdout.read().decode("utf-8")
+ readobj_out = readobj_cmd.stdout.read().decode("ascii")
readobj_cmd.wait()
regex = re.compile(r"(libc\+\+|libstdc\+\+|msvcp).*\.(so|dylib|dll)")
>From 0ac2f9ed50b36f15abfe168bf8a65eeb76530d4c Mon Sep 17 00:00:00 2001
From: shore <372660931 at qq.com>
Date: Tue, 11 Mar 2025 17:37:27 +0800
Subject: [PATCH 7/8] fix comments
---
.../AggressiveInstCombine.cpp | 95 ++++++++--------
.../narrow_math_for_and.ll | 105 +++++++++++++++++-
2 files changed, 148 insertions(+), 52 deletions(-)
diff --git a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
index b1f13956a2940..5277318071be9 100644
--- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
+++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
@@ -1224,67 +1224,66 @@ static bool foldLibCalls(Instruction &I, TargetTransformInfo &TTI,
return false;
}
-static bool isSafeToNarrow(unsigned opc, uint64_t num1, uint64_t num2) {
- if (num1 > 0xffffffff || num2 > 0xffffffff) {
- // If `num > 0xffffffff`, then `%and = and i64 %a, num` may have some of its
- // high 32 bits set, so truncating it could lose information.
+static bool tryNarrowMathIfNoOverflow(Instruction &I, TargetTransformInfo &TTI,
+ const DataLayout &DL) {
+ unsigned opc = I.getOpcode();
+ Type *OldType = I.getType();
+ if (opc != Instruction::Add && opc != Instruction::Mul &&
+ !OldType->isIntOrIntVectorTy()) {
return false;
}
+ unsigned OrigBit = OldType->getScalarSizeInBits();
+ unsigned MaxBitsNeed = OrigBit;
switch (opc) {
- // If `%and = and i64 %a, num` where num <= 0xffffffff, then `%and` is
- // non-negative and bounded by num. Since add and mul are both increasing on
- // non-negative integers and each `%ai <= numi`, `(num1 op num2) <= 0xffffffff`
- // implies `%a1 op %a2 <= 0xffffffff`.
case Instruction::Add:
- return (num1 + num2) <= 0xffffffff;
+ MaxBitsNeed = KnownBits::add(computeKnownBits(I.getOperand(0), DL),
+ computeKnownBits(I.getOperand(1), DL))
+ .countMaxActiveBits();
+ break;
case Instruction::Mul:
- return (num1 * num2) <= 0xffffffff;
+ MaxBitsNeed = KnownBits::mul(computeKnownBits(I.getOperand(0), DL),
+ computeKnownBits(I.getOperand(1), DL))
+ .countMaxActiveBits();
+ break;
+ default:
break;
}
- return false;
-}
+ MaxBitsNeed = std::max<unsigned>(bit_ceil(MaxBitsNeed), 8);
-static bool tryNarrowMathIfNoOverflow(Instruction &I,
- TargetTransformInfo &TTI) {
- unsigned opc = I.getOpcode();
- if (opc != Instruction::Add && opc != Instruction::Mul) {
+ if (OrigBit <= MaxBitsNeed) {
return false;
}
- LLVMContext &ctx = I.getContext();
- Type *I64Type = Type::getInt64Ty(ctx);
- Type *I32Type = Type::getInt32Ty(ctx);
- if (I.getType() != I64Type || !TTI.isTruncateFree(I64Type, I32Type)) {
- return false;
- }
- InstructionCost CostOp64 =
- TTI.getArithmeticInstrCost(opc, I64Type, TTI::TCK_RecipThroughput);
- InstructionCost CostOp32 =
- TTI.getArithmeticInstrCost(opc, I32Type, TTI::TCK_RecipThroughput);
- InstructionCost CostZext64 = TTI.getCastInstrCost(
- Instruction::ZExt, I64Type, I32Type, TTI.getCastContextHint(&I),
- TTI::TCK_RecipThroughput);
- if ((CostOp64 - CostOp32) <= CostZext64) {
+ Type *NewType = I.getType()->getWithNewBitWidth(MaxBitsNeed);
+
+ // Old cost
+ InstructionCost OldCost =
+ TTI.getArithmeticInstrCost(opc, OldType, TTI::TCK_RecipThroughput);
+ // New cost of new op
+ InstructionCost NewCost =
+ TTI.getArithmeticInstrCost(opc, NewType, TTI::TCK_RecipThroughput);
+ // New cost of narrowing 2 operands (use trunc)
+ NewCost += TTI.getCastInstrCost(Instruction::Trunc, NewType, OldType,
+ TTI.getCastContextHint(&I),
+ TTI::TCK_RecipThroughput) *
+ 2;
+ // New cost of zext narrowed result to original type
+ NewCost += TTI.getCastInstrCost(Instruction::ZExt, OldType, NewType,
+ TTI.getCastContextHint(&I),
+ TTI::TCK_RecipThroughput);
+ if (NewCost >= OldCost) {
return false;
}
- uint64_t AndConst0, AndConst1;
- Value *X;
- if ((match(I.getOperand(0), m_And(m_Value(X), m_ConstantInt(AndConst0))) ||
- match(I.getOperand(0), m_And(m_ConstantInt(AndConst0), m_Value(X)))) &&
- (match(I.getOperand(1), m_And(m_Value(X), m_ConstantInt(AndConst1))) ||
- match(I.getOperand(1), m_And(m_ConstantInt(AndConst1), m_Value(X)))) &&
- isSafeToNarrow(opc, AndConst0, AndConst1)) {
- IRBuilder<> Builder(&I);
- Value *Trun0 = Builder.CreateTrunc(I.getOperand(0), I32Type);
- Value *Trun1 = Builder.CreateTrunc(I.getOperand(1), I32Type);
- Value *Arith32 = Builder.CreateAdd(Trun0, Trun1);
- Value *Zext64 = Builder.CreateZExt(Arith32, I64Type);
- I.replaceAllUsesWith(Zext64);
- I.eraseFromParent();
- }
- return false;
+ IRBuilder<> Builder(&I);
+ Value *Trun0 = Builder.CreateTrunc(I.getOperand(0), NewType);
+ Value *Trun1 = Builder.CreateTrunc(I.getOperand(1), NewType);
+ Value *Arith = Builder.CreateBinOp((Instruction::BinaryOps)opc, Trun0, Trun1);
+
+ Value *Zext = Builder.CreateZExt(Arith, OldType);
+ I.replaceAllUsesWith(Zext);
+ I.eraseFromParent();
+ return true;
}
/// This is the entry point for folds that could be implemented in regular
@@ -1319,7 +1318,7 @@ static bool foldUnusualPatterns(Function &F, DominatorTree &DT,
// needs to be called at the end of this sequence, otherwise we may make
// bugs.
MadeChange |= foldLibCalls(I, TTI, TLI, AC, DT, DL, MadeCFGChange);
- MadeChange |= tryNarrowMathIfNoOverflow(I, TTI);
+ MadeChange |= tryNarrowMathIfNoOverflow(I, TTI, DL);
}
}
diff --git a/llvm/test/Transforms/AggressiveInstCombine/narrow_math_for_and.ll b/llvm/test/Transforms/AggressiveInstCombine/narrow_math_for_and.ll
index cdee5c20733ef..38df58356559e 100644
--- a/llvm/test/Transforms/AggressiveInstCombine/narrow_math_for_and.ll
+++ b/llvm/test/Transforms/AggressiveInstCombine/narrow_math_for_and.ll
@@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -passes=aggressive-instcombine < %s | FileCheck %s
+; REQUIRES: amdgpu-registered-target
define i64 @narrow_add(i64 noundef %a, i64 noundef %b) {
; CHECK-LABEL: define i64 @narrow_add(
@@ -36,19 +37,53 @@ define i64 @narrow_add_1(i64 noundef %a, i64 noundef %b) {
ret i64 %add
}
+define <2 x i64> @narrow_add_vec(<2 x i64> %a, <2 x i64> %b) #0 {
+; CHECK-LABEL: define <2 x i64> @narrow_add_vec(
+; CHECK-SAME: <2 x i64> [[A:%.*]], <2 x i64> [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ZEXT0:%.*]] = and <2 x i64> [[A]], <i64 2147483647, i64 30>
+; CHECK-NEXT: [[ZEXT1:%.*]] = and <2 x i64> [[B]], <i64 2147483647, i64 2147483646>
+; CHECK-NEXT: [[TMP1:%.*]] = trunc <2 x i64> [[ZEXT0]] to <2 x i32>
+; CHECK-NEXT: [[TMP2:%.*]] = trunc <2 x i64> [[ZEXT1]] to <2 x i32>
+; CHECK-NEXT: [[TMP3:%.*]] = add <2 x i32> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = zext <2 x i32> [[TMP3]] to <2 x i64>
+; CHECK-NEXT: ret <2 x i64> [[TMP4]]
+;
+ %zext0 = and <2 x i64> %a, <i64 2147483647, i64 30>
+ %zext1 = and <2 x i64> %b, <i64 2147483647, i64 2147483646>
+ %add = add <2 x i64> %zext0, %zext1
+ ret <2 x i64> %add
+}
+
+define <2 x i32> @narrow_add_vec_1(<2 x i32> %a, <2 x i32> %b) #0 {
+; CHECK-LABEL: define <2 x i32> @narrow_add_vec_1(
+; CHECK-SAME: <2 x i32> [[A:%.*]], <2 x i32> [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ZEXT0:%.*]] = and <2 x i32> [[A]], <i32 16384, i32 16383>
+; CHECK-NEXT: [[ZEXT1:%.*]] = and <2 x i32> [[B]], <i32 16384, i32 16385>
+; CHECK-NEXT: [[TMP1:%.*]] = trunc <2 x i32> [[ZEXT0]] to <2 x i16>
+; CHECK-NEXT: [[TMP2:%.*]] = trunc <2 x i32> [[ZEXT1]] to <2 x i16>
+; CHECK-NEXT: [[TMP3:%.*]] = add <2 x i16> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = zext <2 x i16> [[TMP3]] to <2 x i32>
+; CHECK-NEXT: ret <2 x i32> [[TMP4]]
+;
+ %zext0 = and <2 x i32> %a, <i32 16384, i32 16383>
+ %zext1 = and <2 x i32> %b, <i32 16384, i32 16385>
+ %add = add <2 x i32> %zext0, %zext1
+ ret <2 x i32> %add
+}
+
define i64 @narrow_mul(i64 noundef %a, i64 noundef %b) {
; CHECK-LABEL: define i64 @narrow_mul(
; CHECK-SAME: i64 noundef [[A:%.*]], i64 noundef [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[ZEXT0:%.*]] = and i64 [[A]], 2147483647
-; CHECK-NEXT: [[ZEXT1:%.*]] = and i64 [[B]], 0
+; CHECK-NEXT: [[ZEXT1:%.*]] = and i64 [[B]], 2
; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[ZEXT0]] to i32
; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[ZEXT1]] to i32
-; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP3:%.*]] = mul i32 [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
; CHECK-NEXT: ret i64 [[TMP4]]
;
%zext0 = and i64 %a, 2147483647
- %zext1 = and i64 %b, 0
+ %zext1 = and i64 %b, 2
%mul = mul i64 %zext0, %zext1
ret i64 %mul
}
@@ -60,7 +95,7 @@ define i64 @narrow_mul_1(i64 noundef %a, i64 noundef %b) {
; CHECK-NEXT: [[ZEXT1:%.*]] = and i64 [[B]], 2
; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[ZEXT0]] to i32
; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[ZEXT1]] to i32
-; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP3:%.*]] = mul i32 [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
; CHECK-NEXT: ret i64 [[TMP4]]
;
@@ -70,6 +105,40 @@ define i64 @narrow_mul_1(i64 noundef %a, i64 noundef %b) {
ret i64 %mul
}
+define <2 x i64> @narrow_mul_vec(<2 x i64> %a, <2 x i64> %b) #0 {
+; CHECK-LABEL: define <2 x i64> @narrow_mul_vec(
+; CHECK-SAME: <2 x i64> [[A:%.*]], <2 x i64> [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ZEXT0:%.*]] = and <2 x i64> [[A]], <i64 47483647, i64 50>
+; CHECK-NEXT: [[ZEXT1:%.*]] = and <2 x i64> [[B]], <i64 80, i64 20>
+; CHECK-NEXT: [[TMP1:%.*]] = trunc <2 x i64> [[ZEXT0]] to <2 x i32>
+; CHECK-NEXT: [[TMP2:%.*]] = trunc <2 x i64> [[ZEXT1]] to <2 x i32>
+; CHECK-NEXT: [[TMP3:%.*]] = mul <2 x i32> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = zext <2 x i32> [[TMP3]] to <2 x i64>
+; CHECK-NEXT: ret <2 x i64> [[TMP4]]
+;
+ %zext0 = and <2 x i64> %a, <i64 47483647, i64 50>
+ %zext1 = and <2 x i64> %b, <i64 80, i64 20>
+ %mul = mul <2 x i64> %zext0, %zext1
+ ret <2 x i64> %mul
+}
+
+define <2 x i32> @narrow_add_mul_1(<2 x i32> %a, <2 x i32> %b) #0 {
+; CHECK-LABEL: define <2 x i32> @narrow_add_mul_1(
+; CHECK-SAME: <2 x i32> [[A:%.*]], <2 x i32> [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ZEXT0:%.*]] = and <2 x i32> [[A]], splat (i32 16384)
+; CHECK-NEXT: [[ZEXT1:%.*]] = and <2 x i32> [[B]], <i32 3, i32 2>
+; CHECK-NEXT: [[TMP1:%.*]] = trunc <2 x i32> [[ZEXT0]] to <2 x i16>
+; CHECK-NEXT: [[TMP2:%.*]] = trunc <2 x i32> [[ZEXT1]] to <2 x i16>
+; CHECK-NEXT: [[TMP3:%.*]] = mul <2 x i16> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = zext <2 x i16> [[TMP3]] to <2 x i32>
+; CHECK-NEXT: ret <2 x i32> [[TMP4]]
+;
+ %zext0 = and <2 x i32> %a, <i32 16384, i32 16384>
+ %zext1 = and <2 x i32> %b, <i32 3, i32 2>
+ %mul = mul <2 x i32> %zext0, %zext1
+ ret <2 x i32> %mul
+}
+
define i64 @no_narrow_add(i64 noundef %a, i64 noundef %b) {
; CHECK-LABEL: define i64 @no_narrow_add(
; CHECK-SAME: i64 noundef [[A:%.*]], i64 noundef [[B:%.*]]) #[[ATTR0]] {
@@ -98,6 +167,20 @@ define i64 @no_narrow_add_1(i64 noundef %a, i64 noundef %b) {
ret i64 %add
}
+define <2 x i64> @no_narrow_add_vec(<2 x i64> %a, <2 x i64> %b) #0 {
+; CHECK-LABEL: define <2 x i64> @no_narrow_add_vec(
+; CHECK-SAME: <2 x i64> [[A:%.*]], <2 x i64> [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ZEXT0:%.*]] = and <2 x i64> [[A]], <i64 2147483648, i64 30>
+; CHECK-NEXT: [[ZEXT1:%.*]] = and <2 x i64> [[B]], <i64 2147483648, i64 2147483646>
+; CHECK-NEXT: [[ADD:%.*]] = add <2 x i64> [[ZEXT0]], [[ZEXT1]]
+; CHECK-NEXT: ret <2 x i64> [[ADD]]
+;
+ %zext0 = and <2 x i64> %a, <i64 2147483648, i64 30>
+ %zext1 = and <2 x i64> %b, <i64 2147483648, i64 2147483646>
+ %add = add <2 x i64> %zext0, %zext1
+ ret <2 x i64> %add
+}
+
define i64 @no_narrow_mul(i64 noundef %a, i64 noundef %b) {
; CHECK-LABEL: define i64 @no_narrow_mul(
; CHECK-SAME: i64 noundef [[A:%.*]], i64 noundef [[B:%.*]]) #[[ATTR0]] {
@@ -111,3 +194,17 @@ define i64 @no_narrow_mul(i64 noundef %a, i64 noundef %b) {
%mul = mul i64 %zext0, %zext1
ret i64 %mul
}
+
+define <2 x i64> @no_narrow_mul_vec(<2 x i64> %a, <2 x i64> %b) #0 {
+; CHECK-LABEL: define <2 x i64> @no_narrow_mul_vec(
+; CHECK-SAME: <2 x i64> [[A:%.*]], <2 x i64> [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ZEXT0:%.*]] = and <2 x i64> [[A]], <i64 32768, i64 50>
+; CHECK-NEXT: [[ZEXT1:%.*]] = and <2 x i64> [[B]], <i64 131072, i64 20>
+; CHECK-NEXT: [[MUL:%.*]] = mul <2 x i64> [[ZEXT0]], [[ZEXT1]]
+; CHECK-NEXT: ret <2 x i64> [[MUL]]
+;
+ %zext0 = and <2 x i64> %a, <i64 32768, i64 50>
+ %zext1 = and <2 x i64> %b, <i64 131072, i64 20>
+ %mul = mul <2 x i64> %zext0, %zext1
+ ret <2 x i64> %mul
+}
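
Patch 7 replaces the constant-mask check with a KnownBits-based bound on the result and generalizes the target width. A standalone C++20 analogue of the width selection (illustration only, not the LLVM KnownBits API; the helper name is made up): take the maximum number of active bits the result can have, round it up to a power of two, clamp it to at least 8 bits, and narrow only when that width is strictly smaller than the original width.

#include <algorithm>
#include <bit>
#include <cassert>
#include <cstdint>

// Illustrative analogue of the MaxBitsNeed / bit_ceil handling in the patch.
static unsigned chooseNarrowWidth(uint64_t MaxResult) {
  // `| 1` keeps countl_zero away from the all-zero input.
  unsigned MaxBitsNeeded = 64u - unsigned(std::countl_zero(MaxResult | 1));
  return std::max(8u, std::bit_ceil(MaxBitsNeeded));
}

int main() {
  // add of two operands masked with 0x7fffffff: bound 0xfffffffe needs 32 bits.
  assert(chooseNarrowWidth(0x7fffffffULL + 0x7fffffffULL) == 32);
  // mul of operands masked with 0x7fffffff and 2: bound 0xfffffffe, also 32 bits.
  assert(chooseNarrowWidth(0x7fffffffULL * 2ULL) == 32);
  // a bound needing 33 bits rounds up to 64, so an i64 op is left alone.
  assert(chooseNarrowWidth(0x100000000ULL) == 64);
  return 0;
}
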
>From f7d076945352c3e937eb787e59e1f85f1891f5fb Mon Sep 17 00:00:00 2001
From: shore <372660931 at qq.com>
Date: Tue, 11 Mar 2025 18:38:01 +0800
Subject: [PATCH 8/8] fix comments
---
.../AggressiveInstCombine.cpp | 19 +++++++++----------
1 file changed, 9 insertions(+), 10 deletions(-)
diff --git a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
index 5277318071be9..7c9a901f18cb9 100644
--- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
+++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
@@ -1226,15 +1226,15 @@ static bool foldLibCalls(Instruction &I, TargetTransformInfo &TTI,
static bool tryNarrowMathIfNoOverflow(Instruction &I, TargetTransformInfo &TTI,
const DataLayout &DL) {
- unsigned opc = I.getOpcode();
+ unsigned Opc = I.getOpcode();
Type *OldType = I.getType();
- if (opc != Instruction::Add && opc != Instruction::Mul &&
- !OldType->isIntOrIntVectorTy()) {
+
+ if (Opc != Instruction::Add && Opc != Instruction::Mul)
return false;
- }
+
unsigned OrigBit = OldType->getScalarSizeInBits();
unsigned MaxBitsNeed = OrigBit;
- switch (opc) {
+ switch (Opc) {
case Instruction::Add:
MaxBitsNeed = KnownBits::add(computeKnownBits(I.getOperand(0), DL),
computeKnownBits(I.getOperand(1), DL))
@@ -1251,18 +1251,17 @@ static bool tryNarrowMathIfNoOverflow(Instruction &I, TargetTransformInfo &TTI,
MaxBitsNeed = std::max<unsigned>(bit_ceil(MaxBitsNeed), 8);
- if (OrigBit <= MaxBitsNeed) {
+ if (OrigBit <= MaxBitsNeed)
return false;
- }
Type *NewType = I.getType()->getWithNewBitWidth(MaxBitsNeed);
// Old cost
InstructionCost OldCost =
- TTI.getArithmeticInstrCost(opc, OldType, TTI::TCK_RecipThroughput);
+ TTI.getArithmeticInstrCost(Opc, OldType, TTI::TCK_RecipThroughput);
// New cost of new op
InstructionCost NewCost =
- TTI.getArithmeticInstrCost(opc, NewType, TTI::TCK_RecipThroughput);
+ TTI.getArithmeticInstrCost(Opc, NewType, TTI::TCK_RecipThroughput);
// New cost of narrowing 2 operands (use trunc)
NewCost += TTI.getCastInstrCost(Instruction::Trunc, NewType, OldType,
TTI.getCastContextHint(&I),
@@ -1278,7 +1277,7 @@ static bool tryNarrowMathIfNoOverflow(Instruction &I, TargetTransformInfo &TTI,
IRBuilder<> Builder(&I);
Value *Trun0 = Builder.CreateTrunc(I.getOperand(0), NewType);
Value *Trun1 = Builder.CreateTrunc(I.getOperand(1), NewType);
- Value *Arith = Builder.CreateBinOp((Instruction::BinaryOps)opc, Trun0, Trun1);
+ Value *Arith = Builder.CreateBinOp((Instruction::BinaryOps)Opc, Trun0, Trun1);
Value *Zext = Builder.CreateZExt(Arith, OldType);
I.replaceAllUsesWith(Zext);