[llvm] [AMDGPU] Narrow 64 bit math to 32 bit if profitable (PR #130577)

via llvm-commits llvm-commits at lists.llvm.org
Mon Mar 10 03:51:35 PDT 2025


llvmbot wrote:



@llvm/pr-subscribers-llvm-transforms

Author: None (Shoreshen)

<details>
<summary>Changes</summary>

For `add`, `sub`, and `mul` on i64, when the cost model says narrowing is profitable, do the following:
1. Truncate the operands to i32.
2. Perform the 32-bit `add`/`sub`/`mul`.
3. Zero-extend the result back to i64.

The fold only fires when both operands are masked by constants small enough that the 32-bit result cannot wrap; in practice that admits `add` and `mul` but never `sub`, whose result can be negative.
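
As a minimal before/after sketch (hypothetical IR in `@src`/`@tgt` style, not taken from the patch's tests):

```llvm
; Input: both operands are masked below 2^31, so the sum fits in 32 bits.
define i64 @src(i64 %a, i64 %b) {
  %a31 = and i64 %a, 2147483647
  %b31 = and i64 %b, 2147483647
  %r = add i64 %a31, %b31
  ret i64 %r
}

; Expected result: the add happens in 32 bits and is zero-extended.
define i64 @tgt(i64 %a, i64 %b) {
  %a31 = and i64 %a, 2147483647
  %b31 = and i64 %b, 2147483647
  %t0 = trunc i64 %a31 to i32
  %t1 = trunc i64 %b31 to i32
  %r32 = add i32 %t0, %t1
  %r = zext i32 %r32 to i64
  ret i64 %r
}
```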

---
Full diff: https://github.com/llvm/llvm-project/pull/130577.diff


1 file affected:

- (modified) llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp (+44) 


``````````diff
diff --git a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
index 6b0f568864fd5..73bd75f37cc71 100644
--- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
+++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
@@ -1224,6 +1224,49 @@ static bool foldLibCalls(Instruction &I, TargetTransformInfo &TTI,
   return false;
 }
 
+static bool tryNarrowMathIfNoOverflow(Instruction &I,
+                                      TargetTransformInfo &TTI) {
+  unsigned Opc = I.getOpcode();
+  if (Opc != Instruction::Add && Opc != Instruction::Sub &&
+      Opc != Instruction::Mul)
+    return false;
+
+  LLVMContext &Ctx = I.getContext();
+  Type *I64Type = Type::getInt64Ty(Ctx);
+  Type *I32Type = Type::getInt32Ty(Ctx);
+
+  // Only i64 ops are narrowed, and only on targets where the inserted
+  // truncations are free.
+  if (I.getType() != I64Type || !TTI.isTruncateFree(I64Type, I32Type))
+    return false;
+
+  InstructionCost CostOp64 =
+      TTI.getArithmeticInstrCost(Opc, I64Type, TTI::TCK_RecipThroughput);
+  InstructionCost CostOp32 =
+      TTI.getArithmeticInstrCost(Opc, I32Type, TTI::TCK_RecipThroughput);
+  InstructionCost CostZext64 = TTI.getCastInstrCost(
+      Instruction::ZExt, I64Type, I32Type, TTI.getCastContextHint(&I),
+      TTI::TCK_RecipThroughput);
+  // Narrowing pays off only if the 64-bit op costs more than the 32-bit op
+  // plus the zext that replaces it.
+  if ((CostOp64 - CostOp32) <= CostZext64)
+    return false;
+
+  // Largest mask constant for which the narrowed op cannot wrap: two values
+  // below 2^31 add to less than 2^32, and two values below 2^16 multiply to
+  // less than 2^32. No magnitude bound makes sub safe, because a negative
+  // difference does not zero-extend to the original 64-bit result.
+  uint64_t MaxOperand;
+  switch (Opc) {
+  case Instruction::Add:
+    MaxOperand = 0x7fffffff;
+    break;
+  case Instruction::Mul:
+    MaxOperand = 0xffff;
+    break;
+  default:
+    return false;
+  }
+
+  // Both operands must be masked by constants no larger than MaxOperand.
+  uint64_t AndConst0, AndConst1;
+  if (match(I.getOperand(0), m_c_And(m_Value(), m_ConstantInt(AndConst0))) &&
+      AndConst0 <= MaxOperand &&
+      match(I.getOperand(1), m_c_And(m_Value(), m_ConstantInt(AndConst1))) &&
+      AndConst1 <= MaxOperand) {
+    IRBuilder<> Builder(&I);
+    Value *Trunc0 = Builder.CreateTrunc(I.getOperand(0), I32Type);
+    Value *Trunc1 = Builder.CreateTrunc(I.getOperand(1), I32Type);
+    // Recreate the original operation, not a hard-coded add, in 32 bits.
+    Value *Arith32 = Builder.CreateBinOp(
+        static_cast<Instruction::BinaryOps>(Opc), Trunc0, Trunc1);
+    Value *Zext64 = Builder.CreateZExt(Arith32, I64Type);
+    I.replaceAllUsesWith(Zext64);
+    I.eraseFromParent();
+    return true;
+  }
+  return false;
+}
+
 /// This is the entry point for folds that could be implemented in regular
 /// InstCombine, but they are separated because they are not expected to
 /// occur frequently and/or have more than a constant-length pattern match.
@@ -1256,6 +1299,7 @@ static bool foldUnusualPatterns(Function &F, DominatorTree &DT,
+      // This fold may erase `I` too, so it is placed right before
+      // foldLibCalls and the remaining folds are skipped once it fires.
+      if (tryNarrowMathIfNoOverflow(I, TTI)) {
+        MadeChange = true;
+        continue;
+      }
       // NOTE: This function introduces erasing of the instruction `I`, so it
       // needs to be called at the end of this sequence, otherwise we may make
       // bugs.
       MadeChange |= foldLibCalls(I, TTI, TLI, AC, DT, DL, MadeCFGChange);
     }
   }
 

``````````
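
On a target like AMDGPU, where truncating i64 to i32 is free and 64-bit integer ops are costed above their 32-bit counterparts, the fold should be reproducible with `opt -passes=aggressive-instcombine` on IR like the `@src` function above; whether it fires on a given subtarget depends entirely on its TTI cost model.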

</details>


https://github.com/llvm/llvm-project/pull/130577


More information about the llvm-commits mailing list