[llvm] [X86] Lower i512 ADD/SUB using Kogge-Stone on AVX512 (PR #174761)
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Wed Feb 4 05:02:15 PST 2026
================
@@ -34259,6 +34265,72 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
Results.push_back(DAG.getBitcast(VT, Op));
return;
}
+ case ISD::ADD:
+ case ISD::SUB: {
+ // Use Kogge-Stone parallel carry/borrow propagation for i512 add/sub.
+ // Article : https://www.numberworld.org/y-cruncher/internals/addition.html
+ // related work : combineStore -> if (VT == MVT::i256 || VT == MVT::i512)
+ // TODO: ISD::UADDO_CARRY
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ EVT VT = N->getValueType(0);
+ bool IsAdd = Opc == ISD::ADD;
+ assert(Subtarget.useAVX512Regs() && "AVX512 required");
+ assert(VT == MVT::i512 && "Unexpected VT!");
+
+ if (!mayFoldIntoVector(LHS, Subtarget) ||
+ !mayFoldIntoVector(RHS, Subtarget))
+ return;
+
+ MVT VecVT = MVT::v8i64;
+ MVT BoolVT = MVT::v8i1;
+
+ if (isOneConstant(RHS)) {
+ RHS = DAG.getAllOnesConstant(dl, VecVT);
+ ;
+ Opc = (IsAdd ? ISD::SUB : ISD::ADD);
+ IsAdd = !IsAdd;
+ // LHS + 1 => LHS - (-1), LHS - 1 => LHS + (-1)
+ // We reuse an all-ones vector constant to do less work; this optimization
+ // makes sense since inc/dec operations are common.
+ }
+
+ SDValue Vec0 = DAG.getBitcast(VecVT, LHS);
+ SDValue Vec1 = DAG.getBitcast(VecVT, RHS);
+
+ SDValue Partial = DAG.getNode(Opc, dl, VecVT, Vec0, Vec1);
+
+ ISD::CondCode CarryCC = IsAdd ? ISD::SETULT : ISD::SETUGT;
+ SDValue Carry = DAG.getSetCC(dl, BoolVT, Partial, Vec0, CarryCC);
+
+ SDValue PropCmp = IsAdd ? DAG.getAllOnesConstant(dl, VecVT)
+ : DAG.getConstant(0, dl, VecVT);
+ SDValue Propagate = DAG.getSetCC(dl, BoolVT, Partial, PropCmp, ISD::SETEQ);
+
+ SDValue CarryIn = DAG.getBitcast(MVT::i8, Carry);
+ SDValue PropIn = DAG.getBitcast(MVT::i8, Propagate);
+ CarryIn = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, CarryIn);
+ PropIn = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, PropIn);
+
+ SDValue ShiftedCarry =
+ DAG.getNode(ISD::SHL, dl, MVT::i32, CarryIn,
+ DAG.getShiftAmountConstant(1, MVT::i8, dl));
+ SDValue CarryOut =
+ DAG.getNode(ISD::ADD, dl, MVT::i32, ShiftedCarry, PropIn);
+
+ SDValue CorrMask = DAG.getNode(ISD::XOR, dl, MVT::i32, PropIn, CarryOut);
+ CorrMask = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, CorrMask);
+ SDValue CorrVec = DAG.getNode(ISD::BITCAST, dl, BoolVT, CorrMask);
----------------
RKSimon wrote:
```suggestion
SDValue CorrVec = DAG.getBitcast(BoolVT, CorrMask);
```
https://github.com/llvm/llvm-project/pull/174761
More information about the llvm-commits
mailing list