[llvm] 6b83fe5 - [RISCV] Strength reduce mul by 2^n + 2/4/8 + 1 (#88911)

Tue Apr 16 11:03:57 PDT 2024

Author: Philip Reames
Date: 2024-04-16T11:03:53-07:00
New Revision: 6b83fe552990966fdad0e5693a79b02b87d9526e

URL: https://github.com/llvm/llvm-project/commit/6b83fe552990966fdad0e5693a79b02b87d9526e
DIFF: https://github.com/llvm/llvm-project/commit/6b83fe552990966fdad0e5693a79b02b87d9526e.diff

LOG: [RISCV] Strength reduce mul by 2^n + 2/4/8 + 1 (#88911)

With zba, we can expand this to (add (shl X, C1), (shXadd X, X)).

Note that this is our first expansion to a three instruction sequence. I
believe this to general be a reasonable tradeoff for most architectures,
but we may want to (someday) consider a tuning flag here.

I plan to support 2^n + (2/4/8 + 1) eventually as well, but that comes
behind 2^N - 2^M. Both are also three instruction sequences.

---------

Co-authored-by: Min-Yih Hsu <min at myhsu.dev>

Added: 
    

Modified: 
    llvm/lib/Target/RISCV/RISCVISelLowering.cpp
    llvm/test/CodeGen/RISCV/rv64zba.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index de2ad639f0d6c8..dc7c6f83b98579 100644

--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -13437,6 +13437,43 @@ static SDValue expandMul(SDNode *N, SelectionDAG &DAG,
       return DAG.getNode(ISD::ADD, DL, VT, Shift1, Shift2);
     }
   }
+
+  // 2^(1,2,3) * 3,5,9 + 1 -> (shXadd (shYadd x, x), x)
+  // Matched in tablegen, avoid perturbing patterns.
+  switch (MulAmt) {
+  case 11:
+  case 13:
+  case 19:
+  case 21:
+  case 25:
+  case 27:
+  case 29:
+  case 37:
+  case 41:
+  case 45:
+  case 73:
+  case 91:
+    return SDValue();
+  default:
+    break;
+  }
+
+  // 2^n + 2/4/8 + 1 -> (add (shl X, C1), (shXadd X, X))
+  if (MulAmt > 2 && isPowerOf2_64((MulAmt - 1) & (MulAmt - 2))) {
+    unsigned ScaleShift = llvm::countr_zero(MulAmt - 1);
+    if (ScaleShift >= 1 && ScaleShift < 4) {
+      unsigned ShiftAmt = Log2_64(((MulAmt - 1) & (MulAmt - 2)));
+      SDLoc DL(N);
+      SDValue Shift1 = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
+                                   DAG.getConstant(ShiftAmt, DL, VT));
+      SDValue Shift2 = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
+                                   DAG.getConstant(ScaleShift, DL, VT));
+      return DAG.getNode(
+          ISD::ADD, DL, VT, Shift1,
+          DAG.getNode(ISD::ADD, DL, VT, Shift2, N->getOperand(0)));
+    }
+  }
+
   return SDValue();
 }
 

diff  --git a/llvm/test/CodeGen/RISCV/rv64zba.ll b/llvm/test/CodeGen/RISCV/rv64zba.ll
index c3c757656be933..b4c80b60e0bad5 100644
--- a/llvm/test/CodeGen/RISCV/rv64zba.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zba.ll
@@ -598,31 +598,52 @@ define i64 @mul125(i64 %a) {
 }
 
 define i64 @mul131(i64 %a) {
-; CHECK-LABEL: mul131:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    li a1, 131
-; CHECK-NEXT:    mul a0, a0, a1
-; CHECK-NEXT:    ret
+; RV64I-LABEL: mul131:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a1, 131
+; RV64I-NEXT:    mul a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: mul131:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    sh1add a1, a0, a0
+; RV64ZBA-NEXT:    slli a0, a0, 7
+; RV64ZBA-NEXT:    add a0, a0, a1
+; RV64ZBA-NEXT:    ret
   %c = mul i64 %a, 131
   ret i64 %c
 }
 
 define i64 @mul133(i64 %a) {
-; CHECK-LABEL: mul133:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    li a1, 133
-; CHECK-NEXT:    mul a0, a0, a1
-; CHECK-NEXT:    ret
+; RV64I-LABEL: mul133:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a1, 133
+; RV64I-NEXT:    mul a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: mul133:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    sh2add a1, a0, a0
+; RV64ZBA-NEXT:    slli a0, a0, 7
+; RV64ZBA-NEXT:    add a0, a0, a1
+; RV64ZBA-NEXT:    ret
   %c = mul i64 %a, 133
   ret i64 %c
 }
 
 define i64 @mul137(i64 %a) {
-; CHECK-LABEL: mul137:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    li a1, 137
-; CHECK-NEXT:    mul a0, a0, a1
-; CHECK-NEXT:    ret
+; RV64I-LABEL: mul137:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a1, 137
+; RV64I-NEXT:    mul a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: mul137:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    sh3add a1, a0, a0
+; RV64ZBA-NEXT:    slli a0, a0, 7
+; RV64ZBA-NEXT:    add a0, a0, a1
+; RV64ZBA-NEXT:    ret
   %c = mul i64 %a, 137
   ret i64 %c
 }