[llvm] ea3683e - [RISCV] Improve type promotion for i32 clmulr/clmulh on RV64.

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Tue Jul 18 10:39:39 PDT 2023


Author: Craig Topper
Date: 2023-07-18T10:39:25-07:00
New Revision: ea3683e98f107622ce54fa3d4c1457f4d597f808

URL: https://github.com/llvm/llvm-project/commit/ea3683e98f107622ce54fa3d4c1457f4d597f808
DIFF: https://github.com/llvm/llvm-project/commit/ea3683e98f107622ce54fa3d4c1457f4d597f808.diff

LOG: [RISCV] Improve type promotion for i32 clmulr/clmulh on RV64.

Instead of zero extending the inputs by masking, we can shift them
left. This is cheaper when we don't have the zext.w instruction.

This does make the case worse where the inputs are already zero
extended or freely zero extendable, though.
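
In rough C++ terms the two lowerings compare as follows (a sketch
only; clmul64/clmulh64/clmulr64 stand in for the RV64 Zbc
instructions and are not LLVM APIs):

  // Old: mask each input (slli+srli per input without zext.w), then
  // shift the 63-bit clmul product down to the i32 result.
  uint64_t OldH = clmul64(A & 0xffffffff, B & 0xffffffff) >> 32; // clmulh
  uint64_t OldR = clmul64(A & 0xffffffff, B & 0xffffffff) >> 31; // clmulr
  // New: a single slli per input moves the product into the bits that
  // clmulh/clmulr extract; one final shift recovers the i32 result.
  uint64_t NewH = clmulh64(A << 32, B << 32) >> 32;              // clmulh
  uint64_t NewR = clmulr64(A << 32, B << 32) >> 32;              // clmulr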

Reviewed By: wangpc

Differential Revision: https://reviews.llvm.org/D155530

Added: 
    

Modified: 
    llvm/lib/Target/RISCV/RISCVISelLowering.cpp
    llvm/test/CodeGen/RISCV/rv64zbc-intrinsic.ll
    llvm/test/CodeGen/RISCV/rv64zbc-zbkc-intrinsic.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 8376b9add9e9ed..64cee392f55cf7 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -10094,14 +10094,28 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
       if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
         return;
 
+      // Extend inputs to XLen, and shift by 32. This will add 64 trailing zeros
+      // to the full 128-bit clmul result of multiplying two xlen values.
+      // Perform clmulr or clmulh on the shifted values. Finally, extract the
+      // upper 32 bits.
+      //
+      // The alternative is to mask the inputs to 32 bits and use clmul, but
+      // that requires two shifts to mask each input without zext.w.
+      // FIXME: If the inputs are known zero extended or could be freely
+      // zero extended, the mask form would be better.
       SDValue NewOp0 =
-          DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1));
+          DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
       SDValue NewOp1 =
-          DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(2));
-      SDValue Res = DAG.getNode(RISCVISD::CLMUL, DL, MVT::i64, NewOp0, NewOp1);
-      unsigned ShAmt = IntNo == Intrinsic::riscv_clmulh ? 32 : 31;
+          DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
+      NewOp0 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0,
+                           DAG.getConstant(32, DL, MVT::i64));
+      NewOp1 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp1,
+                           DAG.getConstant(32, DL, MVT::i64));
+      unsigned Opc = IntNo == Intrinsic::riscv_clmulh ? RISCVISD::CLMULH
+                                                      : RISCVISD::CLMULR;
+      SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1);
       Res = DAG.getNode(ISD::SRL, DL, MVT::i64, Res,
-                        DAG.getConstant(ShAmt, DL, MVT::i64));
+                        DAG.getConstant(32, DL, MVT::i64));
       Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
       return;
     }

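To sanity check the equivalence, here is a minimal self-contained C++
model of the Zbc carry-less multiplies (the helpers and test values
below are illustrative, not LLVM code):

  #include <cassert>
  #include <cstdint>

  // Software reference: 64x64 -> 128-bit carry-less multiply, returned
  // as Hi:Lo halves.
  static void clmul128(uint64_t A, uint64_t B, uint64_t &Hi, uint64_t &Lo) {
    Hi = Lo = 0;
    for (unsigned I = 0; I < 64; ++I)
      if ((B >> I) & 1) {
        Lo ^= A << I;
        if (I != 0)
          Hi ^= A >> (64 - I);
      }
  }

  // Zbc semantics: clmul is the low half, clmulh the high half, and
  // clmulr the product shifted right by XLEN-1.
  static uint64_t clmul64(uint64_t A, uint64_t B) {
    uint64_t Hi, Lo;
    clmul128(A, B, Hi, Lo);
    return Lo;
  }
  static uint64_t clmulh64(uint64_t A, uint64_t B) {
    uint64_t Hi, Lo;
    clmul128(A, B, Hi, Lo);
    return Hi;
  }
  static uint64_t clmulr64(uint64_t A, uint64_t B) {
    uint64_t Hi, Lo;
    clmul128(A, B, Hi, Lo);
    return (Hi << 1) | (Lo >> 63);
  }

  int main() {
    uint64_t A = 0x12345678, B = 0x9abcdef0;
    // Reference: the product of two 32-bit inputs fits in 63 bits, so
    // the masked clmul returns it whole.
    uint64_t Prod = clmul64(A, B);
    uint32_t H = uint32_t(Prod >> 32); // i32 clmulh
    uint32_t R = uint32_t(Prod >> 31); // i32 clmulr
    // New lowering: shift inputs up by 32, then clmulh/clmulr + srl 32.
    assert(uint32_t(clmulh64(A << 32, B << 32) >> 32) == H);
    assert(uint32_t(clmulr64(A << 32, B << 32) >> 32) == R);
    return 0;
  }
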
diff --git a/llvm/test/CodeGen/RISCV/rv64zbc-intrinsic.ll b/llvm/test/CodeGen/RISCV/rv64zbc-intrinsic.ll
index 368d9fb2f7e02e..ef42c15b6b986a 100644
--- a/llvm/test/CodeGen/RISCV/rv64zbc-intrinsic.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zbc-intrinsic.ll
@@ -19,12 +19,23 @@ define signext i32 @clmul32r(i32 signext %a, i32 signext %b) nounwind {
 ; RV64ZBC-LABEL: clmul32r:
 ; RV64ZBC:       # %bb.0:
 ; RV64ZBC-NEXT:    slli a1, a1, 32
-; RV64ZBC-NEXT:    srli a1, a1, 32
 ; RV64ZBC-NEXT:    slli a0, a0, 32
-; RV64ZBC-NEXT:    srli a0, a0, 32
-; RV64ZBC-NEXT:    clmul a0, a0, a1
-; RV64ZBC-NEXT:    srli a0, a0, 31
-; RV64ZBC-NEXT:    sext.w a0, a0
+; RV64ZBC-NEXT:    clmulr a0, a0, a1
+; RV64ZBC-NEXT:    srai a0, a0, 32
+; RV64ZBC-NEXT:    ret
+  %tmp = call i32 @llvm.riscv.clmulr.i32(i32 %a, i32 %b)
+  ret i32 %tmp
+}
+
+; FIXME: We could avoid the slli instructions by using clmul+srli+sext.w since
+; the inputs are zero extended.
+define signext i32 @clmul32r_zext(i32 zeroext %a, i32 zeroext %b) nounwind {
+; RV64ZBC-LABEL: clmul32r_zext:
+; RV64ZBC:       # %bb.0:
+; RV64ZBC-NEXT:    slli a1, a1, 32
+; RV64ZBC-NEXT:    slli a0, a0, 32
+; RV64ZBC-NEXT:    clmulr a0, a0, a1
+; RV64ZBC-NEXT:    srai a0, a0, 32
 ; RV64ZBC-NEXT:    ret
   %tmp = call i32 @llvm.riscv.clmulr.i32(i32 %a, i32 %b)
   ret i32 %tmp

diff --git a/llvm/test/CodeGen/RISCV/rv64zbc-zbkc-intrinsic.ll b/llvm/test/CodeGen/RISCV/rv64zbc-zbkc-intrinsic.ll
index a7ba0412bb2750..aa9e89bc20953c 100644
--- a/llvm/test/CodeGen/RISCV/rv64zbc-zbkc-intrinsic.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zbc-zbkc-intrinsic.ll
@@ -44,13 +44,24 @@ define signext i32 @clmul32h(i32 signext %a, i32 signext %b) nounwind {
 ; RV64ZBC-ZBKC-LABEL: clmul32h:
 ; RV64ZBC-ZBKC:       # %bb.0:
 ; RV64ZBC-ZBKC-NEXT:    slli a1, a1, 32
-; RV64ZBC-ZBKC-NEXT:    srli a1, a1, 32
 ; RV64ZBC-ZBKC-NEXT:    slli a0, a0, 32
-; RV64ZBC-ZBKC-NEXT:    srli a0, a0, 32
-; RV64ZBC-ZBKC-NEXT:    clmul a0, a0, a1
+; RV64ZBC-ZBKC-NEXT:    clmulh a0, a0, a1
 ; RV64ZBC-ZBKC-NEXT:    srai a0, a0, 32
 ; RV64ZBC-ZBKC-NEXT:    ret
   %tmp = call i32 @llvm.riscv.clmulh.i32(i32 %a, i32 %b)
   ret i32 %tmp
 }
 
+; FIXME: We could avoid the slli instructions by using clmul+srai since the
+; inputs are zero extended.
+define signext i32 @clmul32h_zext(i32 zeroext %a, i32 zeroext %b) nounwind {
+; RV64ZBC-ZBKC-LABEL: clmul32h_zext:
+; RV64ZBC-ZBKC:       # %bb.0:
+; RV64ZBC-ZBKC-NEXT:    slli a1, a1, 32
+; RV64ZBC-ZBKC-NEXT:    slli a0, a0, 32
+; RV64ZBC-ZBKC-NEXT:    clmulh a0, a0, a1
+; RV64ZBC-ZBKC-NEXT:    srai a0, a0, 32
+; RV64ZBC-ZBKC-NEXT:    ret
+  %tmp = call i32 @llvm.riscv.clmulh.i32(i32 %a, i32 %b)
+  ret i32 %tmp
+}
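
In the same sketch notation, the cheaper sequences the two FIXMEs
describe would skip the shifts because the inputs are already zero
extended (hypothetical, not what this patch emits):

  // i32 clmulr, zeroext inputs: clmul + srli + sext.w. Bit 31 of the
  // result can be set, so a sext.w is needed for the signext return.
  //   res = sext.w(clmul64(a, b) >> 31)
  // i32 clmulh, zeroext inputs: clmul + srai. The 63-bit product keeps
  // bit 63 clear, so srai acts as a logical shift and the result is
  // already sign extended.
  //   res = clmul64(a, b) >> 32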