[PATCH] D155530: [RISCV] Improve type promotion for i32 clmulr/clmulh on RV64.

Mon Jul 17 17:58:51 PDT 2023

craig.topper created this revision.
craig.topper added reviewers: asb, VincentWu, wangpc.
Herald added subscribers: jobnoorman, luke, vkmr, frasercrmck, luismarques, apazos, sameer.abuasal, s.egerton, Jim, benna, psnobl, jocewei, PkmX, the_o, brucehoult, MartinMosbeck, rogfer01, edward-jones, zzheng, jrtc27, shiva0217, kito-cheng, niosHD, sabuasal, simoncook, johnrusso, rbar, hiraditya, arichardson.
Herald added a project: All.
craig.topper requested review of this revision.
Herald added subscribers: eopXD, MaskRay.
Herald added a project: LLVM.

Instead of zero extending the inputs by masking, we can shift them
left instead. This is cheaper when we don't have the zext.w instruction.

This does make the case where the inputs are already zero extended
or freely zero extendable worse though.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D155530

Files:
  llvm/lib/Target/RISCV/RISCVISelLowering.cpp
  llvm/test/CodeGen/RISCV/rv64zbc-intrinsic.ll
  llvm/test/CodeGen/RISCV/rv64zbc-zbkc-intrinsic.ll


Index: llvm/test/CodeGen/RISCV/rv64zbc-zbkc-intrinsic.ll
===================================================================

--- llvm/test/CodeGen/RISCV/rv64zbc-zbkc-intrinsic.ll
+++ llvm/test/CodeGen/RISCV/rv64zbc-zbkc-intrinsic.ll
@@ -44,10 +44,8 @@
 ; RV64ZBC-ZBKC-LABEL: clmul32h:
 ; RV64ZBC-ZBKC:       # %bb.0:
 ; RV64ZBC-ZBKC-NEXT:    slli a1, a1, 32
-; RV64ZBC-ZBKC-NEXT:    srli a1, a1, 32
 ; RV64ZBC-ZBKC-NEXT:    slli a0, a0, 32
-; RV64ZBC-ZBKC-NEXT:    srli a0, a0, 32
-; RV64ZBC-ZBKC-NEXT:    clmul a0, a0, a1
+; RV64ZBC-ZBKC-NEXT:    clmulh a0, a0, a1
 ; RV64ZBC-ZBKC-NEXT:    srai a0, a0, 32
 ; RV64ZBC-ZBKC-NEXT:    ret
   %tmp = call i32 @llvm.riscv.clmulh.i32(i32 %a, i32 %b)
Index: llvm/test/CodeGen/RISCV/rv64zbc-intrinsic.ll
===================================================================
--- llvm/test/CodeGen/RISCV/rv64zbc-intrinsic.ll
+++ llvm/test/CodeGen/RISCV/rv64zbc-intrinsic.ll
@@ -19,12 +19,9 @@
 ; RV64ZBC-LABEL: clmul32r:
 ; RV64ZBC:       # %bb.0:
 ; RV64ZBC-NEXT:    slli a1, a1, 32
-; RV64ZBC-NEXT:    srli a1, a1, 32
 ; RV64ZBC-NEXT:    slli a0, a0, 32
-; RV64ZBC-NEXT:    srli a0, a0, 32
-; RV64ZBC-NEXT:    clmul a0, a0, a1
-; RV64ZBC-NEXT:    srli a0, a0, 31
-; RV64ZBC-NEXT:    sext.w a0, a0
+; RV64ZBC-NEXT:    clmulr a0, a0, a1
+; RV64ZBC-NEXT:    srai a0, a0, 32
 ; RV64ZBC-NEXT:    ret
   %tmp = call i32 @llvm.riscv.clmulr.i32(i32 %a, i32 %b)
   ret i32 %tmp
Index: llvm/lib/Target/RISCV/RISCVISelLowering.cpp
===================================================================
--- llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -10054,14 +10054,28 @@
       if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
         return;
 
+      // Extend inputs to XLen, and shift by 32. This will add 64 trailing zeros
+      // to the full 128-bit clmul result of multiplying two xlen values.
+      // Perform clmulr or clmulh on the shifted values. Finally, extract the
+      // upper 32 bits.
+      //
+      // The alternative is to mask the inputs to 32 bits and use clmul, but
+      // that requires two shifts to mask each input without zext.w.
+      // FIXME: If the inputs are known zero extended or could be freely
+      // zero extended, the mask form would be better.
       SDValue NewOp0 =
-          DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1));
+          DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
       SDValue NewOp1 =
-          DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(2));
-      SDValue Res = DAG.getNode(RISCVISD::CLMUL, DL, MVT::i64, NewOp0, NewOp1);
-      unsigned ShAmt = IntNo == Intrinsic::riscv_clmulh ? 32 : 31;
+          DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
+      NewOp0 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0,
+                           DAG.getConstant(32, DL, MVT::i64));
+      NewOp1 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp1,
+                           DAG.getConstant(32, DL, MVT::i64));
+      unsigned Opc = IntNo == Intrinsic::riscv_clmulh ? RISCVISD::CLMULH
+                                                      : RISCVISD::CLMULR;
+      SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1);
       Res = DAG.getNode(ISD::SRL, DL, MVT::i64, Res,
-                        DAG.getConstant(ShAmt, DL, MVT::i64));
+                        DAG.getConstant(32, DL, MVT::i64));
       Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
       return;
     }


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D155530.541285.patch
Type: text/x-patch
Size: 3516 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20230718/a7b72d07/attachment.bin>