[llvm] [RISCV] Add DAG combine to convert (iN reduce.add (zext (vXi1 A to vXiN)) into vcpop.m (PR #127497)

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Wed Feb 19 11:21:30 PST 2025


================
@@ -18100,25 +18100,38 @@ static SDValue combineTruncToVnclip(SDNode *N, SelectionDAG &DAG,
 //   (iX ctpop (bitcast (vXi1 A)))
 // ->
 //   (zext (vcpop.m (nxvYi1 (insert_subvec (vXi1 A)))))
+// and
+//   (iN reduce.add (zext (vXi1 A to vXiN))
+// ->
+//   (zext (vcpop.m (nxvYi1 (insert_subvec (vXi1 A)))))
 // FIXME: It's complicated to match all the variations of this after type
 // legalization so we only handle the pre-type legalization pattern, but that
 // requires the fixed vector type to be legal.
-static SDValue combineScalarCTPOPToVCPOP(SDNode *N, SelectionDAG &DAG,
-                                         const RISCVSubtarget &Subtarget) {
+static SDValue combineToVCPOP(SDNode *N, SelectionDAG &DAG,
+                              const RISCVSubtarget &Subtarget) {
+  unsigned Opc = N->getOpcode();
+  assert((Opc == ISD::CTPOP || Opc == ISD::VECREDUCE_ADD) &&
+         "Unexpected opcode");
   EVT VT = N->getValueType(0);
   if (!VT.isScalarInteger())
     return SDValue();
 
   SDValue Src = N->getOperand(0);
 
-  // Peek through zero_extend. It doesn't change the count.
-  if (Src.getOpcode() == ISD::ZERO_EXTEND)
-    Src = Src.getOperand(0);
+  if (Opc == ISD::CTPOP) {
+    // Peek through zero_extend. It doesn't change the count.
+    if (Src.getOpcode() == ISD::ZERO_EXTEND)
+      Src = Src.getOperand(0);
 
-  if (Src.getOpcode() != ISD::BITCAST)
-    return SDValue();
+    if (Src.getOpcode() != ISD::BITCAST)
+      return SDValue();
+    Src = Src.getOperand(0);
+  } else if (Opc == ISD::VECREDUCE_ADD) {
+    if (Src.getOpcode() != ISD::ZERO_EXTEND)
+      return SDValue();
----------------
topperc wrote:

The return type of your function is declared as `i4` without any attributes, so only the lowest 4 bits of the result are required to be valid; the upper bits can be any value. If you add a `zeroext` attribute to the return value, an `andi` instruction will be generated to clear the upper bits.

```
define zeroext i4 @test_narrow_v16i1(<16 x i1> %x) {
entry:
    %a = zext <16 x i1> %x to <16 x i4>
    %b = call i4 @llvm.vector.reduce.add.v16i4(<16 x i4> %a)
    ret i4 %b
}
```

Type promotion makes it the responsibility of the consumer to zero- or sign-extend the upper bits if needed. With no attributes, the consumer is an `any_extend`, so the upper bits don't need to be touched. With `zeroext`, the consumer is a `zero_extend`, so the upper bits need to be cleared to match the semantics of the unpromoted `zero_extend`.

https://github.com/llvm/llvm-project/pull/127497


More information about the llvm-commits mailing list