[llvm] [AMDGPU]: Accept constant zero bytes in v_perm OrCombine (PR #66533)

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Tue Feb 6 21:18:43 PST 2024


================
@@ -11610,6 +11610,29 @@ calculateSrcByte(const SDValue Op, uint64_t DestByte, uint64_t SrcIndex = 0,
     return calculateSrcByte(Op->getOperand(0), DestByte, SrcIndex, Depth + 1);
   }
 
+  case ISD::EXTRACT_VECTOR_ELT: {
+    auto IdxOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
+    if (!IdxOp)
+      return std::nullopt;
+    auto VecIdx = IdxOp->getZExtValue();
+    auto ScalarSize = Op.getScalarValueSizeInBits();
+
+    assert((ScalarSize >= 8) && !(ScalarSize % 8));
+
+    if (ScalarSize < 32) {
+      // TODO: support greater than 32 bit sources
+      if ((VecIdx + 1) * ScalarSize > 32)
+        return std::nullopt;
+
+      SrcIndex = VecIdx * ScalarSize / 8 + SrcIndex;
+      return calculateSrcByte(Op->getOperand(0), DestByte, SrcIndex, Depth + 1);
+    }
+
+    // The scalar is 32 bits, so just use the scalar
+    // TODO: support greater than 32 bit sources
+    return ByteProvider<SDValue>::getSrc(Op, DestByte, SrcIndex);
+  }
+
----------------
arsenm wrote:

Extra blank line 

https://github.com/llvm/llvm-project/pull/66533


More information about the llvm-commits mailing list