[llvm] [AMDGPU] Try to simplify select v32i4 case by legalizing v16i4 (PR #173328)

Tue Jan 20 08:26:17 PST 2026

================
@@ -17048,6 +17044,53 @@ SDValue SITargetLowering::performSelectCombine(SDNode *N,
                          SelectLHS, SelectRHS);
 }
 
+// Try to convert vXiY into vZi32 with X * Y = Z * 32
+SDValue SITargetLowering::castTypeSelect(SDNode *N, DAGCombinerInfo &DCI,
+                                         SDValue &Cond, SDValue &TrueVal,
+                                         SDValue &FalseVal) const {
+  if (N->getNumValues() != 1)
+    return SDValue();
+
+  EVT ResultVT = N->getValueType(0);
+  if (ResultVT.isSimple() || !ResultVT.isVector() ||
+      !ResultVT.isPow2VectorType())
+    return SDValue();
+
+  EVT EltVT = ResultVT.getVectorElementType();
+  unsigned EltBitSize = EltVT.getSizeInBits();
+  ElementCount NumElts = ResultVT.getVectorElementCount();
+  if (!EltVT.isInteger() || !isPowerOf2_32(EltBitSize) || NumElts.isScalar())
+    return SDValue();
----------------
arsenm wrote:

I don't think the power-of-2 size restriction matters. This is probably profitable for any illegal type.

https://github.com/llvm/llvm-project/pull/173328