[llvm] [X86] Combine `uitofp <v x i32> to <v x half>` (PR #121809)

Tue Jan 7 02:32:44 PST 2025

================
@@ -1777,6 +1777,33 @@ void VectorLegalizer::ExpandUINT_TO_FLOAT(SDNode *Node,
   assert((BW == 64 || BW == 32) &&
          "Elements in vector-UINT_TO_FP must be 32 or 64 bits wide");
 
+  // If STRICT_/FMUL is not supported by the target (in case of f16) replace the
+  // UINT_TO_FP with a larger float and round to the smaller type
+  if ((!IsStrict &&
+       TLI.getOperationAction(ISD::FMUL, DstVT) == TargetLowering::Expand) ||
+      (IsStrict && TLI.getOperationAction(ISD::STRICT_FMUL, DstVT) ==
+                       TargetLowering::Expand)) {
+    EVT FPVT = BW == 32 ? MVT::f32 : MVT::f64;
+    SDValue UIToFP;
+    SDValue Result;
+    SDValue TargetZero = DAG.getIntPtrConstant(0, DL, /*isTarget=*/true);
+    EVT FloatVecVT = SrcVT.changeVectorElementType(FPVT);
+    if (IsStrict) {
+      UIToFP = DAG.getNode(ISD::STRICT_UINT_TO_FP, DL, {FloatVecVT, MVT::Other},
+                           {Node->getOperand(0), Src});
+      Result = DAG.getNode(ISD::STRICT_FP_ROUND, DL, {DstVT, MVT::Other},
+                           {Node->getOperand(0), UIToFP, TargetZero});
+      Results.push_back(Result);
+      Results.push_back(Result.getValue(1));
+    } else {
+      UIToFP = DAG.getNode(ISD::UINT_TO_FP, DL, FloatVecVT, Src);
+      Result = DAG.getNode(ISD::FP_ROUND, DL, DstVT, UIToFP, TargetZero);
+      Results.push_back(Result);
+    }
----------------
abhishek-kaushik22 wrote:

On `avx2` I don't see an `fmul`; instead, there is an `X86ISD::BLENDI`:
```
Legalizing vector op: t9: v8f32 = uint_to_fp t2
Trying custom legalization
Creating constant: t11: i32 = Constant<1258291200>
Creating new node: t12: v8i32 = BUILD_VECTOR Constant:i32<1258291200>, Constant:i32<1258291200>, Constant:i32<1258291200>, Constant:i32<1258291200>, Constant:i32<1258291200>, Constant:i32<1258291200>, Constant:i32<1258291200>, Constant:i32<1258291200>
Creating constant: t13: i32 = Constant<1392508928>
Creating new node: t14: v8i32 = BUILD_VECTOR Constant:i32<1392508928>, Constant:i32<1392508928>, Constant:i32<1392508928>, Constant:i32<1392508928>, Constant:i32<1392508928>, Constant:i32<1392508928>, Constant:i32<1392508928>, Constant:i32<1392508928>
Creating constant: t15: i32 = Constant<16>
Creating new node: t16: v8i32 = BUILD_VECTOR Constant:i32<16>, Constant:i32<16>, Constant:i32<16>, Constant:i32<16>, Constant:i32<16>, Constant:i32<16>, Constant:i32<16>, Constant:i32<16>
Creating new node: t17: v8i32 = srl t2, t16
Creating new node: t18: v16i16 = bitcast t12
Creating new node: t19: v16i16 = bitcast t2
Creating constant: t20: i8 = TargetConstant<-86>
Creating new node: t21: v16i16 = X86ISD::BLENDI t19, t18, TargetConstant:i8<-86>
Creating new node: t22: v16i16 = bitcast t14
Creating new node: t23: v16i16 = bitcast t17
Creating new node: t24: v16i16 = X86ISD::BLENDI t23, t22, TargetConstant:i8<-86>
Creating new node: t26: v8f32 = BUILD_VECTOR ConstantFP:f32<5.497642e+11>, ConstantFP:f32<5.497642e+11>, ConstantFP:f32<5.497642e+11>, ConstantFP:f32<5.497642e+11>, ConstantFP:f32<5.497642e+11>, ConstantFP:f32<5.497642e+11>, ConstantFP:f32<5.497642e+11>, ConstantFP:f32<5.497642e+11>
Creating fp constant: t26: v8f32 = BUILD_VECTOR ConstantFP:f32<5.497642e+11>, ConstantFP:f32<5.497642e+11>, ConstantFP:f32<5.497642e+11>, ConstantFP:f32<5.497642e+11>, ConstantFP:f32<5.497642e+11>, ConstantFP:f32<5.497642e+11>, ConstantFP:f32<5.497642e+11>, ConstantFP:f32<5.497642e+11>
Creating new node: t27: v8f32 = bitcast t24
Creating new node: t28: v8f32 = bitcast t21
Creating new node: t29: v8f32 = fsub t27, t26
Creating new node: t30: v8f32 = fadd t28, t29
```

https://github.com/llvm/llvm-project/pull/121809