[llvm] [ISel] Commute FMUL and inserting zero into vector lane (PR #146096)

Florian Hahn via llvm-commits llvm-commits at lists.llvm.org
Fri Jul 18 06:52:11 PDT 2025


================
@@ -1186,7 +1186,69 @@ define double @fmul_lane_d(double %A, <2 x double> %vec) nounwind {
   ret double %res
 }
 
+define <4 x float> @fmul_insert_zero(<4 x float> %A, <4 x float> %B) {
+; CHECK-LABEL: fmul_insert_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi d2, #0000000000000000
+; CHECK-NEXT:    mov.s v0[3], v2[0]
+; CHECK-NEXT:    fmul.4s v0, v0, v1
+; CHECK-NEXT:    ret
+  %mul = fmul fast <4 x float> %A, %B
+  %mul_set_lane = insertelement <4 x float> %mul, float 0.000000e+00, i64 3
+  ret <4 x float> %mul_set_lane
+}
+
+define <4 x float> @fmul_insert_zero_same(<4 x float> %A) {
+; CHECK-LABEL: fmul_insert_zero_same:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi d1, #0000000000000000
+; CHECK-NEXT:    mov.s v0[3], v1[0]
+; CHECK-NEXT:    fmul.4s v0, v0, v0
+; CHECK-NEXT:    ret
+  %mul = fmul fast <4 x float> %A, %A
+  %mul_set_lane = insertelement <4 x float> %mul, float 0.000000e+00, i64 3
+  ret <4 x float> %mul_set_lane
+}
+
+define <4 x float> @fmul_insert_zero1(<4 x float> %A, <4 x float> %B, <4 x float> %C) {
+; CHECK-LABEL: fmul_insert_zero1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi d3, #0000000000000000
+; CHECK-NEXT:    fsub.4s v0, v2, v0
+; CHECK-NEXT:    mov.s v1[3], v3[0]
+; CHECK-NEXT:    fmul.4s v0, v1, v0
+; CHECK-NEXT:    ret
+  %sub = fsub <4 x float> %C, %A
+  %mul = fmul fast <4 x float> %B, %sub
+  %mul_set_lane = insertelement <4 x float> %mul, float 0.000000e+00, i64 3
+  ret <4 x float> %mul_set_lane
+}
 
+define <4 x float> @fmul_insert_zero2(<4 x float> %A, <4 x float> %B) {
+; CHECK-LABEL: fmul_insert_zero2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi d2, #0000000000000000
+; CHECK-NEXT:    mov.s v0[3], v2[0]
+; CHECK-NEXT:    fmul.4s v0, v0, v1
+; CHECK-NEXT:    fsub.4s v0, v1, v0
+; CHECK-NEXT:    ret
+  %mul = fmul fast <4 x float> %B, %A
----------------
fhahn wrote:

`fast` includes additional flags. Could you add a test with just the needed flags (`nnan nsz ninf`)? Then I think it should be good to go!

https://github.com/llvm/llvm-project/pull/146096


More information about the llvm-commits mailing list