[llvm] [AArch64] Improve mull generation (PR #114997)

Wed Nov 6 08:15:30 PST 2024

================
@@ -2405,25 +2400,16 @@ define <2 x i32> @do_stuff(<2 x i64> %0, <2 x i64> %1) {
 define <2 x i64> @lsr(<2 x i64> %a, <2 x i64> %b) {
 ; CHECK-NEON-LABEL: lsr:
 ; CHECK-NEON:       // %bb.0:
-; CHECK-NEON-NEXT:    ushr v0.2d, v0.2d, #32
-; CHECK-NEON-NEXT:    ushr v1.2d, v1.2d, #32
-; CHECK-NEON-NEXT:    fmov x10, d1
-; CHECK-NEON-NEXT:    fmov x11, d0
-; CHECK-NEON-NEXT:    mov x8, v1.d[1]
-; CHECK-NEON-NEXT:    mov x9, v0.d[1]
-; CHECK-NEON-NEXT:    umull x10, w11, w10
-; CHECK-NEON-NEXT:    umull x8, w9, w8
-; CHECK-NEON-NEXT:    fmov d0, x10
-; CHECK-NEON-NEXT:    mov v0.d[1], x8
+; CHECK-NEON-NEXT:    shrn v0.2s, v0.2d, #32
+; CHECK-NEON-NEXT:    shrn v1.2s, v1.2d, #32
+; CHECK-NEON-NEXT:    umull v0.2d, v0.2s, v1.2s
----------------
david-arm wrote:

Do you also plan to change the vectoriser cost model to reflect the lower cost? Looks like a fairly simple pattern that might encourage more vectorisation.

https://github.com/llvm/llvm-project/pull/114997