[llvm] [SelectionDAG] Use ExpandIntRes_CLMUL to expand vector CLMUL via narrower legal types (PR #184468)
David Green via llvm-commits
llvm-commits at lists.llvm.org
Thu Mar 5 01:05:24 PST 2026
================
@@ -5504,209 +4808,108 @@ define <4 x i32> @clmulh_v4i32_neon(<4 x i32> %a, <4 x i32> %b) nounwind {
define <2 x i32> @clmulh_v2i32_neon(<2 x i32> %a, <2 x i32> %b) nounwind {
; CHECK-NEON-LABEL: clmulh_v2i32_neon:
; CHECK-NEON: // %bb.0:
-; CHECK-NEON-NEXT: mov w8, #2 // =0x2
-; CHECK-NEON-NEXT: mov w9, #1 // =0x1
-; CHECK-NEON-NEXT: ushll v1.2d, v1.2s, #0
-; CHECK-NEON-NEXT: dup v2.2d, x8
-; CHECK-NEON-NEXT: dup v3.2d, x9
-; CHECK-NEON-NEXT: mov w8, #4 // =0x4
-; CHECK-NEON-NEXT: mov w9, #8 // =0x8
-; CHECK-NEON-NEXT: dup v4.2d, x8
-; CHECK-NEON-NEXT: mov w8, #16 // =0x10
-; CHECK-NEON-NEXT: dup v5.2d, x9
-; CHECK-NEON-NEXT: dup v6.2d, x8
-; CHECK-NEON-NEXT: mov w8, #32 // =0x20
-; CHECK-NEON-NEXT: and v2.16b, v1.16b, v2.16b
-; CHECK-NEON-NEXT: and v3.16b, v1.16b, v3.16b
-; CHECK-NEON-NEXT: dup v7.2d, x8
-; CHECK-NEON-NEXT: and v4.16b, v1.16b, v4.16b
-; CHECK-NEON-NEXT: mov w8, #64 // =0x40
-; CHECK-NEON-NEXT: mov w9, #512 // =0x200
-; CHECK-NEON-NEXT: and v5.16b, v1.16b, v5.16b
-; CHECK-NEON-NEXT: and v6.16b, v1.16b, v6.16b
-; CHECK-NEON-NEXT: dup v16.2d, x8
-; CHECK-NEON-NEXT: xtn v2.2s, v2.2d
-; CHECK-NEON-NEXT: xtn v3.2s, v3.2d
-; CHECK-NEON-NEXT: and v7.16b, v1.16b, v7.16b
-; CHECK-NEON-NEXT: xtn v4.2s, v4.2d
-; CHECK-NEON-NEXT: mov w8, #128 // =0x80
-; CHECK-NEON-NEXT: xtn v5.2s, v5.2d
-; CHECK-NEON-NEXT: xtn v6.2s, v6.2d
-; CHECK-NEON-NEXT: dup v17.2d, x8
-; CHECK-NEON-NEXT: xtn v7.2s, v7.2d
-; CHECK-NEON-NEXT: mov w8, #256 // =0x100
-; CHECK-NEON-NEXT: and v16.16b, v1.16b, v16.16b
-; CHECK-NEON-NEXT: umull v2.2d, v0.2s, v2.2s
-; CHECK-NEON-NEXT: umull v3.2d, v0.2s, v3.2s
-; CHECK-NEON-NEXT: dup v18.2d, x8
-; CHECK-NEON-NEXT: umull v4.2d, v0.2s, v4.2s
-; CHECK-NEON-NEXT: mov w8, #2048 // =0x800
-; CHECK-NEON-NEXT: and v17.16b, v1.16b, v17.16b
-; CHECK-NEON-NEXT: umull v5.2d, v0.2s, v5.2s
-; CHECK-NEON-NEXT: umull v6.2d, v0.2s, v6.2s
-; CHECK-NEON-NEXT: umull v7.2d, v0.2s, v7.2s
-; CHECK-NEON-NEXT: eor v2.16b, v3.16b, v2.16b
-; CHECK-NEON-NEXT: xtn v17.2s, v17.2d
-; CHECK-NEON-NEXT: eor v3.16b, v4.16b, v5.16b
-; CHECK-NEON-NEXT: xtn v4.2s, v16.2d
-; CHECK-NEON-NEXT: dup v16.2d, x8
-; CHECK-NEON-NEXT: mov w8, #4096 // =0x1000
-; CHECK-NEON-NEXT: and v5.16b, v1.16b, v18.16b
-; CHECK-NEON-NEXT: dup v18.2d, x9
-; CHECK-NEON-NEXT: dup v19.2d, x8
-; CHECK-NEON-NEXT: mov w8, #8192 // =0x2000
-; CHECK-NEON-NEXT: umull v17.2d, v0.2s, v17.2s
-; CHECK-NEON-NEXT: eor v2.16b, v2.16b, v3.16b
-; CHECK-NEON-NEXT: xtn v3.2s, v5.2d
-; CHECK-NEON-NEXT: eor v5.16b, v6.16b, v7.16b
-; CHECK-NEON-NEXT: and v6.16b, v1.16b, v16.16b
-; CHECK-NEON-NEXT: and v16.16b, v1.16b, v19.16b
-; CHECK-NEON-NEXT: and v7.16b, v1.16b, v18.16b
-; CHECK-NEON-NEXT: dup v18.2d, x8
-; CHECK-NEON-NEXT: mov w8, #1024 // =0x400
-; CHECK-NEON-NEXT: umull v4.2d, v0.2s, v4.2s
-; CHECK-NEON-NEXT: dup v19.2d, x8
-; CHECK-NEON-NEXT: xtn v6.2s, v6.2d
-; CHECK-NEON-NEXT: mov w8, #16384 // =0x4000
-; CHECK-NEON-NEXT: xtn v16.2s, v16.2d
-; CHECK-NEON-NEXT: and v18.16b, v1.16b, v18.16b
-; CHECK-NEON-NEXT: dup v20.2d, x8
-; CHECK-NEON-NEXT: mov w8, #65536 // =0x10000
-; CHECK-NEON-NEXT: umull v3.2d, v0.2s, v3.2s
-; CHECK-NEON-NEXT: xtn v7.2s, v7.2d
-; CHECK-NEON-NEXT: dup v21.2d, x8
-; CHECK-NEON-NEXT: mov w8, #131072 // =0x20000
-; CHECK-NEON-NEXT: and v19.16b, v1.16b, v19.16b
-; CHECK-NEON-NEXT: eor v4.16b, v5.16b, v4.16b
-; CHECK-NEON-NEXT: umull v5.2d, v0.2s, v6.2s
-; CHECK-NEON-NEXT: dup v6.2d, x8
-; CHECK-NEON-NEXT: umull v16.2d, v0.2s, v16.2s
-; CHECK-NEON-NEXT: mov w8, #32768 // =0x8000
-; CHECK-NEON-NEXT: xtn v18.2s, v18.2d
-; CHECK-NEON-NEXT: and v20.16b, v1.16b, v20.16b
-; CHECK-NEON-NEXT: eor v3.16b, v17.16b, v3.16b
-; CHECK-NEON-NEXT: xtn v17.2s, v19.2d
-; CHECK-NEON-NEXT: and v19.16b, v1.16b, v21.16b
-; CHECK-NEON-NEXT: and v6.16b, v1.16b, v6.16b
-; CHECK-NEON-NEXT: dup v21.2d, x8
-; CHECK-NEON-NEXT: mov w8, #262144 // =0x40000
-; CHECK-NEON-NEXT: umull v7.2d, v0.2s, v7.2s
-; CHECK-NEON-NEXT: eor v2.16b, v2.16b, v4.16b
-; CHECK-NEON-NEXT: xtn v20.2s, v20.2d
-; CHECK-NEON-NEXT: eor v5.16b, v5.16b, v16.16b
-; CHECK-NEON-NEXT: umull v16.2d, v0.2s, v18.2s
-; CHECK-NEON-NEXT: dup v18.2d, x8
-; CHECK-NEON-NEXT: mov w8, #4194304 // =0x400000
-; CHECK-NEON-NEXT: xtn v19.2s, v19.2d
-; CHECK-NEON-NEXT: xtn v6.2s, v6.2d
-; CHECK-NEON-NEXT: dup v22.2d, x8
-; CHECK-NEON-NEXT: mov w8, #8388608 // =0x800000
-; CHECK-NEON-NEXT: umull v17.2d, v0.2s, v17.2s
-; CHECK-NEON-NEXT: dup v23.2d, x8
-; CHECK-NEON-NEXT: mov w8, #524288 // =0x80000
-; CHECK-NEON-NEXT: umull v20.2d, v0.2s, v20.2s
-; CHECK-NEON-NEXT: and v18.16b, v1.16b, v18.16b
-; CHECK-NEON-NEXT: eor v3.16b, v3.16b, v7.16b
-; CHECK-NEON-NEXT: eor v5.16b, v5.16b, v16.16b
-; CHECK-NEON-NEXT: and v7.16b, v1.16b, v21.16b
-; CHECK-NEON-NEXT: umull v16.2d, v0.2s, v19.2s
-; CHECK-NEON-NEXT: umull v6.2d, v0.2s, v6.2s
-; CHECK-NEON-NEXT: and v19.16b, v1.16b, v22.16b
-; CHECK-NEON-NEXT: and v21.16b, v1.16b, v23.16b
-; CHECK-NEON-NEXT: xtn v18.2s, v18.2d
-; CHECK-NEON-NEXT: eor v4.16b, v3.16b, v17.16b
-; CHECK-NEON-NEXT: movi v23.4s, #128, lsl #24
-; CHECK-NEON-NEXT: eor v3.16b, v5.16b, v20.16b
-; CHECK-NEON-NEXT: xtn v5.2s, v7.2d
-; CHECK-NEON-NEXT: dup v7.2d, x8
-; CHECK-NEON-NEXT: mov w8, #16777216 // =0x1000000
-; CHECK-NEON-NEXT: xtn v17.2s, v19.2d
-; CHECK-NEON-NEXT: xtn v19.2s, v21.2d
-; CHECK-NEON-NEXT: dup v20.2d, x8
-; CHECK-NEON-NEXT: mov w8, #33554432 // =0x2000000
-; CHECK-NEON-NEXT: eor v6.16b, v16.16b, v6.16b
-; CHECK-NEON-NEXT: umull v16.2d, v0.2s, v18.2s
-; CHECK-NEON-NEXT: dup v18.2d, x8
-; CHECK-NEON-NEXT: mov w8, #1048576 // =0x100000
-; CHECK-NEON-NEXT: dup v21.2d, x8
-; CHECK-NEON-NEXT: mov w8, #2097152 // =0x200000
-; CHECK-NEON-NEXT: and v7.16b, v1.16b, v7.16b
-; CHECK-NEON-NEXT: and v20.16b, v1.16b, v20.16b
-; CHECK-NEON-NEXT: dup v22.2d, x8
-; CHECK-NEON-NEXT: mov w8, #67108864 // =0x4000000
-; CHECK-NEON-NEXT: umull v17.2d, v0.2s, v17.2s
-; CHECK-NEON-NEXT: umull v19.2d, v0.2s, v19.2s
-; CHECK-NEON-NEXT: fneg v23.2d, v23.2d
-; CHECK-NEON-NEXT: eor v6.16b, v6.16b, v16.16b
-; CHECK-NEON-NEXT: and v16.16b, v1.16b, v18.16b
-; CHECK-NEON-NEXT: xtn v7.2s, v7.2d
-; CHECK-NEON-NEXT: xtn v18.2s, v20.2d
-; CHECK-NEON-NEXT: dup v20.2d, x8
-; CHECK-NEON-NEXT: mov w8, #134217728 // =0x8000000
-; CHECK-NEON-NEXT: and v21.16b, v1.16b, v21.16b
-; CHECK-NEON-NEXT: and v22.16b, v1.16b, v22.16b
-; CHECK-NEON-NEXT: umull v5.2d, v0.2s, v5.2s
-; CHECK-NEON-NEXT: eor v17.16b, v17.16b, v19.16b
-; CHECK-NEON-NEXT: xtn v16.2s, v16.2d
-; CHECK-NEON-NEXT: dup v19.2d, x8
-; CHECK-NEON-NEXT: mov w8, #268435456 // =0x10000000
-; CHECK-NEON-NEXT: and v20.16b, v1.16b, v20.16b
-; CHECK-NEON-NEXT: umull v7.2d, v0.2s, v7.2s
-; CHECK-NEON-NEXT: umull v18.2d, v0.2s, v18.2s
-; CHECK-NEON-NEXT: dup v24.2d, x8
-; CHECK-NEON-NEXT: mov w8, #536870912 // =0x20000000
-; CHECK-NEON-NEXT: dup v25.2d, x8
-; CHECK-NEON-NEXT: mov w8, #1073741824 // =0x40000000
-; CHECK-NEON-NEXT: and v19.16b, v1.16b, v19.16b
-; CHECK-NEON-NEXT: xtn v21.2s, v21.2d
-; CHECK-NEON-NEXT: xtn v20.2s, v20.2d
-; CHECK-NEON-NEXT: dup v26.2d, x8
-; CHECK-NEON-NEXT: umull v16.2d, v0.2s, v16.2s
-; CHECK-NEON-NEXT: eor v6.16b, v6.16b, v7.16b
-; CHECK-NEON-NEXT: eor v2.16b, v2.16b, v4.16b
-; CHECK-NEON-NEXT: eor v17.16b, v17.16b, v18.16b
-; CHECK-NEON-NEXT: xtn v18.2s, v22.2d
-; CHECK-NEON-NEXT: xtn v19.2s, v19.2d
-; CHECK-NEON-NEXT: and v22.16b, v1.16b, v24.16b
-; CHECK-NEON-NEXT: and v24.16b, v1.16b, v25.16b
-; CHECK-NEON-NEXT: and v25.16b, v1.16b, v26.16b
-; CHECK-NEON-NEXT: umull v21.2d, v0.2s, v21.2s
-; CHECK-NEON-NEXT: umull v20.2d, v0.2s, v20.2s
-; CHECK-NEON-NEXT: and v1.16b, v1.16b, v23.16b
-; CHECK-NEON-NEXT: eor v7.16b, v17.16b, v16.16b
-; CHECK-NEON-NEXT: eor v3.16b, v3.16b, v5.16b
-; CHECK-NEON-NEXT: xtn v16.2s, v22.2d
-; CHECK-NEON-NEXT: xtn v17.2s, v24.2d
-; CHECK-NEON-NEXT: xtn v22.2s, v25.2d
-; CHECK-NEON-NEXT: umull v4.2d, v0.2s, v18.2s
-; CHECK-NEON-NEXT: umull v18.2d, v0.2s, v19.2s
-; CHECK-NEON-NEXT: xtn v1.2s, v1.2d
-; CHECK-NEON-NEXT: eor v5.16b, v6.16b, v21.16b
-; CHECK-NEON-NEXT: eor v6.16b, v7.16b, v20.16b
-; CHECK-NEON-NEXT: eor v2.16b, v2.16b, v3.16b
-; CHECK-NEON-NEXT: umull v7.2d, v0.2s, v16.2s
-; CHECK-NEON-NEXT: umull v16.2d, v0.2s, v17.2s
-; CHECK-NEON-NEXT: umull v17.2d, v0.2s, v22.2s
-; CHECK-NEON-NEXT: eor v3.16b, v5.16b, v4.16b
-; CHECK-NEON-NEXT: eor v4.16b, v6.16b, v18.16b
-; CHECK-NEON-NEXT: umull v0.2d, v0.2s, v1.2s
-; CHECK-NEON-NEXT: eor v1.16b, v2.16b, v3.16b
-; CHECK-NEON-NEXT: eor v2.16b, v4.16b, v7.16b
-; CHECK-NEON-NEXT: eor v3.16b, v16.16b, v17.16b
-; CHECK-NEON-NEXT: eor v1.16b, v1.16b, v2.16b
-; CHECK-NEON-NEXT: eor v0.16b, v3.16b, v0.16b
-; CHECK-NEON-NEXT: eor v0.16b, v1.16b, v0.16b
-; CHECK-NEON-NEXT: shrn v0.2s, v0.2d, #32
+; CHECK-NEON-NEXT: rev32 v1.8b, v1.8b
+; CHECK-NEON-NEXT: rev32 v0.8b, v0.8b
+; CHECK-NEON-NEXT: rbit v1.8b, v1.8b
+; CHECK-NEON-NEXT: rbit v2.8b, v0.8b
+; CHECK-NEON-NEXT: xtn v0.4h, v1.4s
+; CHECK-NEON-NEXT: xtn v3.4h, v2.4s
+; CHECK-NEON-NEXT: shrn v16.4h, v2.4s, #16
+; CHECK-NEON-NEXT: shrn v17.4h, v1.4s, #16
+; CHECK-NEON-NEXT: xtn v20.8b, v16.8h
+; CHECK-NEON-NEXT: shrn v16.8b, v16.8h, #8
+; CHECK-NEON-NEXT: rev16 v4.8b, v0.8b
+; CHECK-NEON-NEXT: rev16 v5.8b, v3.8b
+; CHECK-NEON-NEXT: xtn v1.8b, v0.8h
+; CHECK-NEON-NEXT: xtn v21.8b, v17.8h
+; CHECK-NEON-NEXT: xtn v2.8b, v3.8h
+; CHECK-NEON-NEXT: shrn v0.8b, v0.8h, #8
+; CHECK-NEON-NEXT: shrn v3.8b, v3.8h, #8
+; CHECK-NEON-NEXT: shrn v17.8b, v17.8h, #8
+; CHECK-NEON-NEXT: rbit v4.8b, v4.8b
+; CHECK-NEON-NEXT: rbit v5.8b, v5.8b
+; CHECK-NEON-NEXT: rbit v22.8b, v1.8b
+; CHECK-NEON-NEXT: rbit v23.8b, v21.8b
+; CHECK-NEON-NEXT: rbit v24.8b, v2.8b
+; CHECK-NEON-NEXT: pmul v16.8b, v16.8b, v1.8b
+; CHECK-NEON-NEXT: pmul v25.8b, v20.8b, v0.8b
+; CHECK-NEON-NEXT: pmul v17.8b, v2.8b, v17.8b
+; CHECK-NEON-NEXT: pmul v0.8b, v2.8b, v0.8b
+; CHECK-NEON-NEXT: xtn v6.8b, v4.8h
+; CHECK-NEON-NEXT: xtn v7.8b, v5.8h
+; CHECK-NEON-NEXT: shrn v5.8b, v5.8h, #8
+; CHECK-NEON-NEXT: shrn v4.8b, v4.8h, #8
+; CHECK-NEON-NEXT: pmul v23.8b, v24.8b, v23.8b
+; CHECK-NEON-NEXT: rbit v18.8b, v6.8b
+; CHECK-NEON-NEXT: rbit v19.8b, v7.8b
+; CHECK-NEON-NEXT: pmul v5.8b, v5.8b, v6.8b
+; CHECK-NEON-NEXT: pmul v4.8b, v7.8b, v4.8b
+; CHECK-NEON-NEXT: pmul v6.8b, v7.8b, v6.8b
+; CHECK-NEON-NEXT: rbit v7.8b, v23.8b
+; CHECK-NEON-NEXT: pmul v18.8b, v19.8b, v18.8b
+; CHECK-NEON-NEXT: rbit v19.8b, v20.8b
+; CHECK-NEON-NEXT: eor v4.8b, v4.8b, v5.8b
+; CHECK-NEON-NEXT: ushll v6.8h, v6.8b, #0
+; CHECK-NEON-NEXT: ushr v7.8b, v7.8b, #1
+; CHECK-NEON-NEXT: rbit v18.8b, v18.8b
+; CHECK-NEON-NEXT: pmul v19.8b, v19.8b, v22.8b
+; CHECK-NEON-NEXT: ushr v5.8b, v18.8b, #1
+; CHECK-NEON-NEXT: rbit v18.8b, v19.8b
+; CHECK-NEON-NEXT: pmul v19.8b, v3.8b, v21.8b
+; CHECK-NEON-NEXT: pmul v3.8b, v3.8b, v1.8b
+; CHECK-NEON-NEXT: eor v4.8b, v5.8b, v4.8b
+; CHECK-NEON-NEXT: eor v5.8b, v25.8b, v16.8b
+; CHECK-NEON-NEXT: eor v16.8b, v17.8b, v19.8b
+; CHECK-NEON-NEXT: pmul v17.8b, v24.8b, v22.8b
+; CHECK-NEON-NEXT: ushr v18.8b, v18.8b, #1
+; CHECK-NEON-NEXT: eor v0.8b, v0.8b, v3.8b
+; CHECK-NEON-NEXT: shll v4.8h, v4.8b, #8
+; CHECK-NEON-NEXT: eor v5.8b, v18.8b, v5.8b
+; CHECK-NEON-NEXT: pmul v18.8b, v20.8b, v1.8b
+; CHECK-NEON-NEXT: eor v7.8b, v7.8b, v16.8b
+; CHECK-NEON-NEXT: pmul v16.8b, v2.8b, v21.8b
+; CHECK-NEON-NEXT: pmul v1.8b, v2.8b, v1.8b
+; CHECK-NEON-NEXT: orr v4.16b, v6.16b, v4.16b
+; CHECK-NEON-NEXT: rbit v6.8b, v17.8b
+; CHECK-NEON-NEXT: shll v5.8h, v5.8b, #8
+; CHECK-NEON-NEXT: shll v7.8h, v7.8b, #8
+; CHECK-NEON-NEXT: ushll v17.8h, v18.8b, #0
+; CHECK-NEON-NEXT: rev16 v4.8b, v4.8b
+; CHECK-NEON-NEXT: ushll v16.8h, v16.8b, #0
+; CHECK-NEON-NEXT: ushll v1.8h, v1.8b, #0
+; CHECK-NEON-NEXT: ushr v3.8b, v6.8b, #1
+; CHECK-NEON-NEXT: orr v5.16b, v17.16b, v5.16b
+; CHECK-NEON-NEXT: orr v6.16b, v16.16b, v7.16b
+; CHECK-NEON-NEXT: rbit v4.8b, v4.8b
+; CHECK-NEON-NEXT: eor v0.8b, v3.8b, v0.8b
+; CHECK-NEON-NEXT: eor v2.8b, v6.8b, v5.8b
+; CHECK-NEON-NEXT: shll v0.8h, v0.8b, #8
+; CHECK-NEON-NEXT: ushr v3.4h, v4.4h, #1
+; CHECK-NEON-NEXT: orr v0.16b, v1.16b, v0.16b
+; CHECK-NEON-NEXT: eor v2.8b, v3.8b, v2.8b
+; CHECK-NEON-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-NEON-NEXT: shll v1.4s, v2.4h, #16
+; CHECK-NEON-NEXT: orr v0.16b, v0.16b, v1.16b
+; CHECK-NEON-NEXT: rev32 v0.8b, v0.8b
+; CHECK-NEON-NEXT: rbit v0.8b, v0.8b
+; CHECK-NEON-NEXT: ushr v0.2s, v0.2s, #1
; CHECK-NEON-NEXT: ret
;
; CHECK-AES-LABEL: clmulh_v2i32_neon:
; CHECK-AES: // %bb.0:
+; CHECK-AES-NEXT: rev32 v1.8b, v1.8b
----------------
davemgreen wrote:
Some of these look worse with +aes?
https://github.com/llvm/llvm-project/pull/184468
More information about the llvm-commits mailing list