[llvm] [SelectionDAG] Use ExpandIntRes_CLMUL to expand vector CLMUL via narrower legal types (PR #184468)

David Green via llvm-commits llvm-commits at lists.llvm.org
Thu Mar 5 01:05:24 PST 2026


================
@@ -5504,209 +4808,108 @@ define <4 x i32> @clmulh_v4i32_neon(<4 x i32> %a, <4 x i32> %b) nounwind {
 define <2 x i32> @clmulh_v2i32_neon(<2 x i32> %a, <2 x i32> %b) nounwind {
 ; CHECK-NEON-LABEL: clmulh_v2i32_neon:
 ; CHECK-NEON:       // %bb.0:
-; CHECK-NEON-NEXT:    mov w8, #2 // =0x2
-; CHECK-NEON-NEXT:    mov w9, #1 // =0x1
-; CHECK-NEON-NEXT:    ushll v1.2d, v1.2s, #0
-; CHECK-NEON-NEXT:    dup v2.2d, x8
-; CHECK-NEON-NEXT:    dup v3.2d, x9
-; CHECK-NEON-NEXT:    mov w8, #4 // =0x4
-; CHECK-NEON-NEXT:    mov w9, #8 // =0x8
-; CHECK-NEON-NEXT:    dup v4.2d, x8
-; CHECK-NEON-NEXT:    mov w8, #16 // =0x10
-; CHECK-NEON-NEXT:    dup v5.2d, x9
-; CHECK-NEON-NEXT:    dup v6.2d, x8
-; CHECK-NEON-NEXT:    mov w8, #32 // =0x20
-; CHECK-NEON-NEXT:    and v2.16b, v1.16b, v2.16b
-; CHECK-NEON-NEXT:    and v3.16b, v1.16b, v3.16b
-; CHECK-NEON-NEXT:    dup v7.2d, x8
-; CHECK-NEON-NEXT:    and v4.16b, v1.16b, v4.16b
-; CHECK-NEON-NEXT:    mov w8, #64 // =0x40
-; CHECK-NEON-NEXT:    mov w9, #512 // =0x200
-; CHECK-NEON-NEXT:    and v5.16b, v1.16b, v5.16b
-; CHECK-NEON-NEXT:    and v6.16b, v1.16b, v6.16b
-; CHECK-NEON-NEXT:    dup v16.2d, x8
-; CHECK-NEON-NEXT:    xtn v2.2s, v2.2d
-; CHECK-NEON-NEXT:    xtn v3.2s, v3.2d
-; CHECK-NEON-NEXT:    and v7.16b, v1.16b, v7.16b
-; CHECK-NEON-NEXT:    xtn v4.2s, v4.2d
-; CHECK-NEON-NEXT:    mov w8, #128 // =0x80
-; CHECK-NEON-NEXT:    xtn v5.2s, v5.2d
-; CHECK-NEON-NEXT:    xtn v6.2s, v6.2d
-; CHECK-NEON-NEXT:    dup v17.2d, x8
-; CHECK-NEON-NEXT:    xtn v7.2s, v7.2d
-; CHECK-NEON-NEXT:    mov w8, #256 // =0x100
-; CHECK-NEON-NEXT:    and v16.16b, v1.16b, v16.16b
-; CHECK-NEON-NEXT:    umull v2.2d, v0.2s, v2.2s
-; CHECK-NEON-NEXT:    umull v3.2d, v0.2s, v3.2s
-; CHECK-NEON-NEXT:    dup v18.2d, x8
-; CHECK-NEON-NEXT:    umull v4.2d, v0.2s, v4.2s
-; CHECK-NEON-NEXT:    mov w8, #2048 // =0x800
-; CHECK-NEON-NEXT:    and v17.16b, v1.16b, v17.16b
-; CHECK-NEON-NEXT:    umull v5.2d, v0.2s, v5.2s
-; CHECK-NEON-NEXT:    umull v6.2d, v0.2s, v6.2s
-; CHECK-NEON-NEXT:    umull v7.2d, v0.2s, v7.2s
-; CHECK-NEON-NEXT:    eor v2.16b, v3.16b, v2.16b
-; CHECK-NEON-NEXT:    xtn v17.2s, v17.2d
-; CHECK-NEON-NEXT:    eor v3.16b, v4.16b, v5.16b
-; CHECK-NEON-NEXT:    xtn v4.2s, v16.2d
-; CHECK-NEON-NEXT:    dup v16.2d, x8
-; CHECK-NEON-NEXT:    mov w8, #4096 // =0x1000
-; CHECK-NEON-NEXT:    and v5.16b, v1.16b, v18.16b
-; CHECK-NEON-NEXT:    dup v18.2d, x9
-; CHECK-NEON-NEXT:    dup v19.2d, x8
-; CHECK-NEON-NEXT:    mov w8, #8192 // =0x2000
-; CHECK-NEON-NEXT:    umull v17.2d, v0.2s, v17.2s
-; CHECK-NEON-NEXT:    eor v2.16b, v2.16b, v3.16b
-; CHECK-NEON-NEXT:    xtn v3.2s, v5.2d
-; CHECK-NEON-NEXT:    eor v5.16b, v6.16b, v7.16b
-; CHECK-NEON-NEXT:    and v6.16b, v1.16b, v16.16b
-; CHECK-NEON-NEXT:    and v16.16b, v1.16b, v19.16b
-; CHECK-NEON-NEXT:    and v7.16b, v1.16b, v18.16b
-; CHECK-NEON-NEXT:    dup v18.2d, x8
-; CHECK-NEON-NEXT:    mov w8, #1024 // =0x400
-; CHECK-NEON-NEXT:    umull v4.2d, v0.2s, v4.2s
-; CHECK-NEON-NEXT:    dup v19.2d, x8
-; CHECK-NEON-NEXT:    xtn v6.2s, v6.2d
-; CHECK-NEON-NEXT:    mov w8, #16384 // =0x4000
-; CHECK-NEON-NEXT:    xtn v16.2s, v16.2d
-; CHECK-NEON-NEXT:    and v18.16b, v1.16b, v18.16b
-; CHECK-NEON-NEXT:    dup v20.2d, x8
-; CHECK-NEON-NEXT:    mov w8, #65536 // =0x10000
-; CHECK-NEON-NEXT:    umull v3.2d, v0.2s, v3.2s
-; CHECK-NEON-NEXT:    xtn v7.2s, v7.2d
-; CHECK-NEON-NEXT:    dup v21.2d, x8
-; CHECK-NEON-NEXT:    mov w8, #131072 // =0x20000
-; CHECK-NEON-NEXT:    and v19.16b, v1.16b, v19.16b
-; CHECK-NEON-NEXT:    eor v4.16b, v5.16b, v4.16b
-; CHECK-NEON-NEXT:    umull v5.2d, v0.2s, v6.2s
-; CHECK-NEON-NEXT:    dup v6.2d, x8
-; CHECK-NEON-NEXT:    umull v16.2d, v0.2s, v16.2s
-; CHECK-NEON-NEXT:    mov w8, #32768 // =0x8000
-; CHECK-NEON-NEXT:    xtn v18.2s, v18.2d
-; CHECK-NEON-NEXT:    and v20.16b, v1.16b, v20.16b
-; CHECK-NEON-NEXT:    eor v3.16b, v17.16b, v3.16b
-; CHECK-NEON-NEXT:    xtn v17.2s, v19.2d
-; CHECK-NEON-NEXT:    and v19.16b, v1.16b, v21.16b
-; CHECK-NEON-NEXT:    and v6.16b, v1.16b, v6.16b
-; CHECK-NEON-NEXT:    dup v21.2d, x8
-; CHECK-NEON-NEXT:    mov w8, #262144 // =0x40000
-; CHECK-NEON-NEXT:    umull v7.2d, v0.2s, v7.2s
-; CHECK-NEON-NEXT:    eor v2.16b, v2.16b, v4.16b
-; CHECK-NEON-NEXT:    xtn v20.2s, v20.2d
-; CHECK-NEON-NEXT:    eor v5.16b, v5.16b, v16.16b
-; CHECK-NEON-NEXT:    umull v16.2d, v0.2s, v18.2s
-; CHECK-NEON-NEXT:    dup v18.2d, x8
-; CHECK-NEON-NEXT:    mov w8, #4194304 // =0x400000
-; CHECK-NEON-NEXT:    xtn v19.2s, v19.2d
-; CHECK-NEON-NEXT:    xtn v6.2s, v6.2d
-; CHECK-NEON-NEXT:    dup v22.2d, x8
-; CHECK-NEON-NEXT:    mov w8, #8388608 // =0x800000
-; CHECK-NEON-NEXT:    umull v17.2d, v0.2s, v17.2s
-; CHECK-NEON-NEXT:    dup v23.2d, x8
-; CHECK-NEON-NEXT:    mov w8, #524288 // =0x80000
-; CHECK-NEON-NEXT:    umull v20.2d, v0.2s, v20.2s
-; CHECK-NEON-NEXT:    and v18.16b, v1.16b, v18.16b
-; CHECK-NEON-NEXT:    eor v3.16b, v3.16b, v7.16b
-; CHECK-NEON-NEXT:    eor v5.16b, v5.16b, v16.16b
-; CHECK-NEON-NEXT:    and v7.16b, v1.16b, v21.16b
-; CHECK-NEON-NEXT:    umull v16.2d, v0.2s, v19.2s
-; CHECK-NEON-NEXT:    umull v6.2d, v0.2s, v6.2s
-; CHECK-NEON-NEXT:    and v19.16b, v1.16b, v22.16b
-; CHECK-NEON-NEXT:    and v21.16b, v1.16b, v23.16b
-; CHECK-NEON-NEXT:    xtn v18.2s, v18.2d
-; CHECK-NEON-NEXT:    eor v4.16b, v3.16b, v17.16b
-; CHECK-NEON-NEXT:    movi v23.4s, #128, lsl #24
-; CHECK-NEON-NEXT:    eor v3.16b, v5.16b, v20.16b
-; CHECK-NEON-NEXT:    xtn v5.2s, v7.2d
-; CHECK-NEON-NEXT:    dup v7.2d, x8
-; CHECK-NEON-NEXT:    mov w8, #16777216 // =0x1000000
-; CHECK-NEON-NEXT:    xtn v17.2s, v19.2d
-; CHECK-NEON-NEXT:    xtn v19.2s, v21.2d
-; CHECK-NEON-NEXT:    dup v20.2d, x8
-; CHECK-NEON-NEXT:    mov w8, #33554432 // =0x2000000
-; CHECK-NEON-NEXT:    eor v6.16b, v16.16b, v6.16b
-; CHECK-NEON-NEXT:    umull v16.2d, v0.2s, v18.2s
-; CHECK-NEON-NEXT:    dup v18.2d, x8
-; CHECK-NEON-NEXT:    mov w8, #1048576 // =0x100000
-; CHECK-NEON-NEXT:    dup v21.2d, x8
-; CHECK-NEON-NEXT:    mov w8, #2097152 // =0x200000
-; CHECK-NEON-NEXT:    and v7.16b, v1.16b, v7.16b
-; CHECK-NEON-NEXT:    and v20.16b, v1.16b, v20.16b
-; CHECK-NEON-NEXT:    dup v22.2d, x8
-; CHECK-NEON-NEXT:    mov w8, #67108864 // =0x4000000
-; CHECK-NEON-NEXT:    umull v17.2d, v0.2s, v17.2s
-; CHECK-NEON-NEXT:    umull v19.2d, v0.2s, v19.2s
-; CHECK-NEON-NEXT:    fneg v23.2d, v23.2d
-; CHECK-NEON-NEXT:    eor v6.16b, v6.16b, v16.16b
-; CHECK-NEON-NEXT:    and v16.16b, v1.16b, v18.16b
-; CHECK-NEON-NEXT:    xtn v7.2s, v7.2d
-; CHECK-NEON-NEXT:    xtn v18.2s, v20.2d
-; CHECK-NEON-NEXT:    dup v20.2d, x8
-; CHECK-NEON-NEXT:    mov w8, #134217728 // =0x8000000
-; CHECK-NEON-NEXT:    and v21.16b, v1.16b, v21.16b
-; CHECK-NEON-NEXT:    and v22.16b, v1.16b, v22.16b
-; CHECK-NEON-NEXT:    umull v5.2d, v0.2s, v5.2s
-; CHECK-NEON-NEXT:    eor v17.16b, v17.16b, v19.16b
-; CHECK-NEON-NEXT:    xtn v16.2s, v16.2d
-; CHECK-NEON-NEXT:    dup v19.2d, x8
-; CHECK-NEON-NEXT:    mov w8, #268435456 // =0x10000000
-; CHECK-NEON-NEXT:    and v20.16b, v1.16b, v20.16b
-; CHECK-NEON-NEXT:    umull v7.2d, v0.2s, v7.2s
-; CHECK-NEON-NEXT:    umull v18.2d, v0.2s, v18.2s
-; CHECK-NEON-NEXT:    dup v24.2d, x8
-; CHECK-NEON-NEXT:    mov w8, #536870912 // =0x20000000
-; CHECK-NEON-NEXT:    dup v25.2d, x8
-; CHECK-NEON-NEXT:    mov w8, #1073741824 // =0x40000000
-; CHECK-NEON-NEXT:    and v19.16b, v1.16b, v19.16b
-; CHECK-NEON-NEXT:    xtn v21.2s, v21.2d
-; CHECK-NEON-NEXT:    xtn v20.2s, v20.2d
-; CHECK-NEON-NEXT:    dup v26.2d, x8
-; CHECK-NEON-NEXT:    umull v16.2d, v0.2s, v16.2s
-; CHECK-NEON-NEXT:    eor v6.16b, v6.16b, v7.16b
-; CHECK-NEON-NEXT:    eor v2.16b, v2.16b, v4.16b
-; CHECK-NEON-NEXT:    eor v17.16b, v17.16b, v18.16b
-; CHECK-NEON-NEXT:    xtn v18.2s, v22.2d
-; CHECK-NEON-NEXT:    xtn v19.2s, v19.2d
-; CHECK-NEON-NEXT:    and v22.16b, v1.16b, v24.16b
-; CHECK-NEON-NEXT:    and v24.16b, v1.16b, v25.16b
-; CHECK-NEON-NEXT:    and v25.16b, v1.16b, v26.16b
-; CHECK-NEON-NEXT:    umull v21.2d, v0.2s, v21.2s
-; CHECK-NEON-NEXT:    umull v20.2d, v0.2s, v20.2s
-; CHECK-NEON-NEXT:    and v1.16b, v1.16b, v23.16b
-; CHECK-NEON-NEXT:    eor v7.16b, v17.16b, v16.16b
-; CHECK-NEON-NEXT:    eor v3.16b, v3.16b, v5.16b
-; CHECK-NEON-NEXT:    xtn v16.2s, v22.2d
-; CHECK-NEON-NEXT:    xtn v17.2s, v24.2d
-; CHECK-NEON-NEXT:    xtn v22.2s, v25.2d
-; CHECK-NEON-NEXT:    umull v4.2d, v0.2s, v18.2s
-; CHECK-NEON-NEXT:    umull v18.2d, v0.2s, v19.2s
-; CHECK-NEON-NEXT:    xtn v1.2s, v1.2d
-; CHECK-NEON-NEXT:    eor v5.16b, v6.16b, v21.16b
-; CHECK-NEON-NEXT:    eor v6.16b, v7.16b, v20.16b
-; CHECK-NEON-NEXT:    eor v2.16b, v2.16b, v3.16b
-; CHECK-NEON-NEXT:    umull v7.2d, v0.2s, v16.2s
-; CHECK-NEON-NEXT:    umull v16.2d, v0.2s, v17.2s
-; CHECK-NEON-NEXT:    umull v17.2d, v0.2s, v22.2s
-; CHECK-NEON-NEXT:    eor v3.16b, v5.16b, v4.16b
-; CHECK-NEON-NEXT:    eor v4.16b, v6.16b, v18.16b
-; CHECK-NEON-NEXT:    umull v0.2d, v0.2s, v1.2s
-; CHECK-NEON-NEXT:    eor v1.16b, v2.16b, v3.16b
-; CHECK-NEON-NEXT:    eor v2.16b, v4.16b, v7.16b
-; CHECK-NEON-NEXT:    eor v3.16b, v16.16b, v17.16b
-; CHECK-NEON-NEXT:    eor v1.16b, v1.16b, v2.16b
-; CHECK-NEON-NEXT:    eor v0.16b, v3.16b, v0.16b
-; CHECK-NEON-NEXT:    eor v0.16b, v1.16b, v0.16b
-; CHECK-NEON-NEXT:    shrn v0.2s, v0.2d, #32
+; CHECK-NEON-NEXT:    rev32 v1.8b, v1.8b
+; CHECK-NEON-NEXT:    rev32 v0.8b, v0.8b
+; CHECK-NEON-NEXT:    rbit v1.8b, v1.8b
+; CHECK-NEON-NEXT:    rbit v2.8b, v0.8b
+; CHECK-NEON-NEXT:    xtn v0.4h, v1.4s
+; CHECK-NEON-NEXT:    xtn v3.4h, v2.4s
+; CHECK-NEON-NEXT:    shrn v16.4h, v2.4s, #16
+; CHECK-NEON-NEXT:    shrn v17.4h, v1.4s, #16
+; CHECK-NEON-NEXT:    xtn v20.8b, v16.8h
+; CHECK-NEON-NEXT:    shrn v16.8b, v16.8h, #8
+; CHECK-NEON-NEXT:    rev16 v4.8b, v0.8b
+; CHECK-NEON-NEXT:    rev16 v5.8b, v3.8b
+; CHECK-NEON-NEXT:    xtn v1.8b, v0.8h
+; CHECK-NEON-NEXT:    xtn v21.8b, v17.8h
+; CHECK-NEON-NEXT:    xtn v2.8b, v3.8h
+; CHECK-NEON-NEXT:    shrn v0.8b, v0.8h, #8
+; CHECK-NEON-NEXT:    shrn v3.8b, v3.8h, #8
+; CHECK-NEON-NEXT:    shrn v17.8b, v17.8h, #8
+; CHECK-NEON-NEXT:    rbit v4.8b, v4.8b
+; CHECK-NEON-NEXT:    rbit v5.8b, v5.8b
+; CHECK-NEON-NEXT:    rbit v22.8b, v1.8b
+; CHECK-NEON-NEXT:    rbit v23.8b, v21.8b
+; CHECK-NEON-NEXT:    rbit v24.8b, v2.8b
+; CHECK-NEON-NEXT:    pmul v16.8b, v16.8b, v1.8b
+; CHECK-NEON-NEXT:    pmul v25.8b, v20.8b, v0.8b
+; CHECK-NEON-NEXT:    pmul v17.8b, v2.8b, v17.8b
+; CHECK-NEON-NEXT:    pmul v0.8b, v2.8b, v0.8b
+; CHECK-NEON-NEXT:    xtn v6.8b, v4.8h
+; CHECK-NEON-NEXT:    xtn v7.8b, v5.8h
+; CHECK-NEON-NEXT:    shrn v5.8b, v5.8h, #8
+; CHECK-NEON-NEXT:    shrn v4.8b, v4.8h, #8
+; CHECK-NEON-NEXT:    pmul v23.8b, v24.8b, v23.8b
+; CHECK-NEON-NEXT:    rbit v18.8b, v6.8b
+; CHECK-NEON-NEXT:    rbit v19.8b, v7.8b
+; CHECK-NEON-NEXT:    pmul v5.8b, v5.8b, v6.8b
+; CHECK-NEON-NEXT:    pmul v4.8b, v7.8b, v4.8b
+; CHECK-NEON-NEXT:    pmul v6.8b, v7.8b, v6.8b
+; CHECK-NEON-NEXT:    rbit v7.8b, v23.8b
+; CHECK-NEON-NEXT:    pmul v18.8b, v19.8b, v18.8b
+; CHECK-NEON-NEXT:    rbit v19.8b, v20.8b
+; CHECK-NEON-NEXT:    eor v4.8b, v4.8b, v5.8b
+; CHECK-NEON-NEXT:    ushll v6.8h, v6.8b, #0
+; CHECK-NEON-NEXT:    ushr v7.8b, v7.8b, #1
+; CHECK-NEON-NEXT:    rbit v18.8b, v18.8b
+; CHECK-NEON-NEXT:    pmul v19.8b, v19.8b, v22.8b
+; CHECK-NEON-NEXT:    ushr v5.8b, v18.8b, #1
+; CHECK-NEON-NEXT:    rbit v18.8b, v19.8b
+; CHECK-NEON-NEXT:    pmul v19.8b, v3.8b, v21.8b
+; CHECK-NEON-NEXT:    pmul v3.8b, v3.8b, v1.8b
+; CHECK-NEON-NEXT:    eor v4.8b, v5.8b, v4.8b
+; CHECK-NEON-NEXT:    eor v5.8b, v25.8b, v16.8b
+; CHECK-NEON-NEXT:    eor v16.8b, v17.8b, v19.8b
+; CHECK-NEON-NEXT:    pmul v17.8b, v24.8b, v22.8b
+; CHECK-NEON-NEXT:    ushr v18.8b, v18.8b, #1
+; CHECK-NEON-NEXT:    eor v0.8b, v0.8b, v3.8b
+; CHECK-NEON-NEXT:    shll v4.8h, v4.8b, #8
+; CHECK-NEON-NEXT:    eor v5.8b, v18.8b, v5.8b
+; CHECK-NEON-NEXT:    pmul v18.8b, v20.8b, v1.8b
+; CHECK-NEON-NEXT:    eor v7.8b, v7.8b, v16.8b
+; CHECK-NEON-NEXT:    pmul v16.8b, v2.8b, v21.8b
+; CHECK-NEON-NEXT:    pmul v1.8b, v2.8b, v1.8b
+; CHECK-NEON-NEXT:    orr v4.16b, v6.16b, v4.16b
+; CHECK-NEON-NEXT:    rbit v6.8b, v17.8b
+; CHECK-NEON-NEXT:    shll v5.8h, v5.8b, #8
+; CHECK-NEON-NEXT:    shll v7.8h, v7.8b, #8
+; CHECK-NEON-NEXT:    ushll v17.8h, v18.8b, #0
+; CHECK-NEON-NEXT:    rev16 v4.8b, v4.8b
+; CHECK-NEON-NEXT:    ushll v16.8h, v16.8b, #0
+; CHECK-NEON-NEXT:    ushll v1.8h, v1.8b, #0
+; CHECK-NEON-NEXT:    ushr v3.8b, v6.8b, #1
+; CHECK-NEON-NEXT:    orr v5.16b, v17.16b, v5.16b
+; CHECK-NEON-NEXT:    orr v6.16b, v16.16b, v7.16b
+; CHECK-NEON-NEXT:    rbit v4.8b, v4.8b
+; CHECK-NEON-NEXT:    eor v0.8b, v3.8b, v0.8b
+; CHECK-NEON-NEXT:    eor v2.8b, v6.8b, v5.8b
+; CHECK-NEON-NEXT:    shll v0.8h, v0.8b, #8
+; CHECK-NEON-NEXT:    ushr v3.4h, v4.4h, #1
+; CHECK-NEON-NEXT:    orr v0.16b, v1.16b, v0.16b
+; CHECK-NEON-NEXT:    eor v2.8b, v3.8b, v2.8b
+; CHECK-NEON-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-NEON-NEXT:    shll v1.4s, v2.4h, #16
+; CHECK-NEON-NEXT:    orr v0.16b, v0.16b, v1.16b
+; CHECK-NEON-NEXT:    rev32 v0.8b, v0.8b
+; CHECK-NEON-NEXT:    rbit v0.8b, v0.8b
+; CHECK-NEON-NEXT:    ushr v0.2s, v0.2s, #1
 ; CHECK-NEON-NEXT:    ret
 ;
 ; CHECK-AES-LABEL: clmulh_v2i32_neon:
 ; CHECK-AES:       // %bb.0:
+; CHECK-AES-NEXT:    rev32 v1.8b, v1.8b
----------------
davemgreen wrote:

Some of these look worse with +aes?

https://github.com/llvm/llvm-project/pull/184468


More information about the llvm-commits mailing list