[llvm] [X86] Add APX imulzu support. (PR #116806)
Daniel Zabawa via llvm-commits
llvm-commits at lists.llvm.org
Thu Nov 21 06:23:09 PST 2024
================
@@ -0,0 +1,238 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-pc-linux-gnu -mattr=+zu | FileCheck %s --check-prefix=ZU
+; RUN: llc < %s -mtriple=x86_64-pc-linux-gnu | FileCheck %s --check-prefix=NOZU
+
+; Test generation of 16b imulzu when -mattr=+zu is specified.
+; The mulzu_* tests check basic generation of imulzu, which folds away a zero-extend
+; of the result if one is present.
+; The following tests are modifications of selected test/CodeGen/X86/imul.ll tests with
+; 16b multiplies, to check that common strength reductions in ISel are still performed
+; when -mattr=+zu is in effect.
+;
+; FIXME: several cases from imul.ll covering DAG combines, in particular those using LEA,
+; are not ported as X86's IsDesirableToPromoteOp has no way to accurately identify when
+; promotion will permit a better sequence than an unpromoted imulzu.
+; These cases should be added when they are implemented.
+
+define i32 @mulzu_16_32(i16 %A) {
+; ZU-LABEL: mulzu_16_32:
+; ZU: # %bb.0:
+; ZU-NEXT: imulzuw $1234, %di, %ax # imm = 0x4D2
+; ZU-NEXT: retq
+;
+; NOZU-LABEL: mulzu_16_32:
+; NOZU: # %bb.0:
+; NOZU-NEXT: imull $1234, %edi, %eax # imm = 0x4D2
+; NOZU-NEXT: movzwl %ax, %eax
+; NOZU-NEXT: retq
+ %mul = mul i16 %A, 1234
+ %r = zext i16 %mul to i32
+ ret i32 %r
+}
+
+define i64 @mulzu_16_64(i16 %A) {
+; ZU-LABEL: mulzu_16_64:
+; ZU: # %bb.0:
+; ZU-NEXT: imulzuw $1234, %di, %ax # imm = 0x4D2
+; ZU-NEXT: retq
+;
+; NOZU-LABEL: mulzu_16_64:
+; NOZU: # %bb.0:
+; NOZU-NEXT: imull $1234, %edi, %eax # imm = 0x4D2
+; NOZU-NEXT: movzwl %ax, %eax
+; NOZU-NEXT: retq
+ %mul = mul i16 %A, 1234
+ %r = zext i16 %mul to i64
+ ret i64 %r
+}
+
+define i32 @mulzu_16_32_mem(ptr %P) {
+; ZU-LABEL: mulzu_16_32_mem:
+; ZU: # %bb.0:
+; ZU-NEXT: imulzuw $1234, (%rdi), %ax # imm = 0x4D2
+; ZU-NEXT: retq
+;
+; NOZU-LABEL: mulzu_16_32_mem:
+; NOZU: # %bb.0:
+; NOZU-NEXT: movzwl (%rdi), %eax
+; NOZU-NEXT: imull $1234, %eax, %eax # imm = 0x4D2
+; NOZU-NEXT: movzwl %ax, %eax
+; NOZU-NEXT: retq
+ %gep = getelementptr i16, ptr %P, i64 0
+ %A = load i16, ptr %gep
+ %mul = mul i16 %A, 1234
+ %r = zext i16 %mul to i32
+ ret i32 %r
+}
+
+define i64 @mulzu_16_64_mem(ptr %P) {
+; ZU-LABEL: mulzu_16_64_mem:
+; ZU: # %bb.0:
+; ZU-NEXT: imulzuw $1234, (%rdi), %ax # imm = 0x4D2
+; ZU-NEXT: retq
+;
+; NOZU-LABEL: mulzu_16_64_mem:
+; NOZU: # %bb.0:
+; NOZU-NEXT: movzwl (%rdi), %eax
+; NOZU-NEXT: imull $1234, %eax, %eax # imm = 0x4D2
+; NOZU-NEXT: movzwl %ax, %eax
+; NOZU-NEXT: retq
+ %gep = getelementptr i16, ptr %P, i64 0
+ %A = load i16, ptr %gep
+ %mul = mul i16 %A, 1234
+ %r = zext i16 %mul to i64
+ ret i64 %r
+}
+
+define void @mulzu_16_store(i16 %A, ptr %R) {
+; ZU-LABEL: mulzu_16_store:
+; ZU: # %bb.0:
+; ZU-NEXT: imulzuw $1234, %di, %ax # imm = 0x4D2
----------------
daniel-zabawa wrote:
If the non-ZU encoding of 16b imul is preferable, I'll remove the patterns that don't involve a zero extend and ensure that MUL is handled as before unless there is a zext-folding opportunity.
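A minimal sketch of the distinction (hypothetical IR, not from this patch; the function names are made up): in the first function the zero extend of the result gives imulzu something to fold, while in the second there is no zext, so MUL would keep its existing lowering.

define i32 @zext_fold(i16 %A) {      ; zext of the result: imulzuw can fold it
  %mul = mul i16 %A, 1234
  %r = zext i16 %mul to i32
  ret i32 %r
}

define i16 @no_zext_fold(i16 %A) {   ; no zext of the result: handle MUL as before
  %mul = mul i16 %A, 1234
  ret i16 %mul
}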
https://github.com/llvm/llvm-project/pull/116806