[llvm] [X86] Improve transform for add-like nodes to `add` (PR #83691)
via llvm-commits
llvm-commits at lists.llvm.org
Sat Mar 2 12:56:58 PST 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-x86
Author: None (goldsteinn)
<details>
<summary>Changes</summary>
We previously performed this transform only in tablegen, by which point we
would have already dropped the `disjoint` flag from the `or`.
---
Patch is 337.50 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/83691.diff
115 Files Affected:
- (modified) llvm/lib/Target/X86/X86ISelDAGToDAG.cpp (+16-1)
- (modified) llvm/lib/Target/X86/X86InstrCompiler.td (+18-1)
- (modified) llvm/test/CodeGen/X86/2009-05-23-dagcombine-shifts.ll (+7-4)
- (modified) llvm/test/CodeGen/X86/3addr-or.ll (+3-3)
- (modified) llvm/test/CodeGen/X86/addcarry2.ll (+2-2)
- (modified) llvm/test/CodeGen/X86/and-or-fold.ll (+2-2)
- (modified) llvm/test/CodeGen/X86/andimm8.ll (+2-2)
- (modified) llvm/test/CodeGen/X86/atomic-unordered.ll (+1-1)
- (modified) llvm/test/CodeGen/X86/avx512-calling-conv.ll (+146-146)
- (modified) llvm/test/CodeGen/X86/avx512-insert-extract.ll (+17-17)
- (modified) llvm/test/CodeGen/X86/avx512-vec-cmp.ll (+4-4)
- (modified) llvm/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll (+32-32)
- (modified) llvm/test/CodeGen/X86/bfloat.ll (+64-64)
- (modified) llvm/test/CodeGen/X86/bitcast-and-setcc-256.ll (+2-2)
- (modified) llvm/test/CodeGen/X86/bitcast-and-setcc-512.ll (+11-11)
- (modified) llvm/test/CodeGen/X86/bitcast-setcc-128.ll (+2-2)
- (modified) llvm/test/CodeGen/X86/bitcast-setcc-256.ll (+4-4)
- (modified) llvm/test/CodeGen/X86/bitcast-setcc-512.ll (+20-20)
- (modified) llvm/test/CodeGen/X86/bitcast-vector-bool.ll (+41-41)
- (modified) llvm/test/CodeGen/X86/bitreverse.ll (+65-65)
- (modified) llvm/test/CodeGen/X86/bitselect.ll (+12-11)
- (modified) llvm/test/CodeGen/X86/bool-math.ll (+6-6)
- (modified) llvm/test/CodeGen/X86/bool-vector.ll (+6-6)
- (modified) llvm/test/CodeGen/X86/bswap.ll (+2-2)
- (modified) llvm/test/CodeGen/X86/bswap_tree2.ll (+8-8)
- (modified) llvm/test/CodeGen/X86/buildvec-insertvec.ll (+2-2)
- (modified) llvm/test/CodeGen/X86/clz.ll (+6-6)
- (modified) llvm/test/CodeGen/X86/combine-bitreverse.ll (+18-18)
- (modified) llvm/test/CodeGen/X86/combine-bswap.ll (+1-1)
- (modified) llvm/test/CodeGen/X86/combine-fneg.ll (+4-4)
- (modified) llvm/test/CodeGen/X86/combine-rotates.ll (+1-1)
- (modified) llvm/test/CodeGen/X86/commute-two-addr.ll (+62-13)
- (modified) llvm/test/CodeGen/X86/dagcombine-select.ll (+1-1)
- (modified) llvm/test/CodeGen/X86/dagcombine-shifts.ll (+2-2)
- (modified) llvm/test/CodeGen/X86/disable-shrink-store.ll (+1-1)
- (modified) llvm/test/CodeGen/X86/extract-bits.ll (+64-64)
- (modified) llvm/test/CodeGen/X86/fold-masked-merge.ll (+26-22)
- (modified) llvm/test/CodeGen/X86/fp128-i128.ll (+4-4)
- (modified) llvm/test/CodeGen/X86/fpenv.ll (+13-13)
- (modified) llvm/test/CodeGen/X86/fptosi-sat-vector-128.ll (+17-17)
- (modified) llvm/test/CodeGen/X86/fptoui-sat-vector-128.ll (+17-17)
- (modified) llvm/test/CodeGen/X86/fshl.ll (+10-10)
- (modified) llvm/test/CodeGen/X86/fshr.ll (+11-11)
- (modified) llvm/test/CodeGen/X86/funnel-shift.ll (+2-2)
- (modified) llvm/test/CodeGen/X86/half.ll (+3-3)
- (modified) llvm/test/CodeGen/X86/illegal-bitfield-loadstore.ll (+14-14)
- (modified) llvm/test/CodeGen/X86/inline-spiller-impdef-on-implicit-def-regression.ll (+1-1)
- (modified) llvm/test/CodeGen/X86/insert.ll (+2-2)
- (modified) llvm/test/CodeGen/X86/is_fpclass-fp80.ll (+3-3)
- (modified) llvm/test/CodeGen/X86/is_fpclass.ll (+6-6)
- (modified) llvm/test/CodeGen/X86/kshift.ll (+8-8)
- (modified) llvm/test/CodeGen/X86/limited-prec.ll (+9-9)
- (modified) llvm/test/CodeGen/X86/llvm.frexp.ll (+14-14)
- (modified) llvm/test/CodeGen/X86/load-chain.ll (+1-1)
- (modified) llvm/test/CodeGen/X86/load-combine.ll (+46-41)
- (modified) llvm/test/CodeGen/X86/load-local-v3i1.ll (+4-4)
- (modified) llvm/test/CodeGen/X86/load-local-v3i129.ll (+2-2)
- (modified) llvm/test/CodeGen/X86/load-local-v4i5.ll (+3-3)
- (modified) llvm/test/CodeGen/X86/logic-shift.ll (+2-2)
- (modified) llvm/test/CodeGen/X86/lsr-loop-exit-cond.ll (+4-4)
- (modified) llvm/test/CodeGen/X86/madd.ll (+3-3)
- (modified) llvm/test/CodeGen/X86/masked_compressstore.ll (+3-3)
- (modified) llvm/test/CodeGen/X86/masked_expandload.ll (+3-3)
- (modified) llvm/test/CodeGen/X86/masked_load.ll (+3-3)
- (modified) llvm/test/CodeGen/X86/masked_store.ll (+9-9)
- (modified) llvm/test/CodeGen/X86/masked_store_trunc.ll (+3-3)
- (modified) llvm/test/CodeGen/X86/masked_store_trunc_ssat.ll (+3-3)
- (modified) llvm/test/CodeGen/X86/masked_store_trunc_usat.ll (+3-3)
- (modified) llvm/test/CodeGen/X86/memset-inline.ll (+1-1)
- (modified) llvm/test/CodeGen/X86/movmsk-cmp.ll (+2-2)
- (modified) llvm/test/CodeGen/X86/mul128.ll (+1-1)
- (modified) llvm/test/CodeGen/X86/no-wide-load.ll (+1-1)
- (modified) llvm/test/CodeGen/X86/or-lea.ll (+2-2)
- (modified) llvm/test/CodeGen/X86/pr20011.ll (+2-2)
- (modified) llvm/test/CodeGen/X86/pr23664.ll (+1-1)
- (modified) llvm/test/CodeGen/X86/pr27202.ll (+1-1)
- (modified) llvm/test/CodeGen/X86/pr28173.ll (+2-2)
- (modified) llvm/test/CodeGen/X86/pr35636.ll (+2-2)
- (modified) llvm/test/CodeGen/X86/pr35763.ll (+1-1)
- (modified) llvm/test/CodeGen/X86/pr43820.ll (+16-16)
- (modified) llvm/test/CodeGen/X86/pr47299.ll (+5-5)
- (modified) llvm/test/CodeGen/X86/pr62653.ll (+38-36)
- (modified) llvm/test/CodeGen/X86/pr69965.ll (+2-2)
- (modified) llvm/test/CodeGen/X86/pr77459.ll (+17-17)
- (modified) llvm/test/CodeGen/X86/promote-vec3.ll (+1-1)
- (modified) llvm/test/CodeGen/X86/rev16.ll (+5-5)
- (modified) llvm/test/CodeGen/X86/rotate-extract.ll (+8-8)
- (modified) llvm/test/CodeGen/X86/select.ll (+2-2)
- (modified) llvm/test/CodeGen/X86/select_const.ll (+3-3)
- (modified) llvm/test/CodeGen/X86/setcc-fsh.ll (+1-1)
- (modified) llvm/test/CodeGen/X86/shrink-compare-pgso.ll (+1-1)
- (modified) llvm/test/CodeGen/X86/shrink-compare.ll (+1-1)
- (modified) llvm/test/CodeGen/X86/smul_fix.ll (+2-2)
- (modified) llvm/test/CodeGen/X86/smul_fix_sat.ll (+2-2)
- (modified) llvm/test/CodeGen/X86/split-store.ll (+2-2)
- (modified) llvm/test/CodeGen/X86/srem-seteq-vec-nonsplat.ll (+2-2)
- (modified) llvm/test/CodeGen/X86/subvectorwise-store-of-vector-splat.ll (+52-52)
- (modified) llvm/test/CodeGen/X86/umul_fix_sat.ll (+2-2)
- (modified) llvm/test/CodeGen/X86/unfold-masked-merge-scalar-constmask-innerouter.ll (+2-2)
- (modified) llvm/test/CodeGen/X86/unfold-masked-merge-scalar-constmask-interleavedbits.ll (+2-2)
- (modified) llvm/test/CodeGen/X86/unfold-masked-merge-scalar-constmask-interleavedbytehalves.ll (+2-2)
- (modified) llvm/test/CodeGen/X86/unfold-masked-merge-scalar-constmask-lowhigh.ll (+6-6)
- (modified) llvm/test/CodeGen/X86/unfold-masked-merge-scalar-variablemask.ll (+35-34)
- (modified) llvm/test/CodeGen/X86/unfold-masked-merge-vector-variablemask.ll (+28-21)
- (modified) llvm/test/CodeGen/X86/urem-seteq-illegal-types.ll (+3-3)
- (modified) llvm/test/CodeGen/X86/vector-bitreverse.ll (+20-20)
- (modified) llvm/test/CodeGen/X86/vector-compare-all_of.ll (+2-2)
- (modified) llvm/test/CodeGen/X86/vector-compare-results.ll (+35-35)
- (modified) llvm/test/CodeGen/X86/vector-pcmp.ll (+2-2)
- (modified) llvm/test/CodeGen/X86/vector-sext.ll (+4-4)
- (modified) llvm/test/CodeGen/X86/vector-shuffle-128-v16.ll (+2-2)
- (modified) llvm/test/CodeGen/X86/vector-shuffle-v1.ll (+4-4)
- (modified) llvm/test/CodeGen/X86/vector-trunc.ll (+1-1)
- (modified) llvm/test/CodeGen/X86/vector-zext.ll (+6-6)
- (modified) llvm/test/CodeGen/X86/xor-lea.ll (+2-2)
``````````diff
diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
index 5cbd9ab4dc2d6c..9f34a4e1870f71 100644
--- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -5294,11 +5294,26 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
return;
if (tryVPTERNLOG(Node))
return;
-
[[fallthrough]];
case ISD::ADD:
if (Opcode == ISD::ADD && matchBitExtract(Node))
return;
+
+ // Convert addlike to add before final selection. Do this before we drop
+ // flags like `disjoint`.
+ // NB: Conversion to add is preferable so we use `lea` in codegen.
+ if (Opcode != ISD::ADD && NVT.isScalarInteger() &&
+ (Opcode == ISD::OR ||
+ (NVT == MVT::i8 || NVT == MVT::i16 || NVT == MVT::i32)) &&
+ CurDAG->isADDLike(SDValue(Node, 0))
+ ) {
+ SDValue AsAdd = CurDAG->getNode(ISD::ADD, SDLoc(Node), NVT,
+ Node->getOperand(0), Node->getOperand(1));
+ ReplaceUses(SDValue(Node, 0), AsAdd);
+ CurDAG->RemoveDeadNode(Node);
+ Node = AsAdd.getNode();
+ Opcode = ISD::ADD;
+ }
[[fallthrough]];
case ISD::SUB: {
// Try to avoid folding immediates with multiple uses for optsize.
diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td
index f393f86e64aadd..b31c5be87a5839 100644
--- a/llvm/lib/Target/X86/X86InstrCompiler.td
+++ b/llvm/lib/Target/X86/X86InstrCompiler.td
@@ -1574,7 +1574,24 @@ def : Pat<(or (and GR64:$dst, -65536),
def : Pat<(or (and GR32:$dst, -65536),
(i32 (zextloadi16 addr:$src))),
- (INSERT_SUBREG (i32 (COPY $dst)), (MOV16rm i16mem:$src), sub_16bit)>;
+ (INSERT_SUBREG (i32 (COPY $dst)), (MOV16rm i16mem:$src), sub_16bit)>;
+
+// We convert or -> add when the or is disjoint so need to handle for add as well.
+def : Pat<(add (and GR64:$dst, -256),
+ (i64 (zextloadi8 addr:$src))),
+ (INSERT_SUBREG (i64 (COPY $dst)), (MOV8rm i8mem:$src), sub_8bit)>;
+
+def : Pat<(add (and GR32:$dst, -256),
+ (i32 (zextloadi8 addr:$src))),
+ (INSERT_SUBREG (i32 (COPY $dst)), (MOV8rm i8mem:$src), sub_8bit)>;
+
+def : Pat<(add (and GR64:$dst, -65536),
+ (i64 (zextloadi16 addr:$src))),
+ (INSERT_SUBREG (i64 (COPY $dst)), (MOV16rm i16mem:$src), sub_16bit)>;
+
+def : Pat<(add (and GR32:$dst, -65536),
+ (i32 (zextloadi16 addr:$src))),
+ (INSERT_SUBREG (i32 (COPY $dst)), (MOV16rm i16mem:$src), sub_16bit)>;
// To avoid needing to materialize an immediate in a register, use a 32-bit and
// with implicit zero-extension instead of a 64-bit and if the immediate has at
diff --git a/llvm/test/CodeGen/X86/2009-05-23-dagcombine-shifts.ll b/llvm/test/CodeGen/X86/2009-05-23-dagcombine-shifts.ll
index 609be3bb2e54f0..50e736ac68d29e 100644
--- a/llvm/test/CodeGen/X86/2009-05-23-dagcombine-shifts.ll
+++ b/llvm/test/CodeGen/X86/2009-05-23-dagcombine-shifts.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc < %s | FileCheck %s
; Check that the shr(shl X, 56), 48) is not mistakenly turned into
@@ -16,11 +17,13 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
target triple = "x86_64-unknown-linux-gnu"
define i64 @foo(i64 %b) nounwind readnone {
-entry:
; CHECK-LABEL: foo:
-; CHECK: movsbq %dil, %rax
-; CHECK: shlq $8, %rax
-; CHECK: orq $1, %rax
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: movsbq %dil, %rax
+; CHECK-NEXT: shlq $8, %rax
+; CHECK-NEXT: incq %rax
+; CHECK-NEXT: retq
+entry:
%shl = shl i64 %b, 56 ; <i64> [#uses=1]
%shr = ashr i64 %shl, 48 ; <i64> [#uses=1]
%add5 = or i64 %shr, 1 ; <i64> [#uses=1]
diff --git a/llvm/test/CodeGen/X86/3addr-or.ll b/llvm/test/CodeGen/X86/3addr-or.ll
index 65f6d2b4123e8e..1f466afcadc9ca 100644
--- a/llvm/test/CodeGen/X86/3addr-or.ll
+++ b/llvm/test/CodeGen/X86/3addr-or.ll
@@ -24,7 +24,7 @@ define i64 @test2(i8 %A, i8 %B) nounwind {
; CHECK-NEXT: andl $48, %edi
; CHECK-NEXT: movzbl %sil, %eax
; CHECK-NEXT: shrl $4, %eax
-; CHECK-NEXT: orl %edi, %eax
+; CHECK-NEXT: addl %edi, %eax
; CHECK-NEXT: retq
%C = zext i8 %A to i64
%D = shl i64 %C, 4
@@ -42,7 +42,7 @@ define void @test3(i32 %x, ptr %P) nounwind readnone ssp {
; CHECK-LABEL: test3:
; CHECK: # %bb.0:
; CHECK-NEXT: shll $5, %edi
-; CHECK-NEXT: orl $3, %edi
+; CHECK-NEXT: addl $3, %edi
; CHECK-NEXT: movl %edi, (%rsi)
; CHECK-NEXT: retq
%t0 = shl i32 %x, 5
@@ -71,7 +71,7 @@ define void @test5(i32 %a, i32 %b, ptr nocapture %P) nounwind ssp {
; CHECK: # %bb.0:
; CHECK-NEXT: andl $6, %edi
; CHECK-NEXT: andl $16, %esi
-; CHECK-NEXT: orl %edi, %esi
+; CHECK-NEXT: addl %edi, %esi
; CHECK-NEXT: movl %esi, (%rdx)
; CHECK-NEXT: retq
%and = and i32 %a, 6
diff --git a/llvm/test/CodeGen/X86/addcarry2.ll b/llvm/test/CodeGen/X86/addcarry2.ll
index 0338577dbddc2b..1a5d0f4fe45416 100644
--- a/llvm/test/CodeGen/X86/addcarry2.ll
+++ b/llvm/test/CodeGen/X86/addcarry2.ll
@@ -138,7 +138,7 @@ define void @adc_load_store_32_127(ptr inreg %x, ptr inreg %x2, i32 inreg %y) no
; X64-NEXT: movl (%rdi), %eax # encoding: [0x8b,0x07]
; X64-NEXT: shlq $32, %rax # encoding: [0x48,0xc1,0xe0,0x20]
; X64-NEXT: movl %edx, %ecx # encoding: [0x89,0xd1]
-; X64-NEXT: orq %rax, %rcx # encoding: [0x48,0x09,0xc1]
+; X64-NEXT: addq %rax, %rcx # encoding: [0x48,0x01,0xc1]
; X64-NEXT: movabsq $545460846593, %rax # encoding: [0x48,0xb8,0x01,0x00,0x00,0x00,0x7f,0x00,0x00,0x00]
; X64-NEXT: # imm = 0x7F00000001
; X64-NEXT: xorl %edx, %edx # encoding: [0x31,0xd2]
@@ -178,7 +178,7 @@ define void @adc_load_store_32_128(ptr inreg %x, ptr inreg %x2, i32 inreg %y) no
; X64-NEXT: movl (%rdi), %eax # encoding: [0x8b,0x07]
; X64-NEXT: shlq $32, %rax # encoding: [0x48,0xc1,0xe0,0x20]
; X64-NEXT: movl %edx, %ecx # encoding: [0x89,0xd1]
-; X64-NEXT: orq %rax, %rcx # encoding: [0x48,0x09,0xc1]
+; X64-NEXT: addq %rax, %rcx # encoding: [0x48,0x01,0xc1]
; X64-NEXT: movabsq $549755813889, %rax # encoding: [0x48,0xb8,0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00]
; X64-NEXT: # imm = 0x8000000001
; X64-NEXT: xorl %edx, %edx # encoding: [0x31,0xd2]
diff --git a/llvm/test/CodeGen/X86/and-or-fold.ll b/llvm/test/CodeGen/X86/and-or-fold.ll
index 1bb5fdeebac71c..4071b364a25c3b 100644
--- a/llvm/test/CodeGen/X86/and-or-fold.ll
+++ b/llvm/test/CodeGen/X86/and-or-fold.ll
@@ -45,7 +45,7 @@ define i32 @test1(i32 %x, i16 %y) {
; DARWIN-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
; DARWIN-NEXT: movl {{[0-9]+}}(%esp), %eax
; DARWIN-NEXT: shll $16, %eax
-; DARWIN-NEXT: orl %ecx, %eax
+; DARWIN-NEXT: addl %ecx, %eax
; DARWIN-NEXT: andl $16711807, %eax ## imm = 0xFF007F
; DARWIN-NEXT: retl
;
@@ -54,7 +54,7 @@ define i32 @test1(i32 %x, i16 %y) {
; DARWIN-OPT-NEXT: andl $127, %esi
; DARWIN-OPT-NEXT: movzbl %dil, %eax
; DARWIN-OPT-NEXT: shll $16, %eax
-; DARWIN-OPT-NEXT: orl %esi, %eax
+; DARWIN-OPT-NEXT: addl %esi, %eax
; DARWIN-OPT-NEXT: retq
%tmp1 = zext i16 %y to i32
%tmp2 = and i32 %tmp1, 127
diff --git a/llvm/test/CodeGen/X86/andimm8.ll b/llvm/test/CodeGen/X86/andimm8.ll
index 6242d4f4c222bb..506e28300e71b0 100644
--- a/llvm/test/CodeGen/X86/andimm8.ll
+++ b/llvm/test/CodeGen/X86/andimm8.ll
@@ -29,7 +29,7 @@ define void @foo(i64 %zed, ptr %x) nounwind {
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx # encoding: [0x8b,0x54,0x24,0x08]
; X86-NEXT: andl $-4, %ecx # encoding: [0x83,0xe1,0xfc]
-; X86-NEXT: orl $2, %ecx # encoding: [0x83,0xc9,0x02]
+; X86-NEXT: addl $2, %ecx # encoding: [0x83,0xc1,0x02]
; X86-NEXT: movl %edx, 4(%eax) # encoding: [0x89,0x50,0x04]
; X86-NEXT: movl %ecx, (%eax) # encoding: [0x89,0x08]
; X86-NEXT: retl # encoding: [0xc3]
@@ -37,7 +37,7 @@ define void @foo(i64 %zed, ptr %x) nounwind {
; X64-LABEL: foo:
; X64: # %bb.0:
; X64-NEXT: andq $-4, %rdi # encoding: [0x48,0x83,0xe7,0xfc]
-; X64-NEXT: orq $2, %rdi # encoding: [0x48,0x83,0xcf,0x02]
+; X64-NEXT: addq $2, %rdi # encoding: [0x48,0x83,0xc7,0x02]
; X64-NEXT: movq %rdi, (%rsi) # encoding: [0x48,0x89,0x3e]
; X64-NEXT: retq # encoding: [0xc3]
%t1 = and i64 %zed, -4
diff --git a/llvm/test/CodeGen/X86/atomic-unordered.ll b/llvm/test/CodeGen/X86/atomic-unordered.ll
index df123be53474f0..903951dd5a8cff 100644
--- a/llvm/test/CodeGen/X86/atomic-unordered.ll
+++ b/llvm/test/CodeGen/X86/atomic-unordered.ll
@@ -2359,7 +2359,7 @@ define i16 @load_combine(ptr %p) {
; CHECK-O3-NEXT: movzbl (%rdi), %ecx
; CHECK-O3-NEXT: movzbl 1(%rdi), %eax
; CHECK-O3-NEXT: shll $8, %eax
-; CHECK-O3-NEXT: orl %ecx, %eax
+; CHECK-O3-NEXT: addl %ecx, %eax
; CHECK-O3-NEXT: # kill: def $ax killed $ax killed $eax
; CHECK-O3-NEXT: retq
%v1 = load atomic i8, ptr %p unordered, align 2
diff --git a/llvm/test/CodeGen/X86/avx512-calling-conv.ll b/llvm/test/CodeGen/X86/avx512-calling-conv.ll
index b39b089faa2a5e..b4c37a2e34d95d 100644
--- a/llvm/test/CodeGen/X86/avx512-calling-conv.ll
+++ b/llvm/test/CodeGen/X86/avx512-calling-conv.ll
@@ -910,13 +910,13 @@ define <17 x i1> @test16(<17 x i1> %a, <17 x i1> %b) nounwind {
; KNL-NEXT: kandw %k2, %k0, %k0
; KNL-NEXT: kmovw %r10d, %k2
; KNL-NEXT: kandw %k1, %k2, %k1
-; KNL-NEXT: kmovw %k1, %edx
+; KNL-NEXT: kmovw %k1, %esi
; KNL-NEXT: kshiftrw $1, %k0, %k1
; KNL-NEXT: kmovw %k1, %r9d
; KNL-NEXT: kshiftrw $2, %k0, %k1
; KNL-NEXT: kmovw %k1, %r8d
; KNL-NEXT: kshiftrw $3, %k0, %k1
-; KNL-NEXT: kmovw %k1, %esi
+; KNL-NEXT: kmovw %k1, %edx
; KNL-NEXT: kshiftrw $4, %k0, %k1
; KNL-NEXT: kmovw %k1, %edi
; KNL-NEXT: kshiftrw $5, %k0, %k1
@@ -928,9 +928,9 @@ define <17 x i1> @test16(<17 x i1> %a, <17 x i1> %b) nounwind {
; KNL-NEXT: kshiftrw $8, %k0, %k1
; KNL-NEXT: kmovw %k1, %ebp
; KNL-NEXT: kshiftrw $9, %k0, %k1
-; KNL-NEXT: kmovw %k1, %r14d
-; KNL-NEXT: kshiftrw $10, %k0, %k1
; KNL-NEXT: kmovw %k1, %r11d
+; KNL-NEXT: kshiftrw $10, %k0, %k1
+; KNL-NEXT: kmovw %k1, %r14d
; KNL-NEXT: kshiftrw $11, %k0, %k1
; KNL-NEXT: kmovw %k1, %r15d
; KNL-NEXT: kshiftrw $12, %k0, %k1
@@ -938,25 +938,25 @@ define <17 x i1> @test16(<17 x i1> %a, <17 x i1> %b) nounwind {
; KNL-NEXT: kshiftrw $13, %k0, %k1
; KNL-NEXT: kmovw %k1, %r13d
; KNL-NEXT: kshiftrw $14, %k0, %k1
-; KNL-NEXT: andl $1, %edx
-; KNL-NEXT: movb %dl, 2(%rax)
-; KNL-NEXT: kmovw %k0, %edx
-; KNL-NEXT: andl $1, %edx
+; KNL-NEXT: andl $1, %esi
+; KNL-NEXT: movb %sil, 2(%rax)
+; KNL-NEXT: kmovw %k0, %esi
+; KNL-NEXT: andl $1, %esi
; KNL-NEXT: andl $1, %r9d
-; KNL-NEXT: leal (%rdx,%r9,2), %r9d
-; KNL-NEXT: kmovw %k1, %edx
+; KNL-NEXT: leal (%rsi,%r9,2), %r9d
+; KNL-NEXT: kmovw %k1, %esi
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: andl $1, %r8d
; KNL-NEXT: leal (%r9,%r8,4), %r9d
; KNL-NEXT: kmovw %k0, %r8d
-; KNL-NEXT: andl $1, %esi
-; KNL-NEXT: leal (%r9,%rsi,8), %esi
+; KNL-NEXT: andl $1, %edx
+; KNL-NEXT: leal (%r9,%rdx,8), %edx
; KNL-NEXT: andl $1, %edi
; KNL-NEXT: shll $4, %edi
-; KNL-NEXT: orl %esi, %edi
; KNL-NEXT: andl $1, %ecx
; KNL-NEXT: shll $5, %ecx
-; KNL-NEXT: orl %edi, %ecx
+; KNL-NEXT: addl %edi, %ecx
+; KNL-NEXT: addl %edx, %ecx
; KNL-NEXT: andl $1, %r10d
; KNL-NEXT: shll $6, %r10d
; KNL-NEXT: andl $1, %ebx
@@ -965,28 +965,28 @@ define <17 x i1> @test16(<17 x i1> %a, <17 x i1> %b) nounwind {
; KNL-NEXT: andl $1, %ebp
; KNL-NEXT: shll $8, %ebp
; KNL-NEXT: orl %ebx, %ebp
-; KNL-NEXT: andl $1, %r14d
-; KNL-NEXT: shll $9, %r14d
-; KNL-NEXT: orl %ebp, %r14d
; KNL-NEXT: andl $1, %r11d
-; KNL-NEXT: shll $10, %r11d
-; KNL-NEXT: orl %r14d, %r11d
+; KNL-NEXT: shll $9, %r11d
+; KNL-NEXT: orl %ebp, %r11d
; KNL-NEXT: orl %ecx, %r11d
+; KNL-NEXT: andl $1, %r14d
+; KNL-NEXT: shll $10, %r14d
; KNL-NEXT: andl $1, %r15d
; KNL-NEXT: shll $11, %r15d
+; KNL-NEXT: orl %r14d, %r15d
; KNL-NEXT: andl $1, %r12d
; KNL-NEXT: shll $12, %r12d
; KNL-NEXT: orl %r15d, %r12d
; KNL-NEXT: andl $1, %r13d
; KNL-NEXT: shll $13, %r13d
; KNL-NEXT: orl %r12d, %r13d
-; KNL-NEXT: andl $1, %edx
-; KNL-NEXT: shll $14, %edx
-; KNL-NEXT: orl %r13d, %edx
+; KNL-NEXT: andl $1, %esi
+; KNL-NEXT: shll $14, %esi
+; KNL-NEXT: orl %r13d, %esi
+; KNL-NEXT: orl %r11d, %esi
; KNL-NEXT: andl $1, %r8d
; KNL-NEXT: shll $15, %r8d
-; KNL-NEXT: orl %edx, %r8d
-; KNL-NEXT: orl %r11d, %r8d
+; KNL-NEXT: orl %esi, %r8d
; KNL-NEXT: movw %r8w, (%rax)
; KNL-NEXT: popq %rbx
; KNL-NEXT: popq %r12
@@ -1223,13 +1223,13 @@ define <17 x i1> @test16(<17 x i1> %a, <17 x i1> %b) nounwind {
; SKX-NEXT: kmovd {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 4-byte Reload
; SKX-NEXT: kandd %k1, %k0, %k0
; SKX-NEXT: kshiftrd $16, %k0, %k1
-; SKX-NEXT: kmovd %k1, %edx
+; SKX-NEXT: kmovd %k1, %esi
; SKX-NEXT: kshiftrd $1, %k0, %k1
; SKX-NEXT: kmovd %k1, %r9d
; SKX-NEXT: kshiftrd $2, %k0, %k1
; SKX-NEXT: kmovd %k1, %r8d
; SKX-NEXT: kshiftrd $3, %k0, %k1
-; SKX-NEXT: kmovd %k1, %esi
+; SKX-NEXT: kmovd %k1, %edx
; SKX-NEXT: kshiftrd $4, %k0, %k1
; SKX-NEXT: kmovd %k1, %edi
; SKX-NEXT: kshiftrd $5, %k0, %k1
@@ -1241,9 +1241,9 @@ define <17 x i1> @test16(<17 x i1> %a, <17 x i1> %b) nounwind {
; SKX-NEXT: kshiftrd $8, %k0, %k1
; SKX-NEXT: kmovd %k1, %ebp
; SKX-NEXT: kshiftrd $9, %k0, %k1
-; SKX-NEXT: kmovd %k1, %r14d
-; SKX-NEXT: kshiftrd $10, %k0, %k1
; SKX-NEXT: kmovd %k1, %r11d
+; SKX-NEXT: kshiftrd $10, %k0, %k1
+; SKX-NEXT: kmovd %k1, %r14d
; SKX-NEXT: kshiftrd $11, %k0, %k1
; SKX-NEXT: kmovd %k1, %r15d
; SKX-NEXT: kshiftrd $12, %k0, %k1
@@ -1251,25 +1251,25 @@ define <17 x i1> @test16(<17 x i1> %a, <17 x i1> %b) nounwind {
; SKX-NEXT: kshiftrd $13, %k0, %k1
; SKX-NEXT: kmovd %k1, %r13d
; SKX-NEXT: kshiftrd $14, %k0, %k1
-; SKX-NEXT: andl $1, %edx
-; SKX-NEXT: movb %dl, 2(%rax)
-; SKX-NEXT: kmovd %k0, %edx
-; SKX-NEXT: andl $1, %edx
+; SKX-NEXT: andl $1, %esi
+; SKX-NEXT: movb %sil, 2(%rax)
+; SKX-NEXT: kmovd %k0, %esi
+; SKX-NEXT: andl $1, %esi
; SKX-NEXT: andl $1, %r9d
-; SKX-NEXT: leal (%rdx,%r9,2), %r9d
-; SKX-NEXT: kmovd %k1, %edx
+; SKX-NEXT: leal (%rsi,%r9,2), %r9d
+; SKX-NEXT: kmovd %k1, %esi
; SKX-NEXT: kshiftrd $15, %k0, %k0
; SKX-NEXT: andl $1, %r8d
; SKX-NEXT: leal (%r9,%r8,4), %r9d
; SKX-NEXT: kmovd %k0, %r8d
-; SKX-NEXT: andl $1, %esi
-; SKX-NEXT: leal (%r9,%rsi,8), %esi
+; SKX-NEXT: andl $1, %edx
+; SKX-NEXT: leal (%r9,%rdx,8), %edx
; SKX-NEXT: andl $1, %edi
; SKX-NEXT: shll $4, %edi
-; SKX-NEXT: orl %esi, %edi
; SKX-NEXT: andl $1, %ecx
; SKX-NEXT: shll $5, %ecx
-; SKX-NEXT: orl %edi, %ecx
+; SKX-NEXT: addl %edi, %ecx
+; SKX-NEXT: addl %edx, %ecx
; SKX-NEXT: andl $1, %r10d
; SKX-NEXT: shll $6, %r10d
; SKX-NEXT: andl $1, %ebx
@@ -1278,28 +1278,28 @@ define <17 x i1> @test16(<17 x i1> %a, <17 x i1> %b) nounwind {
; SKX-NEXT: andl $1, %ebp
; SKX-NEXT: shll $8, %ebp
; SKX-NEXT: orl %ebx, %ebp
-; SKX-NEXT: andl $1, %r14d
-; SKX-NEXT: shll $9, %r14d
-; SKX-NEXT: orl %ebp, %r14d
; SKX-NEXT: andl $1, %r11d
-; SKX-NEXT: shll $10, %r11d
-; SKX-NEXT: orl %r14d, %r11d
+; SKX-NEXT: shll $9, %r11d
+; SKX-NEXT: orl %ebp, %r11d
; SKX-NEXT: orl %ecx, %r11d
+; SKX-NEXT: andl $1, %r14d
+; SKX-NEXT: shll $10, %r14d
; SKX-NEXT: andl $1, %r15d
; SKX-NEXT: shll $11, %r15d
+; SKX-NEXT: orl %r14d, %r15d
; SKX-NEXT: andl $1, %r12d
; SKX-NEXT: shll $12, %r12d
; SKX-NEXT: orl %r15d, %r12d
; SKX-NEXT: andl $1, %r13d
; SKX-NEXT: shll $13, %r13d
; SKX-NEXT: orl %r12d, %r13d
-; SKX-NEXT: andl $1, %edx
-; SKX-NEXT: shll $14, %edx
-; SKX-NEXT: orl %r13d, %edx
+; SKX-NEXT: andl $1, %esi
+; SKX-NEXT: shll $14, %esi
+; SKX-NEXT: orl %r13d, %esi
+; SKX-NEXT: orl %r11d, %esi
; SKX-NEXT: andl $1, %r8d
; SKX-NEXT: shll $15, %r8d
-; SKX-NEXT: orl %edx, %r8d
-; SKX-NEXT: orl %r11d, %r8d
+; SKX-NEXT: orl %esi, %r8d
; SKX-NEXT: movw %r8w, (%rax)
; SKX-NEXT: popq %rbx
; SKX-NEXT: popq %r12
@@ -1556,9 +1556,9 @@ define <17 x i1> @test16(<17 x i1> %a, <17 x i1> %b) nounwind {
; KNL_X32-NEXT: kshiftrw $1, %k0, %k1
; KNL_X32-NEXT: kmovw %k1, %ebp
; KNL_X32-NEXT: kshiftrw $2, %k0, %k1
-; KNL_X32-NEXT: kmovw %k1, %esi
-; KNL_X32-NEXT: kshiftrw $3, %k0, %k1
; KNL_X32-NEXT: kmovw %k1, %edi
+; KNL_X32-NEXT: kshiftrw $3, %k0, %k1
+; KNL_X32-NEXT: kmovw %k1, %esi
; KNL_X32-NEXT: kshiftrw $4, %k0, %k1
; KNL_X32-NEXT: kmovw %k1, %edx
; KNL_X32-NEXT: kshiftrw $5, %k0, %k1
@@ -1569,67 +1569,67 @@ define <17 x i1> @test16(<17 x i1> %a, <17 x i1> %b) nounwind {
; KNL_X32-NEXT: kmovw %k0, %ebx
; KNL_X32-NEXT: andl $1, %ebx
; KNL_X32-NEXT: andl $1, %ebp
-; KNL_X32-NEXT: leal (%ebx,%ebp,2), %ebx
-; KNL_X32-NEXT: kmovw %k1, %ebp
+; KNL_X32-NEXT: leal (%ebx,%ebp,2), %ebp
+; KNL_X32-NEXT: kmovw %k1, %ebx
; KNL_X32-NEXT: kshiftrw $7, %k0, %k1
-; KNL_X32-NEXT: andl $1, %esi
-; KNL_X32-NEXT: leal (%ebx,%esi,4), %ebx
-; KNL_X32-NEXT: kmovw %k1, %esi
-; KNL_X32-NEXT: kshiftrw $8, %k0, %k1
; KNL_X32-NEXT: andl $1, %edi
-; KNL_X32-NEXT: leal (%ebx,%edi,8), %ebx
+; KNL_X32-NEXT: leal (%ebp,%edi,4), %ebp
; KNL_X32-NEXT: kmovw %k1, %edi
+; KNL_X32-NEXT: kshiftrw $8, %k0, %k1
+; KNL_X32-NEXT: andl $1, %esi
+; KNL_X32-NEXT: leal (%ebp,%esi,8), %ebp
+; KNL_X32-NEXT: kmovw %k1, %esi
; KNL_X32-NEXT: kshiftrw $9, %k0, %k1
; KNL_X32-NEXT: andl $1, %edx
; KNL_X32-NEXT: shll $4, %edx
-; KNL_X32-NEXT: orl %ebx, %edx
-; KNL_X32-NEXT: kmovw %k1, %ebx
-; KNL_X32-NEXT: kshiftrw $10, %k0, %k1
; KNL_X32-NEXT: andl $1, %ecx
; KNL_X32-NEXT: shll $5, %ecx
-; KNL_X32-NEXT: orl %edx, %ecx
+; KNL_X32-NEXT: addl %edx, %ecx
; KNL_X32-NEXT: kmovw %k1, %edx
-; KNL_X32-NEXT: kshiftrw $11, %k0, %k1
-; KNL_X32-NEXT: andl $1, %ebp
-; KNL_X32-NEXT: shll $6, %ebp
-; KNL_X32-NEXT: andl $1, %esi
-; KNL_X32-NEXT: shll $7, %esi
-; KNL_X32-NEXT: orl %ebp, %esi
+; KNL_X32-NEXT: kshiftrw $10, %k0, %k1
+; KNL_X32-NEXT: addl %ebp, %ecx
; KNL_X32-NEXT: kmovw %k1, %ebp
-; KNL_X32-NEXT: kshiftrw $12, %k0, %k1
-; KNL_X32-NEXT: andl $1, %edi
-; KNL_X32-NEXT: shll $8, %edi
-; KNL_X32-NEXT: orl %esi, %edi
-; KNL_X32-NEXT: kmovw %k1, %esi
-; KNL_X32-NEXT: kshiftrw $13, %k0, %k1
+; KNL_X32-NEXT: kshiftrw $11, %k0, %k1
; KNL_X32-NEXT: andl $1, %ebx
-; KNL_X32-NEXT: shll $9, %ebx
-; KNL_X32-NEXT: orl %edi, %ebx
+; KNL_X32-NEXT: shll $6, %ebx
+; KNL_X32-NEXT: andl $1, %edi
+; KNL_X32-NEXT: shll $7, %edi
+; KNL_X32-NEXT: orl %ebx, %edi
+; KNL_X32-NEXT: kmovw %k1, %ebx
+; KNL_X32-NEXT: kshiftrw $12, %k0, %k1
+; KNL_X32-NEXT: andl $1, %esi
+; KNL_X32-NEXT: shll $8, %esi
+; KNL_X32-NEXT: orl %edi, %esi
; KNL_X32-NEXT: kmovw %k1, %edi
-; KNL_X32-NEXT: kshiftrw $14, %k0, %k1
+; KNL_X32-NEXT: kshiftrw $13, %k0, %k1
; KNL_X32-NEXT: andl $1, %edx
-; KNL_X32-NEXT: shll $10, %edx
-; KNL_X32-NEXT: orl %ebx, %edx
-; KNL_X32-NEXT: kmovw %k1, %ebx
-; KNL_X32-NEXT: kshiftrw $15, %k0, %k0
+; KNL_X32-NEXT: shll $9, %edx
+; KNL_X32-NEXT: orl %esi, %edx
+; KNL_X32-NEXT: kmovw %k1, %esi
+; KNL_X32-NEXT: kshiftrw $14, %k0, %k1
; KNL_X32-NEXT: orl %ecx, %edx
-; KNL_X32-NEXT: kmovw %k0, %ecx
+; KNL_X32-NEXT: kmovw %k1, %ecx
+; KNL_X32-NEXT: kshiftrw $15, %k0, %k0
; KNL_X32-NEXT: andl $1, %ebp
-; KNL_X32-NEXT: shll $11, %ebp
-; KNL_X32-NEXT: andl $1, %esi
-; KNL_X32-NEXT: shll $12, %esi
-; KNL_X32-NEXT: orl %ebp, %esi
-; KNL_X32-NEXT: andl $1, %edi
-; KNL_X32-NEXT: shll $13, %edi
-; KNL_X32-NEXT: orl %esi, %edi
+; KNL_X32-NEXT: shll $10, %ebp
; KNL_X32-NEXT: andl $1, %ebx
-; KNL_X32-NEXT: shll $14, %ebx
-; KNL_X32-NEXT: orl %edi, %ebx
+; KNL_X32-NEXT: shll $11, %ebx
+; KNL_X...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/83691
More information about the llvm-commits
mailing list