[llvm] [X86] Avoid trunc/zext in narrow shift by working over i32 (PR #185539)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 9 16:49:28 PDT 2026
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-x86
Author: Gergo Stomfai (stomfaig)
<details>
<summary>Changes</summary>
Closes #<!-- -->168828
---
Patch is 22.74 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/185539.diff
12 Files Affected:
- (modified) llvm/lib/Target/X86/X86ISelLowering.cpp (+23)
- (modified) llvm/test/CodeGen/X86/buildvec-insertvec.ll (+9-9)
- (modified) llvm/test/CodeGen/X86/divide-by-constant.ll (+6-6)
- (modified) llvm/test/CodeGen/X86/fshl.ll (+5-3)
- (modified) llvm/test/CodeGen/X86/fshr.ll (+5-3)
- (added) llvm/test/CodeGen/X86/issue168828.ll (+27)
- (modified) llvm/test/CodeGen/X86/llvm.frexp.ll (+23-23)
- (modified) llvm/test/CodeGen/X86/masked_load.ll (+4-2)
- (modified) llvm/test/CodeGen/X86/midpoint-int.ll (+26-26)
- (modified) llvm/test/CodeGen/X86/popcnt.ll (+2-2)
- (modified) llvm/test/CodeGen/X86/pr45995.ll (+19-21)
- (modified) llvm/test/CodeGen/X86/shift-mask.ll (+14-30)
``````````diff
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 6384c4d58a480..fdb810cdb31b8 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -57472,6 +57472,29 @@ static SDValue combineZext(SDNode *N, SelectionDAG &DAG,
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
+ // (zext (srl (trunc X), amt)) -> (and (srl X, amt), (TruncMask >> amt))
+ // Restricted to VT == i32
+ if (N0->getOpcode() == ISD::SRL) {
+ SDValue N00 = N0->getOperand(0);
+ if (N00->getOpcode() == ISD::TRUNCATE) {
+ SDValue OriginalVal = N00->getOperand(0);
+ EVT TruncVT = N00.getValueType();
+
+ if (VT == MVT::i32 && OriginalVal.getValueType() == VT) {
+ auto *ShiftC = dyn_cast<ConstantSDNode>(N0->getOperand(1));
+ if (!ShiftC)
+ return SDValue();
+ APInt TruncMask = APInt::getLowBitsSet(VT.getScalarSizeInBits(),
+ TruncVT.getScalarSizeInBits());
+ SDValue NewShift =
+ DAG.getNode(ISD::SRL, dl, VT, OriginalVal, N0->getOperand(1));
+ APInt ShiftedMask = TruncMask.lshr(ShiftC->getAPIntValue());
+ return DAG.getNode(ISD::AND, dl, VT, NewShift,
+ DAG.getConstant(ShiftedMask, dl, VT));
+ }
+ }
+ }
+
// (i32 (aext (i8 (x86isd::setcc_carry)))) -> (i32 (x86isd::setcc_carry))
// FIXME: Is this needed? We don't seem to have any tests for it.
if (!DCI.isBeforeLegalizeOps() && N->getOpcode() == ISD::ANY_EXTEND &&
diff --git a/llvm/test/CodeGen/X86/buildvec-insertvec.ll b/llvm/test/CodeGen/X86/buildvec-insertvec.ll
index 4b0e5441b4abf..d6fbed75b56fa 100644
--- a/llvm/test/CodeGen/X86/buildvec-insertvec.ll
+++ b/llvm/test/CodeGen/X86/buildvec-insertvec.ll
@@ -725,10 +725,10 @@ define <16 x i8> @test_buildvector_v16i8_register_zero_2(i8 %a2, i8 %a3, i8 %a6,
define void @PR46461(i16 %x, ptr %y) {
; SSE-LABEL: PR46461:
; SSE: # %bb.0:
-; SSE-NEXT: movzwl %di, %eax
-; SSE-NEXT: movd %eax, %xmm0
+; SSE-NEXT: shrl %edi
+; SSE-NEXT: andl $32767, %edi # imm = 0x7FFF
+; SSE-NEXT: movd %edi, %xmm0
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
-; SSE-NEXT: psrld $1, %xmm0
; SSE-NEXT: movdqa %xmm0, 48(%rsi)
; SSE-NEXT: movdqa %xmm0, 32(%rsi)
; SSE-NEXT: movdqa %xmm0, 16(%rsi)
@@ -737,10 +737,10 @@ define void @PR46461(i16 %x, ptr %y) {
;
; AVX1-LABEL: PR46461:
; AVX1: # %bb.0:
-; AVX1-NEXT: movzwl %di, %eax
-; AVX1-NEXT: vmovd %eax, %xmm0
+; AVX1-NEXT: shrl %edi
+; AVX1-NEXT: andl $32767, %edi # imm = 0x7FFF
+; AVX1-NEXT: vmovd %edi, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
-; AVX1-NEXT: vpsrld $1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: vmovaps %ymm0, 32(%rsi)
; AVX1-NEXT: vmovaps %ymm0, (%rsi)
@@ -749,9 +749,9 @@ define void @PR46461(i16 %x, ptr %y) {
;
; AVX2-LABEL: PR46461:
; AVX2: # %bb.0:
-; AVX2-NEXT: movzwl %di, %eax
-; AVX2-NEXT: shrl %eax
-; AVX2-NEXT: vmovd %eax, %xmm0
+; AVX2-NEXT: shrl %edi
+; AVX2-NEXT: andl $32767, %edi # imm = 0x7FFF
+; AVX2-NEXT: vmovd %edi, %xmm0
; AVX2-NEXT: vpbroadcastd %xmm0, %ymm0
; AVX2-NEXT: vmovdqa %ymm0, 32(%rsi)
; AVX2-NEXT: vmovdqa %ymm0, (%rsi)
diff --git a/llvm/test/CodeGen/X86/divide-by-constant.ll b/llvm/test/CodeGen/X86/divide-by-constant.ll
index ac78136b9d8ea..b20c9f30c2a2b 100644
--- a/llvm/test/CodeGen/X86/divide-by-constant.ll
+++ b/llvm/test/CodeGen/X86/divide-by-constant.ll
@@ -172,9 +172,9 @@ define i8 @test8(i8 %x) nounwind {
;
; X64-LABEL: test8:
; X64: # %bb.0:
-; X64-NEXT: shrb %dil
-; X64-NEXT: movzbl %dil, %eax
-; X64-NEXT: imull $211, %eax, %eax
+; X64-NEXT: shrl %edi
+; X64-NEXT: andl $127, %edi
+; X64-NEXT: imull $211, %edi, %eax
; X64-NEXT: shrl $13, %eax
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
@@ -195,9 +195,9 @@ define i8 @test9(i8 %x) nounwind {
;
; X64-LABEL: test9:
; X64: # %bb.0:
-; X64-NEXT: shrb $2, %dil
-; X64-NEXT: movzbl %dil, %eax
-; X64-NEXT: imull $71, %eax, %eax
+; X64-NEXT: shrl $2, %edi
+; X64-NEXT: andl $63, %edi
+; X64-NEXT: imull $71, %edi, %eax
; X64-NEXT: shrl $11, %eax
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/fshl.ll b/llvm/test/CodeGen/X86/fshl.ll
index 9da2640ea8392..3c4331b9da15a 100644
--- a/llvm/test/CodeGen/X86/fshl.ll
+++ b/llvm/test/CodeGen/X86/fshl.ll
@@ -513,10 +513,12 @@ define i16 @const_shift_i16(i16 %x, i16 %y) nounwind {
;
; X64-SLOW-LABEL: const_shift_i16:
; X64-SLOW: # %bb.0:
-; X64-SLOW-NEXT: movzwl %si, %eax
+; X64-SLOW-NEXT: # kill: def $esi killed $esi def $rsi
+; X64-SLOW-NEXT: # kill: def $edi killed $edi def $rdi
; X64-SLOW-NEXT: shll $7, %edi
-; X64-SLOW-NEXT: shrl $9, %eax
-; X64-SLOW-NEXT: orl %edi, %eax
+; X64-SLOW-NEXT: shrl $9, %esi
+; X64-SLOW-NEXT: andl $127, %esi
+; X64-SLOW-NEXT: leal (%rsi,%rdi), %eax
; X64-SLOW-NEXT: # kill: def $ax killed $ax killed $eax
; X64-SLOW-NEXT: retq
%tmp = tail call i16 @llvm.fshl.i16(i16 %x, i16 %y, i16 7)
diff --git a/llvm/test/CodeGen/X86/fshr.ll b/llvm/test/CodeGen/X86/fshr.ll
index c307833e488c9..127469f749812 100644
--- a/llvm/test/CodeGen/X86/fshr.ll
+++ b/llvm/test/CodeGen/X86/fshr.ll
@@ -493,10 +493,12 @@ define i16 @const_shift_i16(i16 %x, i16 %y) nounwind {
;
; X64-SLOW-LABEL: const_shift_i16:
; X64-SLOW: # %bb.0:
-; X64-SLOW-NEXT: movzwl %si, %eax
+; X64-SLOW-NEXT: # kill: def $esi killed $esi def $rsi
+; X64-SLOW-NEXT: # kill: def $edi killed $edi def $rdi
; X64-SLOW-NEXT: shll $9, %edi
-; X64-SLOW-NEXT: shrl $7, %eax
-; X64-SLOW-NEXT: orl %edi, %eax
+; X64-SLOW-NEXT: shrl $7, %esi
+; X64-SLOW-NEXT: andl $511, %esi # imm = 0x1FF
+; X64-SLOW-NEXT: leal (%rsi,%rdi), %eax
; X64-SLOW-NEXT: # kill: def $ax killed $ax killed $eax
; X64-SLOW-NEXT: retq
%tmp = tail call i16 @llvm.fshr.i16(i16 %x, i16 %y, i16 7)
diff --git a/llvm/test/CodeGen/X86/issue168828.ll b/llvm/test/CodeGen/X86/issue168828.ll
new file mode 100644
index 0000000000000..86aafcfe0706f
--- /dev/null
+++ b/llvm/test/CodeGen/X86/issue168828.ll
@@ -0,0 +1,27 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mcpu=haswell < %s | FileCheck %s --check-prefix=ASM
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mcpu=haswell -debug-only=isel < %s -o /dev/null 2>&1 | FileCheck %s --check-prefix=DAG
+; REQUIRES: asserts
+
+; Verify combineZext fires: (zext (srl (trunc X), amt)) -> (and (srl X, amt), mask)
+; and that cttz with a known-nonzero input (bit 8 always set) becomes cttz_zero_undef.
+
+define i32 @tgt(i8 %a0) {
+; ASM-LABEL: tgt:
+; ASM: # %bb.0:
+; ASM-NEXT: shrl %edi
+; ASM-NEXT: andl $127, %edi
+; ASM-NEXT: orl $256, %edi # imm = 0x100
+; ASM-NEXT: tzcntl %edi, %eax
+; ASM-NEXT: retq
+
+; DAG-LABEL: Optimized lowered selection DAG: %bb.0 'tgt:'
+; DAG: i32 = srl
+; DAG-NEXT: i32 = and {{t[0-9]+}}, Constant:i32<127>
+; DAG: i32 = cttz_zero_undef
+ %s = lshr i8 %a0, 1
+ %x = zext i8 %s to i32
+ %m = or i32 %x, 256
+ %r = call i32 @llvm.cttz.i32(i32 %m, i1 0)
+ ret i32 %r
+}
diff --git a/llvm/test/CodeGen/X86/llvm.frexp.ll b/llvm/test/CodeGen/X86/llvm.frexp.ll
index e3a1b1b83b2e3..02327e513690a 100644
--- a/llvm/test/CodeGen/X86/llvm.frexp.ll
+++ b/llvm/test/CodeGen/X86/llvm.frexp.ll
@@ -11,24 +11,23 @@ define { half, i32 } @test_frexp_f16_i32(half %a) nounwind {
; X64-NEXT: mulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-NEXT: callq __truncsfhf2@PLT
; X64-NEXT: pextrw $0, %xmm0, %ecx
-; X64-NEXT: movl %ecx, %eax
-; X64-NEXT: andl $31744, %eax # imm = 0x7C00
; X64-NEXT: movdqa (%rsp), %xmm0 # 16-byte Reload
; X64-NEXT: pextrw $0, %xmm0, %edx
-; X64-NEXT: movl %edx, %esi
-; X64-NEXT: andl $32767, %esi # imm = 0x7FFF
-; X64-NEXT: cmpl $1024, %esi # imm = 0x400
+; X64-NEXT: movl %edx, %eax
+; X64-NEXT: andl $32767, %eax # imm = 0x7FFF
+; X64-NEXT: cmpl $1024, %eax # imm = 0x400
; X64-NEXT: cmovael %edx, %ecx
-; X64-NEXT: cmovael %esi, %eax
-; X64-NEXT: shrl $10, %eax
-; X64-NEXT: leal -12(%rax), %edi
-; X64-NEXT: cmpl $1024, %esi # imm = 0x400
-; X64-NEXT: cmovael %eax, %edi
+; X64-NEXT: movl %ecx, %esi
+; X64-NEXT: shrl $10, %esi
+; X64-NEXT: andl $31, %esi
+; X64-NEXT: leal -12(%rsi), %edi
+; X64-NEXT: cmpl $1024, %eax # imm = 0x400
+; X64-NEXT: cmovael %esi, %edi
; X64-NEXT: addl $-14, %edi
; X64-NEXT: andl $-31745, %ecx # imm = 0x83FF
; X64-NEXT: orl $14336, %ecx # imm = 0x3800
-; X64-NEXT: addl $-31744, %esi # imm = 0x8400
-; X64-NEXT: movzwl %si, %esi
+; X64-NEXT: addl $-31744, %eax # imm = 0x8400
+; X64-NEXT: movzwl %ax, %esi
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpl $33792, %esi # imm = 0x8400
; X64-NEXT: cmoval %edi, %eax
@@ -116,22 +115,23 @@ define i32 @test_frexp_f16_i32_only_use_exp(half %a) nounwind {
; X64-NEXT: mulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-NEXT: callq __truncsfhf2@PLT
; X64-NEXT: pextrw $0, %xmm0, %eax
-; X64-NEXT: andl $31744, %eax # imm = 0x7C00
; X64-NEXT: movdqa (%rsp), %xmm0 # 16-byte Reload
; X64-NEXT: pextrw $0, %xmm0, %ecx
-; X64-NEXT: andl $32767, %ecx # imm = 0x7FFF
-; X64-NEXT: cmpl $1024, %ecx # imm = 0x400
+; X64-NEXT: movl %ecx, %edx
+; X64-NEXT: andl $32767, %edx # imm = 0x7FFF
+; X64-NEXT: cmpl $1024, %edx # imm = 0x400
; X64-NEXT: cmovael %ecx, %eax
; X64-NEXT: shrl $10, %eax
-; X64-NEXT: leal -12(%rax), %edx
-; X64-NEXT: cmpl $1024, %ecx # imm = 0x400
-; X64-NEXT: cmovael %eax, %edx
-; X64-NEXT: addl $-14, %edx
-; X64-NEXT: addl $-31744, %ecx # imm = 0x8400
-; X64-NEXT: movzwl %cx, %ecx
+; X64-NEXT: andl $31, %eax
+; X64-NEXT: leal -12(%rax), %ecx
+; X64-NEXT: cmpl $1024, %edx # imm = 0x400
+; X64-NEXT: cmovael %eax, %ecx
+; X64-NEXT: addl $-14, %ecx
+; X64-NEXT: addl $-31744, %edx # imm = 0x8400
+; X64-NEXT: movzwl %dx, %edx
; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: cmpl $33792, %ecx # imm = 0x8400
-; X64-NEXT: cmoval %edx, %eax
+; X64-NEXT: cmpl $33792, %edx # imm = 0x8400
+; X64-NEXT: cmoval %ecx, %eax
; X64-NEXT: addq $24, %rsp
; X64-NEXT: retq
;
diff --git a/llvm/test/CodeGen/X86/masked_load.ll b/llvm/test/CodeGen/X86/masked_load.ll
index 99a8918fef93f..1571909e35b6c 100644
--- a/llvm/test/CodeGen/X86/masked_load.ll
+++ b/llvm/test/CodeGen/X86/masked_load.ll
@@ -727,7 +727,8 @@ define <8 x double> @load_v8f64_i8(i8 %trigger, ptr %addr, <8 x double> %dst) {
; AVX1-NEXT: shrb $6, %al
; AVX1-NEXT: andb $1, %al
; AVX1-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3
-; AVX1-NEXT: shrb $7, %dil
+; AVX1-NEXT: shrl $7, %edi
+; AVX1-NEXT: andl $1, %edi
; AVX1-NEXT: vpinsrb $14, %edi, %xmm3, %xmm3
; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero
; AVX1-NEXT: vpslld $31, %xmm2, %xmm2
@@ -776,7 +777,8 @@ define <8 x double> @load_v8f64_i8(i8 %trigger, ptr %addr, <8 x double> %dst) {
; AVX2-NEXT: shrb $6, %al
; AVX2-NEXT: andb $1, %al
; AVX2-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3
-; AVX2-NEXT: shrb $7, %dil
+; AVX2-NEXT: shrl $7, %edi
+; AVX2-NEXT: andl $1, %edi
; AVX2-NEXT: vpinsrb $14, %edi, %xmm3, %xmm3
; AVX2-NEXT: vpmovzxwd {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero
; AVX2-NEXT: vpslld $31, %xmm2, %xmm2
diff --git a/llvm/test/CodeGen/X86/midpoint-int.ll b/llvm/test/CodeGen/X86/midpoint-int.ll
index c058e37e0ce11..460a202440de6 100644
--- a/llvm/test/CodeGen/X86/midpoint-int.ll
+++ b/llvm/test/CodeGen/X86/midpoint-int.ll
@@ -641,14 +641,14 @@ define i16 @scalar_i16_signed_reg_reg(i16 %a1, i16 %a2) nounwind {
; X64-NEXT: cmpw %si, %di
; X64-NEXT: setle %al
; X64-NEXT: leal -1(%rax,%rax), %ecx
-; X64-NEXT: movl %edi, %eax
-; X64-NEXT: subl %esi, %eax
-; X64-NEXT: movswl %di, %edx
-; X64-NEXT: movswl %si, %esi
-; X64-NEXT: subl %edx, %esi
-; X64-NEXT: cmovll %eax, %esi
-; X64-NEXT: movzwl %si, %eax
+; X64-NEXT: movl %edi, %edx
+; X64-NEXT: subl %esi, %edx
+; X64-NEXT: movswl %di, %r8d
+; X64-NEXT: movswl %si, %eax
+; X64-NEXT: subl %r8d, %eax
+; X64-NEXT: cmovll %edx, %eax
; X64-NEXT: shrl %eax
+; X64-NEXT: andl $32767, %eax # imm = 0x7FFF
; X64-NEXT: imull %ecx, %eax
; X64-NEXT: addl %edi, %eax
; X64-NEXT: # kill: def $ax killed $ax killed $eax
@@ -668,8 +668,8 @@ define i16 @scalar_i16_signed_reg_reg(i16 %a1, i16 %a2) nounwind {
; X86-NEXT: # %bb.1:
; X86-NEXT: negl %eax
; X86-NEXT: .LBB10_2:
-; X86-NEXT: movzwl %ax, %eax
; X86-NEXT: shrl %eax
+; X86-NEXT: andl $32767, %eax # imm = 0x7FFF
; X86-NEXT: imull %edx, %eax
; X86-NEXT: addl %ecx, %eax
; X86-NEXT: # kill: def $ax killed $ax killed $eax
@@ -693,14 +693,14 @@ define i16 @scalar_i16_unsigned_reg_reg(i16 %a1, i16 %a2) nounwind {
; X64-NEXT: cmpw %di, %si
; X64-NEXT: setae %al
; X64-NEXT: leal -1(%rax,%rax), %ecx
-; X64-NEXT: movl %edi, %eax
-; X64-NEXT: subl %esi, %eax
-; X64-NEXT: movzwl %di, %edx
-; X64-NEXT: movzwl %si, %esi
-; X64-NEXT: subl %edx, %esi
-; X64-NEXT: cmovbl %eax, %esi
+; X64-NEXT: movl %edi, %edx
+; X64-NEXT: subl %esi, %edx
+; X64-NEXT: movzwl %di, %r8d
; X64-NEXT: movzwl %si, %eax
+; X64-NEXT: subl %r8d, %eax
+; X64-NEXT: cmovbl %edx, %eax
; X64-NEXT: shrl %eax
+; X64-NEXT: andl $32767, %eax # imm = 0x7FFF
; X64-NEXT: imull %ecx, %eax
; X64-NEXT: addl %edi, %eax
; X64-NEXT: # kill: def $ax killed $ax killed $eax
@@ -720,8 +720,8 @@ define i16 @scalar_i16_unsigned_reg_reg(i16 %a1, i16 %a2) nounwind {
; X86-NEXT: # %bb.1:
; X86-NEXT: negl %eax
; X86-NEXT: .LBB11_2:
-; X86-NEXT: movzwl %ax, %eax
; X86-NEXT: shrl %eax
+; X86-NEXT: andl $32767, %eax # imm = 0x7FFF
; X86-NEXT: imull %edx, %eax
; X86-NEXT: addl %ecx, %eax
; X86-NEXT: # kill: def $ax killed $ax killed $eax
@@ -748,13 +748,13 @@ define i16 @scalar_i16_signed_mem_reg(ptr %a1_addr, i16 %a2) nounwind {
; X64-NEXT: cmpw %si, %cx
; X64-NEXT: setle %al
; X64-NEXT: leal -1(%rax,%rax), %edx
-; X64-NEXT: movl %ecx, %eax
-; X64-NEXT: subl %esi, %eax
-; X64-NEXT: movswl %si, %esi
-; X64-NEXT: subl %ecx, %esi
-; X64-NEXT: cmovll %eax, %esi
-; X64-NEXT: movzwl %si, %eax
+; X64-NEXT: movl %ecx, %edi
+; X64-NEXT: subl %esi, %edi
+; X64-NEXT: movswl %si, %eax
+; X64-NEXT: subl %ecx, %eax
+; X64-NEXT: cmovll %edi, %eax
; X64-NEXT: shrl %eax
+; X64-NEXT: andl $32767, %eax # imm = 0x7FFF
; X64-NEXT: imull %edx, %eax
; X64-NEXT: addl %ecx, %eax
; X64-NEXT: # kill: def $ax killed $ax killed $eax
@@ -775,8 +775,8 @@ define i16 @scalar_i16_signed_mem_reg(ptr %a1_addr, i16 %a2) nounwind {
; X86-NEXT: # %bb.1:
; X86-NEXT: negl %eax
; X86-NEXT: .LBB12_2:
-; X86-NEXT: movzwl %ax, %eax
; X86-NEXT: shrl %eax
+; X86-NEXT: andl $32767, %eax # imm = 0x7FFF
; X86-NEXT: imull %edx, %eax
; X86-NEXT: addl %ecx, %eax
; X86-NEXT: # kill: def $ax killed $ax killed $eax
@@ -807,8 +807,8 @@ define i16 @scalar_i16_signed_reg_mem(i16 %a1, ptr %a2_addr) nounwind {
; X64-NEXT: movswl %di, %esi
; X64-NEXT: subl %esi, %eax
; X64-NEXT: cmovll %edx, %eax
-; X64-NEXT: movzwl %ax, %eax
; X64-NEXT: shrl %eax
+; X64-NEXT: andl $32767, %eax # imm = 0x7FFF
; X64-NEXT: imull %ecx, %eax
; X64-NEXT: addl %edi, %eax
; X64-NEXT: # kill: def $ax killed $ax killed $eax
@@ -829,8 +829,8 @@ define i16 @scalar_i16_signed_reg_mem(i16 %a1, ptr %a2_addr) nounwind {
; X86-NEXT: # %bb.1:
; X86-NEXT: negl %eax
; X86-NEXT: .LBB13_2:
-; X86-NEXT: movzwl %ax, %eax
; X86-NEXT: shrl %eax
+; X86-NEXT: andl $32767, %eax # imm = 0x7FFF
; X86-NEXT: imull %edx, %eax
; X86-NEXT: addl %ecx, %eax
; X86-NEXT: # kill: def $ax killed $ax killed $eax
@@ -861,8 +861,8 @@ define i16 @scalar_i16_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind {
; X64-NEXT: subl %eax, %esi
; X64-NEXT: subl %ecx, %eax
; X64-NEXT: cmovll %esi, %eax
-; X64-NEXT: movzwl %ax, %eax
; X64-NEXT: shrl %eax
+; X64-NEXT: andl $32767, %eax # imm = 0x7FFF
; X64-NEXT: imull %edx, %eax
; X64-NEXT: addl %ecx, %eax
; X64-NEXT: # kill: def $ax killed $ax killed $eax
@@ -884,8 +884,8 @@ define i16 @scalar_i16_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind {
; X86-NEXT: # %bb.1:
; X86-NEXT: negl %eax
; X86-NEXT: .LBB14_2:
-; X86-NEXT: movzwl %ax, %eax
; X86-NEXT: shrl %eax
+; X86-NEXT: andl $32767, %eax # imm = 0x7FFF
; X86-NEXT: imull %edx, %eax
; X86-NEXT: addl %ecx, %eax
; X86-NEXT: # kill: def $ax killed $ax killed $eax
diff --git a/llvm/test/CodeGen/X86/popcnt.ll b/llvm/test/CodeGen/X86/popcnt.ll
index 3004b8b72fcc5..2b8aeb43d351d 100644
--- a/llvm/test/CodeGen/X86/popcnt.ll
+++ b/llvm/test/CodeGen/X86/popcnt.ll
@@ -50,7 +50,7 @@ define i8 @cnt8(i8 %x) nounwind readnone {
define i16 @cnt16(i16 %x) nounwind readnone {
; X86-LABEL: cnt16:
; X86: # %bb.0:
-; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: shrl %ecx
; X86-NEXT: andl $21845, %ecx # imm = 0x5555
@@ -1823,7 +1823,7 @@ define i32 @popcount_zext_i32(i16 zeroext %x) {
define i32 @popcount_i16_zext(i16 zeroext %x) {
; X86-LABEL: popcount_i16_zext:
; X86: # %bb.0:
-; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: shrl %ecx
; X86-NEXT: andl $21845, %ecx # imm = 0x5555
diff --git a/llvm/test/CodeGen/X86/pr45995.ll b/llvm/test/CodeGen/X86/pr45995.ll
index 997ad6be84b9d..44e36dd1079c1 100644
--- a/llvm/test/CodeGen/X86/pr45995.ll
+++ b/llvm/test/CodeGen/X86/pr45995.ll
@@ -8,21 +8,21 @@ define void @extracter0([4 x <4 x i1>] %matrix) nounwind {
; CHECK-NEXT: push r14
; CHECK-NEXT: push rbx
; CHECK-NEXT: vpslld xmm0, xmm0, 31
-; CHECK-NEXT: vmovmskps edi, xmm0
-; CHECK-NEXT: mov ebx, edi
-; CHECK-NEXT: shr bl, 3
-; CHECK-NEXT: mov ebp, edi
+; CHECK-NEXT: vmovmskps ebx, xmm0
+; CHECK-NEXT: mov ebp, ebx
; CHECK-NEXT: and bpl, 4
; CHECK-NEXT: shr bpl, 2
-; CHECK-NEXT: mov r14d, edi
+; CHECK-NEXT: mov r14d, ebx
; CHECK-NEXT: and r14b, 2
; CHECK-NEXT: shr r14b
+; CHECK-NEXT: mov edi, ebx
; CHECK-NEXT: call print_i1@PLT
; CHECK-NEXT: movzx edi, r14b
; CHECK-NEXT: call print_i1@PLT
; CHECK-NEXT: movzx edi, bpl
; CHECK-NEXT: call print_i1@PLT
-; CHECK-NEXT: movzx edi, bl
+; CHECK-NEXT: shr ebx, 3
+; CHECK-NEXT: mov edi, ebx
; CHECK-NEXT: call print_i1@PLT
; CHECK-NEXT: pop rbx
; CHECK-NEXT: pop r14
@@ -52,9 +52,6 @@ define void @extracter1([4 x <4 x i1>] %matrix) nounwind {
; CHECK-NEXT: push rax
; CHECK-NEXT: vpslld xmm1, xmm1, 31
; CHECK-NEXT: vmovmskps ebx, xmm1
-; CHECK-NEXT: mov eax, ebx
-; CHECK-NEXT: shr al, 3
-; CHECK-NEXT: mov byte ptr [rsp + 7], al # 1-byte Spill
; CHECK-NEXT: mov r14d, ebx
; CHECK-NEXT: and r14b, 4
; CHECK-NEXT: shr r14b, 2
@@ -62,29 +59,30 @@ define void @extracter1([4 x <4 x i1>] %matrix) nounwind {
; CHECK-NEXT: and r15b, 2
; CHECK-NEXT: shr r15b
; CHECK-NEXT: vpslld xmm0, xmm0, 31
-; CHECK-NEXT: vmovmskps edi, xmm0
-; CHECK-NEXT: mov r12d, edi
-; CHECK-NEXT: shr r12b, 3
-; CHECK-NEXT: mov r13d, edi
-; CHECK-NEXT: and r13b, 4
-; CHECK-NEXT: shr r13b, 2
-; CHECK-NEXT: mov ebp, edi
-; CHECK-NEXT: and bpl, 2
-; CHECK-NEXT: shr bpl
-; CHECK-NEXT: call print_i1@PLT
-; CHECK-NEXT: movzx edi, bpl
+; CHECK-NEXT: vmovmskps ebp, xmm0
+; CHECK-NEXT: mov r12d, ebp
+; CHECK-NEXT: and r12b, 4
+; CHECK-NEXT: shr r12b, 2
+; CHECK-NEXT: mov r13d, ebp
+; CHECK-NEXT: and r13b, 2
+; CHECK-NEXT: shr r13b
+; CHECK-NEXT: mov edi, ebp
; CHECK-NEXT: call print_i1@PLT
; CHECK-NEXT: movzx edi, r13b
; CHECK-NEXT: call print_i1@PLT
; CHECK-NEXT: movzx edi, r12b
; CHECK-NEXT: call pr...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/185539
More information about the llvm-commits
mailing list