[llvm] [SDAG] Fix type checks in `ShrinkDemandedOp` to avoid creating invalid truncates (PR #92730)
Yingwei Zheng via llvm-commits
llvm-commits at lists.llvm.org
Mon May 20 05:03:23 PDT 2024
https://github.com/dtcxzyw updated https://github.com/llvm/llvm-project/pull/92730
From f57b846f0728aca0994e557f791edaa2e9b3b94d Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Mon, 20 May 2024 17:54:10 +0800
Subject: [PATCH 1/2] [SDAG] Fix type checks in `ShrinkDemandedOp` to avoid
creating invalid truncates.
---
.../CodeGen/SelectionDAG/TargetLowering.cpp | 6 +-
llvm/test/CodeGen/X86/btc_bts_btr.ll | 6 +-
llvm/test/CodeGen/X86/narrow-shl-cst.ll | 6 +-
llvm/test/CodeGen/X86/pr27202.ll | 4 +-
llvm/test/CodeGen/X86/pr49162.ll | 5 +-
llvm/test/CodeGen/X86/pr92720.ll | 15 ++++
.../CodeGen/X86/scheduler-backtracking.ll | 12 +--
llvm/test/CodeGen/X86/vector-sext.ll | 84 ++++++++-----------
llvm/test/CodeGen/X86/vector-zext.ll | 66 ++++++---------
...ad-of-small-alloca-with-zero-upper-half.ll | 52 ++++++------
.../CodeGen/X86/widen-load-of-small-alloca.ll | 60 ++++++-------
.../CodeGen/X86/zext-logicop-shift-load.ll | 2 +-
12 files changed, 152 insertions(+), 166 deletions(-)
create mode 100644 llvm/test/CodeGen/X86/pr92720.ll
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 3ec6b9b795079..32a8dfdd3f75d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -599,7 +599,11 @@ bool TargetLowering::ShrinkDemandedOp(SDValue Op, unsigned BitWidth,
for (unsigned SmallVTBits = llvm::bit_ceil(DemandedSize);
SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), SmallVTBits);
- if (TLI.isTruncateFree(VT, SmallVT) && TLI.isZExtFree(SmallVT, VT)) {
+ // Types of LHS and RHS may differ before legalization (e.g., shl), so we
+ // need to check both.
+ if (TLI.isTruncateFree(Op.getOperand(0).getValueType(), SmallVT) &&
+ TLI.isTruncateFree(Op.getOperand(1).getValueType(), SmallVT) &&
+ TLI.isZExtFree(SmallVT, VT)) {
// We found a type with free casts.
SDValue X = DAG.getNode(
Op.getOpcode(), dl, SmallVT,
diff --git a/llvm/test/CodeGen/X86/btc_bts_btr.ll b/llvm/test/CodeGen/X86/btc_bts_btr.ll
index efd9d1105d975..e110e5c9274db 100644
--- a/llvm/test/CodeGen/X86/btc_bts_btr.ll
+++ b/llvm/test/CodeGen/X86/btc_bts_btr.ll
@@ -1021,7 +1021,7 @@ define i64 @btr_64_mask_zeros(i64 %x, i64 %n) {
; X64-LABEL: btr_64_mask_zeros:
; X64: # %bb.0:
; X64-NEXT: movq %rdi, %rax
-; X64-NEXT: shll $2, %esi
+; X64-NEXT: shlq $2, %rsi
; X64-NEXT: btrq %rsi, %rax
; X64-NEXT: retq
;
@@ -1056,7 +1056,7 @@ define i64 @bts_64_mask_zeros(i64 %x, i64 %n) {
; X64-LABEL: bts_64_mask_zeros:
; X64: # %bb.0:
; X64-NEXT: movq %rdi, %rax
-; X64-NEXT: shll $2, %esi
+; X64-NEXT: shlq $2, %rsi
; X64-NEXT: btsq %rsi, %rax
; X64-NEXT: retq
;
@@ -1088,7 +1088,7 @@ define i64 @btc_64_mask_zeros(i64 %x, i64 %n) {
; X64-LABEL: btc_64_mask_zeros:
; X64: # %bb.0:
; X64-NEXT: movq %rdi, %rax
-; X64-NEXT: shll $2, %esi
+; X64-NEXT: shlq $2, %rsi
; X64-NEXT: btcq %rsi, %rax
; X64-NEXT: retq
;
diff --git a/llvm/test/CodeGen/X86/narrow-shl-cst.ll b/llvm/test/CodeGen/X86/narrow-shl-cst.ll
index 296ef52c3bff9..107f14a0e2d2c 100644
--- a/llvm/test/CodeGen/X86/narrow-shl-cst.ll
+++ b/llvm/test/CodeGen/X86/narrow-shl-cst.ll
@@ -151,7 +151,7 @@ define i32 @test12(i32 %x, ptr %y) nounwind {
define i64 @test13(i64 %x, ptr %y) nounwind {
; CHECK-LABEL: test13:
; CHECK: # %bb.0:
-; CHECK-NEXT: addl %edi, %edi
+; CHECK-NEXT: addq %rdi, %rdi
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: movq %rax, (%rsi)
; CHECK-NEXT: retq
@@ -212,7 +212,7 @@ define i64 @test18(i64 %x) nounwind {
; CHECK-LABEL: test18:
; CHECK: # %bb.0:
; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: shll $10, %eax
+; CHECK-NEXT: shlq $10, %rax
; CHECK-NEXT: retq
%and = shl i64 %x, 10
%shl = and i64 %and, 261120
@@ -234,7 +234,7 @@ define i64 @test20(i64 %x) nounwind {
; CHECK-LABEL: test20:
; CHECK: # %bb.0:
; CHECK-NEXT: movzwl %di, %eax
-; CHECK-NEXT: shll $10, %eax
+; CHECK-NEXT: shlq $10, %rax
; CHECK-NEXT: retq
%and = shl i64 %x, 10
%shl = and i64 %and, 67107840
diff --git a/llvm/test/CodeGen/X86/pr27202.ll b/llvm/test/CodeGen/X86/pr27202.ll
index 3bd3be62fb4c8..9da22f635c266 100644
--- a/llvm/test/CodeGen/X86/pr27202.ll
+++ b/llvm/test/CodeGen/X86/pr27202.ll
@@ -45,8 +45,8 @@ define zeroext i1 @g(i32 %x) optsize {
define i64 @PR46237(i64 %x, i64 %y, i64 %z) optsize {
; CHECK-LABEL: PR46237:
; CHECK: # %bb.0:
-; CHECK-NEXT: movl %edx, %eax
-; CHECK-NEXT: shll $6, %eax
+; CHECK-NEXT: movq %rdx, %rax
+; CHECK-NEXT: shlq $6, %rax
; CHECK-NEXT: movzbl %al, %ecx
; CHECK-NEXT: andl $7, %esi
; CHECK-NEXT: andl $7, %edx
diff --git a/llvm/test/CodeGen/X86/pr49162.ll b/llvm/test/CodeGen/X86/pr49162.ll
index 0e65e121531bf..db8cec61acd6b 100644
--- a/llvm/test/CodeGen/X86/pr49162.ll
+++ b/llvm/test/CodeGen/X86/pr49162.ll
@@ -17,10 +17,7 @@ define ptr @PR49162(ptr %base, ptr %ptr160) {
;
; X64-LABEL: PR49162:
; X64: # %bb.0:
-; X64-NEXT: movl 8(%rsi), %eax
-; X64-NEXT: shll $16, %eax
-; X64-NEXT: cltq
-; X64-NEXT: sarq $16, %rax
+; X64-NEXT: movswq 8(%rsi), %rax
; X64-NEXT: leaq (%rdi,%rax,4), %rax
; X64-NEXT: retq
%load160 = load i160, ptr %ptr160, align 4
diff --git a/llvm/test/CodeGen/X86/pr92720.ll b/llvm/test/CodeGen/X86/pr92720.ll
new file mode 100644
index 0000000000000..b2543c08328c7
--- /dev/null
+++ b/llvm/test/CodeGen/X86/pr92720.ll
@@ -0,0 +1,15 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=x86_64-linux-gnu | FileCheck %s
+
+; Make sure we don't crash when shrinking the shift amount before legalization.
+define i64 @pr92720(i64 %x) {
+; CHECK-LABEL: pr92720:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movabsq $8589934592, %rax # imm = 0x200000000
+; CHECK-NEXT: retq
+ %or = or i64 %x, 255
+ %sub = sub i64 0, %or
+ %shl = shl i64 1, %sub
+ %sext = shl i64 %shl, 32
+ ret i64 %sext
+}
diff --git a/llvm/test/CodeGen/X86/scheduler-backtracking.ll b/llvm/test/CodeGen/X86/scheduler-backtracking.ll
index 785b97d8c2402..53d3367cce4d3 100644
--- a/llvm/test/CodeGen/X86/scheduler-backtracking.ll
+++ b/llvm/test/CodeGen/X86/scheduler-backtracking.ll
@@ -13,7 +13,7 @@ define i256 @test1(i256 %a) nounwind {
; ILP-LABEL: test1:
; ILP: # %bb.0:
; ILP-NEXT: movq %rdi, %rax
-; ILP-NEXT: leal (%rsi,%rsi), %ecx
+; ILP-NEXT: leaq (%rsi,%rsi), %rcx
; ILP-NEXT: movq $0, -{{[0-9]+}}(%rsp)
; ILP-NEXT: movq $0, -{{[0-9]+}}(%rsp)
; ILP-NEXT: movq $0, -{{[0-9]+}}(%rsp)
@@ -43,7 +43,7 @@ define i256 @test1(i256 %a) nounwind {
; ILP-NEXT: shlq %cl, %rsi
; ILP-NEXT: notb %cl
; ILP-NEXT: shrq %rdx
-; ILP-NEXT: # kill: def $cl killed $cl killed $ecx
+; ILP-NEXT: # kill: def $cl killed $cl killed $rcx
; ILP-NEXT: shrq %cl, %rdx
; ILP-NEXT: orq %rsi, %rdx
; ILP-NEXT: movq %rdx, 16(%rax)
@@ -60,7 +60,7 @@ define i256 @test1(i256 %a) nounwind {
; HYBRID-NEXT: movq $0, -{{[0-9]+}}(%rsp)
; HYBRID-NEXT: movq $0, -{{[0-9]+}}(%rsp)
; HYBRID-NEXT: movq $0, -{{[0-9]+}}(%rsp)
-; HYBRID-NEXT: addl %esi, %esi
+; HYBRID-NEXT: addq %rsi, %rsi
; HYBRID-NEXT: addb $3, %sil
; HYBRID-NEXT: movl %esi, %ecx
; HYBRID-NEXT: andb $7, %cl
@@ -97,7 +97,7 @@ define i256 @test1(i256 %a) nounwind {
; BURR-NEXT: movq $0, -{{[0-9]+}}(%rsp)
; BURR-NEXT: movq $0, -{{[0-9]+}}(%rsp)
; BURR-NEXT: movq $0, -{{[0-9]+}}(%rsp)
-; BURR-NEXT: addl %esi, %esi
+; BURR-NEXT: addq %rsi, %rsi
; BURR-NEXT: addb $3, %sil
; BURR-NEXT: movl %esi, %ecx
; BURR-NEXT: andb $7, %cl
@@ -126,7 +126,7 @@ define i256 @test1(i256 %a) nounwind {
; SRC-LABEL: test1:
; SRC: # %bb.0:
; SRC-NEXT: movq %rdi, %rax
-; SRC-NEXT: addl %esi, %esi
+; SRC-NEXT: addq %rsi, %rsi
; SRC-NEXT: addb $3, %sil
; SRC-NEXT: movq $0, -{{[0-9]+}}(%rsp)
; SRC-NEXT: movq $0, -{{[0-9]+}}(%rsp)
@@ -167,7 +167,7 @@ define i256 @test1(i256 %a) nounwind {
; LIN-LABEL: test1:
; LIN: # %bb.0:
; LIN-NEXT: movq %rdi, %rax
-; LIN-NEXT: leal (%rsi,%rsi), %edx
+; LIN-NEXT: leaq (%rsi,%rsi), %rdx
; LIN-NEXT: addb $3, %dl
; LIN-NEXT: movl %edx, %ecx
; LIN-NEXT: shrb $3, %cl
diff --git a/llvm/test/CodeGen/X86/vector-sext.ll b/llvm/test/CodeGen/X86/vector-sext.ll
index 85c1e25c29ed5..739d5b1d32e86 100644
--- a/llvm/test/CodeGen/X86/vector-sext.ll
+++ b/llvm/test/CodeGen/X86/vector-sext.ll
@@ -3615,13 +3615,10 @@ define <4 x i32> @sext_4i17_to_4i32(ptr %ptr) {
; SSE2-NEXT: movd %ecx, %xmm1
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: movl 8(%rdi), %ecx
-; SSE2-NEXT: shll $28, %ecx
-; SSE2-NEXT: movq %rax, %rdx
-; SSE2-NEXT: shrq $51, %rdx
-; SSE2-NEXT: shll $15, %edx
-; SSE2-NEXT: orl %ecx, %edx
-; SSE2-NEXT: sarl $15, %edx
-; SSE2-NEXT: movd %edx, %xmm1
+; SSE2-NEXT: shldq $13, %rax, %rcx
+; SSE2-NEXT: shll $15, %ecx
+; SSE2-NEXT: sarl $15, %ecx
+; SSE2-NEXT: movd %ecx, %xmm1
; SSE2-NEXT: shrq $34, %rax
; SSE2-NEXT: shll $15, %eax
; SSE2-NEXT: sarl $15, %eax
@@ -3644,13 +3641,10 @@ define <4 x i32> @sext_4i17_to_4i32(ptr %ptr) {
; SSSE3-NEXT: movd %ecx, %xmm1
; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSSE3-NEXT: movl 8(%rdi), %ecx
-; SSSE3-NEXT: shll $28, %ecx
-; SSSE3-NEXT: movq %rax, %rdx
-; SSSE3-NEXT: shrq $51, %rdx
-; SSSE3-NEXT: shll $15, %edx
-; SSSE3-NEXT: orl %ecx, %edx
-; SSSE3-NEXT: sarl $15, %edx
-; SSSE3-NEXT: movd %edx, %xmm1
+; SSSE3-NEXT: shldq $13, %rax, %rcx
+; SSSE3-NEXT: shll $15, %ecx
+; SSSE3-NEXT: sarl $15, %ecx
+; SSSE3-NEXT: movd %ecx, %xmm1
; SSSE3-NEXT: shrq $34, %rax
; SSSE3-NEXT: shll $15, %eax
; SSSE3-NEXT: sarl $15, %eax
@@ -3662,53 +3656,47 @@ define <4 x i32> @sext_4i17_to_4i32(ptr %ptr) {
; SSE41-LABEL: sext_4i17_to_4i32:
; SSE41: # %bb.0:
; SSE41-NEXT: movq (%rdi), %rax
-; SSE41-NEXT: movq %rax, %rcx
-; SSE41-NEXT: shrq $17, %rcx
+; SSE41-NEXT: movl %eax, %ecx
+; SSE41-NEXT: movq %rax, %rdx
+; SSE41-NEXT: movl 8(%rdi), %esi
+; SSE41-NEXT: shldq $13, %rax, %rsi
+; SSE41-NEXT: shrq $17, %rax
+; SSE41-NEXT: shll $15, %eax
+; SSE41-NEXT: sarl $15, %eax
; SSE41-NEXT: shll $15, %ecx
; SSE41-NEXT: sarl $15, %ecx
-; SSE41-NEXT: movl %eax, %edx
+; SSE41-NEXT: movd %ecx, %xmm0
+; SSE41-NEXT: pinsrd $1, %eax, %xmm0
+; SSE41-NEXT: shrq $34, %rdx
; SSE41-NEXT: shll $15, %edx
; SSE41-NEXT: sarl $15, %edx
-; SSE41-NEXT: movd %edx, %xmm0
-; SSE41-NEXT: pinsrd $1, %ecx, %xmm0
-; SSE41-NEXT: movq %rax, %rcx
-; SSE41-NEXT: shrq $34, %rcx
-; SSE41-NEXT: shll $15, %ecx
-; SSE41-NEXT: sarl $15, %ecx
-; SSE41-NEXT: pinsrd $2, %ecx, %xmm0
-; SSE41-NEXT: movl 8(%rdi), %ecx
-; SSE41-NEXT: shll $28, %ecx
-; SSE41-NEXT: shrq $51, %rax
-; SSE41-NEXT: shll $15, %eax
-; SSE41-NEXT: orl %ecx, %eax
-; SSE41-NEXT: sarl $15, %eax
-; SSE41-NEXT: pinsrd $3, %eax, %xmm0
+; SSE41-NEXT: pinsrd $2, %edx, %xmm0
+; SSE41-NEXT: shll $15, %esi
+; SSE41-NEXT: sarl $15, %esi
+; SSE41-NEXT: pinsrd $3, %esi, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: sext_4i17_to_4i32:
; AVX: # %bb.0:
; AVX-NEXT: movq (%rdi), %rax
-; AVX-NEXT: movq %rax, %rcx
-; AVX-NEXT: shrq $17, %rcx
+; AVX-NEXT: movl %eax, %ecx
+; AVX-NEXT: movq %rax, %rdx
+; AVX-NEXT: movl 8(%rdi), %esi
+; AVX-NEXT: shldq $13, %rax, %rsi
+; AVX-NEXT: shrq $17, %rax
+; AVX-NEXT: shll $15, %eax
+; AVX-NEXT: sarl $15, %eax
; AVX-NEXT: shll $15, %ecx
; AVX-NEXT: sarl $15, %ecx
-; AVX-NEXT: movl %eax, %edx
+; AVX-NEXT: vmovd %ecx, %xmm0
+; AVX-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
+; AVX-NEXT: shrq $34, %rdx
; AVX-NEXT: shll $15, %edx
; AVX-NEXT: sarl $15, %edx
-; AVX-NEXT: vmovd %edx, %xmm0
-; AVX-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0
-; AVX-NEXT: movq %rax, %rcx
-; AVX-NEXT: shrq $34, %rcx
-; AVX-NEXT: shll $15, %ecx
-; AVX-NEXT: sarl $15, %ecx
-; AVX-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0
-; AVX-NEXT: movl 8(%rdi), %ecx
-; AVX-NEXT: shll $28, %ecx
-; AVX-NEXT: shrq $51, %rax
-; AVX-NEXT: shll $15, %eax
-; AVX-NEXT: orl %ecx, %eax
-; AVX-NEXT: sarl $15, %eax
-; AVX-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0
+; AVX-NEXT: vpinsrd $2, %edx, %xmm0, %xmm0
+; AVX-NEXT: shll $15, %esi
+; AVX-NEXT: sarl $15, %esi
+; AVX-NEXT: vpinsrd $3, %esi, %xmm0, %xmm0
; AVX-NEXT: retq
;
; X86-SSE2-LABEL: sext_4i17_to_4i32:
diff --git a/llvm/test/CodeGen/X86/vector-zext.ll b/llvm/test/CodeGen/X86/vector-zext.ll
index 74926f46ffa43..25c438cc4c4fc 100644
--- a/llvm/test/CodeGen/X86/vector-zext.ll
+++ b/llvm/test/CodeGen/X86/vector-zext.ll
@@ -2332,11 +2332,8 @@ define <4 x i32> @zext_4i17_to_4i32(ptr %ptr) {
; SSE2-NEXT: movd %ecx, %xmm1
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: movl 8(%rdi), %ecx
-; SSE2-NEXT: shll $13, %ecx
-; SSE2-NEXT: movq %rax, %rdx
-; SSE2-NEXT: shrq $51, %rdx
-; SSE2-NEXT: orl %ecx, %edx
-; SSE2-NEXT: movd %edx, %xmm1
+; SSE2-NEXT: shldq $13, %rax, %rcx
+; SSE2-NEXT: movd %ecx, %xmm1
; SSE2-NEXT: shrq $34, %rax
; SSE2-NEXT: movd %eax, %xmm2
; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
@@ -2353,11 +2350,8 @@ define <4 x i32> @zext_4i17_to_4i32(ptr %ptr) {
; SSSE3-NEXT: movd %ecx, %xmm1
; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSSE3-NEXT: movl 8(%rdi), %ecx
-; SSSE3-NEXT: shll $13, %ecx
-; SSSE3-NEXT: movq %rax, %rdx
-; SSSE3-NEXT: shrq $51, %rdx
-; SSSE3-NEXT: orl %ecx, %edx
-; SSSE3-NEXT: movd %edx, %xmm1
+; SSSE3-NEXT: shldq $13, %rax, %rcx
+; SSSE3-NEXT: movd %ecx, %xmm1
; SSSE3-NEXT: shrq $34, %rax
; SSSE3-NEXT: movd %eax, %xmm2
; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
@@ -2367,15 +2361,12 @@ define <4 x i32> @zext_4i17_to_4i32(ptr %ptr) {
;
; SSE41-LABEL: zext_4i17_to_4i32:
; SSE41: # %bb.0:
-; SSE41-NEXT: movl 8(%rdi), %eax
-; SSE41-NEXT: shll $13, %eax
-; SSE41-NEXT: movq (%rdi), %rcx
-; SSE41-NEXT: movq %rcx, %rdx
-; SSE41-NEXT: shrq $51, %rdx
-; SSE41-NEXT: orl %eax, %edx
-; SSE41-NEXT: movq %rcx, %rax
+; SSE41-NEXT: movq (%rdi), %rax
+; SSE41-NEXT: movd %eax, %xmm0
+; SSE41-NEXT: movq %rax, %rcx
+; SSE41-NEXT: movl 8(%rdi), %edx
+; SSE41-NEXT: shldq $13, %rax, %rdx
; SSE41-NEXT: shrq $17, %rax
-; SSE41-NEXT: movd %ecx, %xmm0
; SSE41-NEXT: pinsrd $1, %eax, %xmm0
; SSE41-NEXT: shrq $34, %rcx
; SSE41-NEXT: pinsrd $2, %ecx, %xmm0
@@ -2385,15 +2376,12 @@ define <4 x i32> @zext_4i17_to_4i32(ptr %ptr) {
;
; AVX1-LABEL: zext_4i17_to_4i32:
; AVX1: # %bb.0:
-; AVX1-NEXT: movl 8(%rdi), %eax
-; AVX1-NEXT: shll $13, %eax
-; AVX1-NEXT: movq (%rdi), %rcx
-; AVX1-NEXT: movq %rcx, %rdx
-; AVX1-NEXT: shrq $51, %rdx
-; AVX1-NEXT: orl %eax, %edx
-; AVX1-NEXT: movq %rcx, %rax
+; AVX1-NEXT: movq (%rdi), %rax
+; AVX1-NEXT: vmovd %eax, %xmm0
+; AVX1-NEXT: movq %rax, %rcx
+; AVX1-NEXT: movl 8(%rdi), %edx
+; AVX1-NEXT: shldq $13, %rax, %rdx
; AVX1-NEXT: shrq $17, %rax
-; AVX1-NEXT: vmovd %ecx, %xmm0
; AVX1-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
; AVX1-NEXT: shrq $34, %rcx
; AVX1-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0
@@ -2403,15 +2391,12 @@ define <4 x i32> @zext_4i17_to_4i32(ptr %ptr) {
;
; AVX2-LABEL: zext_4i17_to_4i32:
; AVX2: # %bb.0:
-; AVX2-NEXT: movl 8(%rdi), %eax
-; AVX2-NEXT: shll $13, %eax
-; AVX2-NEXT: movq (%rdi), %rcx
-; AVX2-NEXT: movq %rcx, %rdx
-; AVX2-NEXT: shrq $51, %rdx
-; AVX2-NEXT: orl %eax, %edx
-; AVX2-NEXT: movq %rcx, %rax
+; AVX2-NEXT: movq (%rdi), %rax
+; AVX2-NEXT: vmovd %eax, %xmm0
+; AVX2-NEXT: movq %rax, %rcx
+; AVX2-NEXT: movl 8(%rdi), %edx
+; AVX2-NEXT: shldq $13, %rax, %rdx
; AVX2-NEXT: shrq $17, %rax
-; AVX2-NEXT: vmovd %ecx, %xmm0
; AVX2-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
; AVX2-NEXT: shrq $34, %rcx
; AVX2-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0
@@ -2422,15 +2407,12 @@ define <4 x i32> @zext_4i17_to_4i32(ptr %ptr) {
;
; AVX512-LABEL: zext_4i17_to_4i32:
; AVX512: # %bb.0:
-; AVX512-NEXT: movl 8(%rdi), %eax
-; AVX512-NEXT: shll $13, %eax
-; AVX512-NEXT: movq (%rdi), %rcx
-; AVX512-NEXT: movq %rcx, %rdx
-; AVX512-NEXT: shrq $51, %rdx
-; AVX512-NEXT: orl %eax, %edx
-; AVX512-NEXT: movq %rcx, %rax
+; AVX512-NEXT: movq (%rdi), %rax
+; AVX512-NEXT: vmovd %eax, %xmm0
+; AVX512-NEXT: movq %rax, %rcx
+; AVX512-NEXT: movl 8(%rdi), %edx
+; AVX512-NEXT: shldq $13, %rax, %rdx
; AVX512-NEXT: shrq $17, %rax
-; AVX512-NEXT: vmovd %ecx, %xmm0
; AVX512-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
; AVX512-NEXT: shrq $34, %rcx
; AVX512-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0
diff --git a/llvm/test/CodeGen/X86/widen-load-of-small-alloca-with-zero-upper-half.ll b/llvm/test/CodeGen/X86/widen-load-of-small-alloca-with-zero-upper-half.ll
index 9ae1f270e8833..2611399458c27 100644
--- a/llvm/test/CodeGen/X86/widen-load-of-small-alloca-with-zero-upper-half.ll
+++ b/llvm/test/CodeGen/X86/widen-load-of-small-alloca-with-zero-upper-half.ll
@@ -171,16 +171,16 @@ define void @load_2byte_chunk_of_4byte_alloca_with_zero_upper_half(ptr %src, i64
define void @load_1byte_chunk_of_8byte_alloca_with_zero_upper_half(ptr %src, i64 %byteOff, ptr %dst) nounwind {
; X64-NO-BMI2-LABEL: load_1byte_chunk_of_8byte_alloca_with_zero_upper_half:
; X64-NO-BMI2: # %bb.0:
-; X64-NO-BMI2-NEXT: leal (,%rsi,8), %ecx
+; X64-NO-BMI2-NEXT: leaq (,%rsi,8), %rcx
; X64-NO-BMI2-NEXT: movl (%rdi), %eax
-; X64-NO-BMI2-NEXT: # kill: def $cl killed $cl killed $ecx
+; X64-NO-BMI2-NEXT: # kill: def $cl killed $cl killed $rcx
; X64-NO-BMI2-NEXT: shrq %cl, %rax
; X64-NO-BMI2-NEXT: movb %al, (%rdx)
; X64-NO-BMI2-NEXT: retq
;
; X64-BMI2-LABEL: load_1byte_chunk_of_8byte_alloca_with_zero_upper_half:
; X64-BMI2: # %bb.0:
-; X64-BMI2-NEXT: shll $3, %esi
+; X64-BMI2-NEXT: shlq $3, %rsi
; X64-BMI2-NEXT: movl (%rdi), %eax
; X64-BMI2-NEXT: shrxq %rsi, %rax, %rax
; X64-BMI2-NEXT: movb %al, (%rdx)
@@ -248,16 +248,16 @@ define void @load_1byte_chunk_of_8byte_alloca_with_zero_upper_half(ptr %src, i64
define void @load_2byte_chunk_of_8byte_alloca_with_zero_upper_half(ptr %src, i64 %byteOff, ptr %dst) nounwind {
; X64-NO-BMI2-LABEL: load_2byte_chunk_of_8byte_alloca_with_zero_upper_half:
; X64-NO-BMI2: # %bb.0:
-; X64-NO-BMI2-NEXT: leal (,%rsi,8), %ecx
+; X64-NO-BMI2-NEXT: leaq (,%rsi,8), %rcx
; X64-NO-BMI2-NEXT: movl (%rdi), %eax
-; X64-NO-BMI2-NEXT: # kill: def $cl killed $cl killed $ecx
+; X64-NO-BMI2-NEXT: # kill: def $cl killed $cl killed $rcx
; X64-NO-BMI2-NEXT: shrq %cl, %rax
; X64-NO-BMI2-NEXT: movw %ax, (%rdx)
; X64-NO-BMI2-NEXT: retq
;
; X64-BMI2-LABEL: load_2byte_chunk_of_8byte_alloca_with_zero_upper_half:
; X64-BMI2: # %bb.0:
-; X64-BMI2-NEXT: shll $3, %esi
+; X64-BMI2-NEXT: shlq $3, %rsi
; X64-BMI2-NEXT: movl (%rdi), %eax
; X64-BMI2-NEXT: shrxq %rsi, %rax, %rax
; X64-BMI2-NEXT: movw %ax, (%rdx)
@@ -324,16 +324,16 @@ define void @load_2byte_chunk_of_8byte_alloca_with_zero_upper_half(ptr %src, i64
define void @load_4byte_chunk_of_8byte_alloca_with_zero_upper_half(ptr %src, i64 %byteOff, ptr %dst) nounwind {
; X64-NO-BMI2-LABEL: load_4byte_chunk_of_8byte_alloca_with_zero_upper_half:
; X64-NO-BMI2: # %bb.0:
-; X64-NO-BMI2-NEXT: leal (,%rsi,8), %ecx
+; X64-NO-BMI2-NEXT: leaq (,%rsi,8), %rcx
; X64-NO-BMI2-NEXT: movl (%rdi), %eax
-; X64-NO-BMI2-NEXT: # kill: def $cl killed $cl killed $ecx
+; X64-NO-BMI2-NEXT: # kill: def $cl killed $cl killed $rcx
; X64-NO-BMI2-NEXT: shrq %cl, %rax
; X64-NO-BMI2-NEXT: movl %eax, (%rdx)
; X64-NO-BMI2-NEXT: retq
;
; X64-BMI2-LABEL: load_4byte_chunk_of_8byte_alloca_with_zero_upper_half:
; X64-BMI2: # %bb.0:
-; X64-BMI2-NEXT: shll $3, %esi
+; X64-BMI2-NEXT: shlq $3, %rsi
; X64-BMI2-NEXT: movl (%rdi), %eax
; X64-BMI2-NEXT: shrxq %rsi, %rax, %rax
; X64-BMI2-NEXT: movl %eax, (%rdx)
@@ -402,7 +402,7 @@ define void @load_1byte_chunk_of_16byte_alloca_with_zero_upper_half(ptr %src, i6
; X64-NO-BMI2-NO-SHLD: # %bb.0:
; X64-NO-BMI2-NO-SHLD-NEXT: movq %rsi, %rcx
; X64-NO-BMI2-NO-SHLD-NEXT: movq (%rdi), %rax
-; X64-NO-BMI2-NO-SHLD-NEXT: shll $3, %ecx
+; X64-NO-BMI2-NO-SHLD-NEXT: shlq $3, %rcx
; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rax
; X64-NO-BMI2-NO-SHLD-NEXT: xorl %esi, %esi
; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %cl
@@ -414,7 +414,7 @@ define void @load_1byte_chunk_of_16byte_alloca_with_zero_upper_half(ptr %src, i6
; X64-SHLD: # %bb.0:
; X64-SHLD-NEXT: movq %rsi, %rcx
; X64-SHLD-NEXT: movq (%rdi), %rax
-; X64-SHLD-NEXT: shll $3, %ecx
+; X64-SHLD-NEXT: shlq $3, %rcx
; X64-SHLD-NEXT: xorl %esi, %esi
; X64-SHLD-NEXT: shrdq %cl, %rsi, %rax
; X64-SHLD-NEXT: testb $64, %cl
@@ -424,7 +424,7 @@ define void @load_1byte_chunk_of_16byte_alloca_with_zero_upper_half(ptr %src, i6
;
; X64-HAVE-BMI2-NO-SHLD-LABEL: load_1byte_chunk_of_16byte_alloca_with_zero_upper_half:
; X64-HAVE-BMI2-NO-SHLD: # %bb.0:
-; X64-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %esi
+; X64-HAVE-BMI2-NO-SHLD-NEXT: shlq $3, %rsi
; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, (%rdi), %rax
; X64-HAVE-BMI2-NO-SHLD-NEXT: xorl %ecx, %ecx
; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %sil
@@ -475,7 +475,7 @@ define void @load_2byte_chunk_of_16byte_alloca_with_zero_upper_half(ptr %src, i6
; X64-NO-BMI2-NO-SHLD: # %bb.0:
; X64-NO-BMI2-NO-SHLD-NEXT: movq %rsi, %rcx
; X64-NO-BMI2-NO-SHLD-NEXT: movq (%rdi), %rax
-; X64-NO-BMI2-NO-SHLD-NEXT: shll $3, %ecx
+; X64-NO-BMI2-NO-SHLD-NEXT: shlq $3, %rcx
; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rax
; X64-NO-BMI2-NO-SHLD-NEXT: xorl %esi, %esi
; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %cl
@@ -487,7 +487,7 @@ define void @load_2byte_chunk_of_16byte_alloca_with_zero_upper_half(ptr %src, i6
; X64-SHLD: # %bb.0:
; X64-SHLD-NEXT: movq %rsi, %rcx
; X64-SHLD-NEXT: movq (%rdi), %rax
-; X64-SHLD-NEXT: shll $3, %ecx
+; X64-SHLD-NEXT: shlq $3, %rcx
; X64-SHLD-NEXT: xorl %esi, %esi
; X64-SHLD-NEXT: shrdq %cl, %rsi, %rax
; X64-SHLD-NEXT: testb $64, %cl
@@ -497,7 +497,7 @@ define void @load_2byte_chunk_of_16byte_alloca_with_zero_upper_half(ptr %src, i6
;
; X64-HAVE-BMI2-NO-SHLD-LABEL: load_2byte_chunk_of_16byte_alloca_with_zero_upper_half:
; X64-HAVE-BMI2-NO-SHLD: # %bb.0:
-; X64-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %esi
+; X64-HAVE-BMI2-NO-SHLD-NEXT: shlq $3, %rsi
; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, (%rdi), %rax
; X64-HAVE-BMI2-NO-SHLD-NEXT: xorl %ecx, %ecx
; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %sil
@@ -547,7 +547,7 @@ define void @load_4byte_chunk_of_16byte_alloca_with_zero_upper_half(ptr %src, i6
; X64-NO-BMI2-NO-SHLD: # %bb.0:
; X64-NO-BMI2-NO-SHLD-NEXT: movq %rsi, %rcx
; X64-NO-BMI2-NO-SHLD-NEXT: movq (%rdi), %rax
-; X64-NO-BMI2-NO-SHLD-NEXT: shll $3, %ecx
+; X64-NO-BMI2-NO-SHLD-NEXT: shlq $3, %rcx
; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rax
; X64-NO-BMI2-NO-SHLD-NEXT: xorl %esi, %esi
; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %cl
@@ -559,7 +559,7 @@ define void @load_4byte_chunk_of_16byte_alloca_with_zero_upper_half(ptr %src, i6
; X64-SHLD: # %bb.0:
; X64-SHLD-NEXT: movq %rsi, %rcx
; X64-SHLD-NEXT: movq (%rdi), %rax
-; X64-SHLD-NEXT: shll $3, %ecx
+; X64-SHLD-NEXT: shlq $3, %rcx
; X64-SHLD-NEXT: xorl %esi, %esi
; X64-SHLD-NEXT: shrdq %cl, %rsi, %rax
; X64-SHLD-NEXT: testb $64, %cl
@@ -569,7 +569,7 @@ define void @load_4byte_chunk_of_16byte_alloca_with_zero_upper_half(ptr %src, i6
;
; X64-HAVE-BMI2-NO-SHLD-LABEL: load_4byte_chunk_of_16byte_alloca_with_zero_upper_half:
; X64-HAVE-BMI2-NO-SHLD: # %bb.0:
-; X64-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %esi
+; X64-HAVE-BMI2-NO-SHLD-NEXT: shlq $3, %rsi
; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, (%rdi), %rax
; X64-HAVE-BMI2-NO-SHLD-NEXT: xorl %ecx, %ecx
; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %sil
@@ -619,7 +619,7 @@ define void @load_8byte_chunk_of_16byte_alloca_with_zero_upper_half(ptr %src, i6
; X64-NO-BMI2-NO-SHLD: # %bb.0:
; X64-NO-BMI2-NO-SHLD-NEXT: movq %rsi, %rcx
; X64-NO-BMI2-NO-SHLD-NEXT: movq (%rdi), %rax
-; X64-NO-BMI2-NO-SHLD-NEXT: shll $3, %ecx
+; X64-NO-BMI2-NO-SHLD-NEXT: shlq $3, %rcx
; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rax
; X64-NO-BMI2-NO-SHLD-NEXT: xorl %esi, %esi
; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %cl
@@ -631,7 +631,7 @@ define void @load_8byte_chunk_of_16byte_alloca_with_zero_upper_half(ptr %src, i6
; X64-SHLD: # %bb.0:
; X64-SHLD-NEXT: movq %rsi, %rcx
; X64-SHLD-NEXT: movq (%rdi), %rax
-; X64-SHLD-NEXT: shll $3, %ecx
+; X64-SHLD-NEXT: shlq $3, %rcx
; X64-SHLD-NEXT: xorl %esi, %esi
; X64-SHLD-NEXT: shrdq %cl, %rsi, %rax
; X64-SHLD-NEXT: testb $64, %cl
@@ -641,7 +641,7 @@ define void @load_8byte_chunk_of_16byte_alloca_with_zero_upper_half(ptr %src, i6
;
; X64-HAVE-BMI2-NO-SHLD-LABEL: load_8byte_chunk_of_16byte_alloca_with_zero_upper_half:
; X64-HAVE-BMI2-NO-SHLD: # %bb.0:
-; X64-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %esi
+; X64-HAVE-BMI2-NO-SHLD-NEXT: shlq $3, %rsi
; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, (%rdi), %rax
; X64-HAVE-BMI2-NO-SHLD-NEXT: xorl %ecx, %ecx
; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %sil
@@ -692,7 +692,7 @@ define void @load_1byte_chunk_of_32byte_alloca_with_zero_upper_half(ptr %src, i6
; X64-LABEL: load_1byte_chunk_of_32byte_alloca_with_zero_upper_half:
; X64: # %bb.0:
; X64-NEXT: movdqu (%rdi), %xmm0
-; X64-NEXT: shll $3, %esi
+; X64-NEXT: shlq $3, %rsi
; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-NEXT: movq %xmm0, -{{[0-9]+}}(%rsp)
; X64-NEXT: movq %xmm1, -{{[0-9]+}}(%rsp)
@@ -759,7 +759,7 @@ define void @load_2byte_chunk_of_32byte_alloca_with_zero_upper_half(ptr %src, i6
; X64-LABEL: load_2byte_chunk_of_32byte_alloca_with_zero_upper_half:
; X64: # %bb.0:
; X64-NEXT: movdqu (%rdi), %xmm0
-; X64-NEXT: shll $3, %esi
+; X64-NEXT: shlq $3, %rsi
; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-NEXT: movq %xmm0, -{{[0-9]+}}(%rsp)
; X64-NEXT: movq %xmm1, -{{[0-9]+}}(%rsp)
@@ -825,7 +825,7 @@ define void @load_4byte_chunk_of_32byte_alloca_with_zero_upper_half(ptr %src, i6
; X64-LABEL: load_4byte_chunk_of_32byte_alloca_with_zero_upper_half:
; X64: # %bb.0:
; X64-NEXT: movdqu (%rdi), %xmm0
-; X64-NEXT: shll $3, %esi
+; X64-NEXT: shlq $3, %rsi
; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-NEXT: movq %xmm0, -{{[0-9]+}}(%rsp)
; X64-NEXT: movq %xmm1, -{{[0-9]+}}(%rsp)
@@ -891,7 +891,7 @@ define void @load_8byte_chunk_of_32byte_alloca_with_zero_upper_half(ptr %src, i6
; X64-LABEL: load_8byte_chunk_of_32byte_alloca_with_zero_upper_half:
; X64: # %bb.0:
; X64-NEXT: movdqu (%rdi), %xmm0
-; X64-NEXT: shll $3, %esi
+; X64-NEXT: shlq $3, %rsi
; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-NEXT: movq %xmm0, -{{[0-9]+}}(%rsp)
; X64-NEXT: movq %xmm1, -{{[0-9]+}}(%rsp)
@@ -959,7 +959,7 @@ define void @load_16byte_chunk_of_32byte_alloca_with_zero_upper_half(ptr %src, i
; X64-LABEL: load_16byte_chunk_of_32byte_alloca_with_zero_upper_half:
; X64: # %bb.0:
; X64-NEXT: movdqu (%rdi), %xmm0
-; X64-NEXT: shll $3, %esi
+; X64-NEXT: shlq $3, %rsi
; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-NEXT: movq %xmm0, -{{[0-9]+}}(%rsp)
; X64-NEXT: movq %xmm1, -{{[0-9]+}}(%rsp)
diff --git a/llvm/test/CodeGen/X86/widen-load-of-small-alloca.ll b/llvm/test/CodeGen/X86/widen-load-of-small-alloca.ll
index 929671d674e5e..38a3a0ca7ab05 100644
--- a/llvm/test/CodeGen/X86/widen-load-of-small-alloca.ll
+++ b/llvm/test/CodeGen/X86/widen-load-of-small-alloca.ll
@@ -169,15 +169,15 @@ define void @load_1byte_chunk_of_8byte_alloca(ptr %src, i64 %byteOff, ptr %dst)
; X64-NO-BMI2-LABEL: load_1byte_chunk_of_8byte_alloca:
; X64-NO-BMI2: # %bb.0:
; X64-NO-BMI2-NEXT: movq (%rdi), %rax
-; X64-NO-BMI2-NEXT: leal (,%rsi,8), %ecx
-; X64-NO-BMI2-NEXT: # kill: def $cl killed $cl killed $ecx
+; X64-NO-BMI2-NEXT: leaq (,%rsi,8), %rcx
+; X64-NO-BMI2-NEXT: # kill: def $cl killed $cl killed $rcx
; X64-NO-BMI2-NEXT: shrq %cl, %rax
; X64-NO-BMI2-NEXT: movb %al, (%rdx)
; X64-NO-BMI2-NEXT: retq
;
; X64-BMI2-LABEL: load_1byte_chunk_of_8byte_alloca:
; X64-BMI2: # %bb.0:
-; X64-BMI2-NEXT: shll $3, %esi
+; X64-BMI2-NEXT: shlq $3, %rsi
; X64-BMI2-NEXT: shrxq %rsi, (%rdi), %rax
; X64-BMI2-NEXT: movb %al, (%rdx)
; X64-BMI2-NEXT: retq
@@ -293,15 +293,15 @@ define void @load_2byte_chunk_of_8byte_alloca(ptr %src, i64 %byteOff, ptr %dst)
; X64-NO-BMI2-LABEL: load_2byte_chunk_of_8byte_alloca:
; X64-NO-BMI2: # %bb.0:
; X64-NO-BMI2-NEXT: movq (%rdi), %rax
-; X64-NO-BMI2-NEXT: leal (,%rsi,8), %ecx
-; X64-NO-BMI2-NEXT: # kill: def $cl killed $cl killed $ecx
+; X64-NO-BMI2-NEXT: leaq (,%rsi,8), %rcx
+; X64-NO-BMI2-NEXT: # kill: def $cl killed $cl killed $rcx
; X64-NO-BMI2-NEXT: shrq %cl, %rax
; X64-NO-BMI2-NEXT: movw %ax, (%rdx)
; X64-NO-BMI2-NEXT: retq
;
; X64-BMI2-LABEL: load_2byte_chunk_of_8byte_alloca:
; X64-BMI2: # %bb.0:
-; X64-BMI2-NEXT: shll $3, %esi
+; X64-BMI2-NEXT: shlq $3, %rsi
; X64-BMI2-NEXT: shrxq %rsi, (%rdi), %rax
; X64-BMI2-NEXT: movw %ax, (%rdx)
; X64-BMI2-NEXT: retq
@@ -414,15 +414,15 @@ define void @load_4byte_chunk_of_8byte_alloca(ptr %src, i64 %byteOff, ptr %dst)
; X64-NO-BMI2-LABEL: load_4byte_chunk_of_8byte_alloca:
; X64-NO-BMI2: # %bb.0:
; X64-NO-BMI2-NEXT: movq (%rdi), %rax
-; X64-NO-BMI2-NEXT: leal (,%rsi,8), %ecx
-; X64-NO-BMI2-NEXT: # kill: def $cl killed $cl killed $ecx
+; X64-NO-BMI2-NEXT: leaq (,%rsi,8), %rcx
+; X64-NO-BMI2-NEXT: # kill: def $cl killed $cl killed $rcx
; X64-NO-BMI2-NEXT: shrq %cl, %rax
; X64-NO-BMI2-NEXT: movl %eax, (%rdx)
; X64-NO-BMI2-NEXT: retq
;
; X64-BMI2-LABEL: load_4byte_chunk_of_8byte_alloca:
; X64-BMI2: # %bb.0:
-; X64-BMI2-NEXT: shll $3, %esi
+; X64-BMI2-NEXT: shlq $3, %rsi
; X64-BMI2-NEXT: shrxq %rsi, (%rdi), %rax
; X64-BMI2-NEXT: movl %eax, (%rdx)
; X64-BMI2-NEXT: retq
@@ -537,7 +537,7 @@ define void @load_1byte_chunk_of_16byte_alloca(ptr %src, i64 %byteOff, ptr %dst)
; X64-NO-BMI2-NO-SHLD-LABEL: load_1byte_chunk_of_16byte_alloca:
; X64-NO-BMI2-NO-SHLD: # %bb.0:
; X64-NO-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0
-; X64-NO-BMI2-NO-SHLD-NEXT: shll $3, %esi
+; X64-NO-BMI2-NO-SHLD-NEXT: shlq $3, %rsi
; X64-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm1, %rax
; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm0, %rdi
@@ -558,7 +558,7 @@ define void @load_1byte_chunk_of_16byte_alloca(ptr %src, i64 %byteOff, ptr %dst)
; X64-NO-BMI2-HAVE-SHLD: # %bb.0:
; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rsi, %rcx
; X64-NO-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0
-; X64-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %ecx
+; X64-NO-BMI2-HAVE-SHLD-NEXT: shlq $3, %rcx
; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rax
; X64-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rsi
@@ -572,7 +572,7 @@ define void @load_1byte_chunk_of_16byte_alloca(ptr %src, i64 %byteOff, ptr %dst)
; X64-HAVE-BMI2-NO-SHLD-LABEL: load_1byte_chunk_of_16byte_alloca:
; X64-HAVE-BMI2-NO-SHLD: # %bb.0:
; X64-HAVE-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0
-; X64-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %esi
+; X64-HAVE-BMI2-NO-SHLD-NEXT: shlq $3, %rsi
; X64-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm1, %rax
; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm0, %rcx
@@ -592,7 +592,7 @@ define void @load_1byte_chunk_of_16byte_alloca(ptr %src, i64 %byteOff, ptr %dst)
; X64-HAVE-BMI2-HAVE-SHLD: # %bb.0:
; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rsi, %rcx
; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0
-; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shll $3, %ecx
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shlq $3, %rcx
; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rax
; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rsi
@@ -645,7 +645,7 @@ define void @load_2byte_chunk_of_16byte_alloca(ptr %src, i64 %byteOff, ptr %dst)
; X64-NO-BMI2-NO-SHLD-LABEL: load_2byte_chunk_of_16byte_alloca:
; X64-NO-BMI2-NO-SHLD: # %bb.0:
; X64-NO-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0
-; X64-NO-BMI2-NO-SHLD-NEXT: shll $3, %esi
+; X64-NO-BMI2-NO-SHLD-NEXT: shlq $3, %rsi
; X64-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm1, %rax
; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm0, %rdi
@@ -666,7 +666,7 @@ define void @load_2byte_chunk_of_16byte_alloca(ptr %src, i64 %byteOff, ptr %dst)
; X64-NO-BMI2-HAVE-SHLD: # %bb.0:
; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rsi, %rcx
; X64-NO-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0
-; X64-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %ecx
+; X64-NO-BMI2-HAVE-SHLD-NEXT: shlq $3, %rcx
; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rax
; X64-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rsi
@@ -680,7 +680,7 @@ define void @load_2byte_chunk_of_16byte_alloca(ptr %src, i64 %byteOff, ptr %dst)
; X64-HAVE-BMI2-NO-SHLD-LABEL: load_2byte_chunk_of_16byte_alloca:
; X64-HAVE-BMI2-NO-SHLD: # %bb.0:
; X64-HAVE-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0
-; X64-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %esi
+; X64-HAVE-BMI2-NO-SHLD-NEXT: shlq $3, %rsi
; X64-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm1, %rax
; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm0, %rcx
@@ -700,7 +700,7 @@ define void @load_2byte_chunk_of_16byte_alloca(ptr %src, i64 %byteOff, ptr %dst)
; X64-HAVE-BMI2-HAVE-SHLD: # %bb.0:
; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rsi, %rcx
; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0
-; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shll $3, %ecx
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shlq $3, %rcx
; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rax
; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rsi
@@ -752,7 +752,7 @@ define void @load_4byte_chunk_of_16byte_alloca(ptr %src, i64 %byteOff, ptr %dst)
; X64-NO-BMI2-NO-SHLD-LABEL: load_4byte_chunk_of_16byte_alloca:
; X64-NO-BMI2-NO-SHLD: # %bb.0:
; X64-NO-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0
-; X64-NO-BMI2-NO-SHLD-NEXT: shll $3, %esi
+; X64-NO-BMI2-NO-SHLD-NEXT: shlq $3, %rsi
; X64-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm1, %rax
; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm0, %rdi
@@ -773,7 +773,7 @@ define void @load_4byte_chunk_of_16byte_alloca(ptr %src, i64 %byteOff, ptr %dst)
; X64-NO-BMI2-HAVE-SHLD: # %bb.0:
; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rsi, %rcx
; X64-NO-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0
-; X64-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %ecx
+; X64-NO-BMI2-HAVE-SHLD-NEXT: shlq $3, %rcx
; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rax
; X64-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rsi
@@ -787,7 +787,7 @@ define void @load_4byte_chunk_of_16byte_alloca(ptr %src, i64 %byteOff, ptr %dst)
; X64-HAVE-BMI2-NO-SHLD-LABEL: load_4byte_chunk_of_16byte_alloca:
; X64-HAVE-BMI2-NO-SHLD: # %bb.0:
; X64-HAVE-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0
-; X64-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %esi
+; X64-HAVE-BMI2-NO-SHLD-NEXT: shlq $3, %rsi
; X64-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm1, %rax
; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm0, %rcx
@@ -807,7 +807,7 @@ define void @load_4byte_chunk_of_16byte_alloca(ptr %src, i64 %byteOff, ptr %dst)
; X64-HAVE-BMI2-HAVE-SHLD: # %bb.0:
; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rsi, %rcx
; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0
-; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shll $3, %ecx
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shlq $3, %rcx
; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rax
; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rsi
@@ -859,7 +859,7 @@ define void @load_8byte_chunk_of_16byte_alloca(ptr %src, i64 %byteOff, ptr %dst)
; X64-NO-BMI2-NO-SHLD-LABEL: load_8byte_chunk_of_16byte_alloca:
; X64-NO-BMI2-NO-SHLD: # %bb.0:
; X64-NO-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0
-; X64-NO-BMI2-NO-SHLD-NEXT: shll $3, %esi
+; X64-NO-BMI2-NO-SHLD-NEXT: shlq $3, %rsi
; X64-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm1, %rax
; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm0, %rdi
@@ -880,7 +880,7 @@ define void @load_8byte_chunk_of_16byte_alloca(ptr %src, i64 %byteOff, ptr %dst)
; X64-NO-BMI2-HAVE-SHLD: # %bb.0:
; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rsi, %rcx
; X64-NO-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0
-; X64-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %ecx
+; X64-NO-BMI2-HAVE-SHLD-NEXT: shlq $3, %rcx
; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rax
; X64-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rsi
@@ -894,7 +894,7 @@ define void @load_8byte_chunk_of_16byte_alloca(ptr %src, i64 %byteOff, ptr %dst)
; X64-HAVE-BMI2-NO-SHLD-LABEL: load_8byte_chunk_of_16byte_alloca:
; X64-HAVE-BMI2-NO-SHLD: # %bb.0:
; X64-HAVE-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0
-; X64-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %esi
+; X64-HAVE-BMI2-NO-SHLD-NEXT: shlq $3, %rsi
; X64-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm1, %rax
; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm0, %rcx
@@ -914,7 +914,7 @@ define void @load_8byte_chunk_of_16byte_alloca(ptr %src, i64 %byteOff, ptr %dst)
; X64-HAVE-BMI2-HAVE-SHLD: # %bb.0:
; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rsi, %rcx
; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0
-; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shll $3, %ecx
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shlq $3, %rcx
; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rax
; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rsi
@@ -971,7 +971,7 @@ define void @load_1byte_chunk_of_32byte_alloca(ptr %src, i64 %byteOff, ptr %dst)
; X64: # %bb.0:
; X64-NEXT: movdqu (%rdi), %xmm0
; X64-NEXT: movdqu 16(%rdi), %xmm1
-; X64-NEXT: shll $3, %esi
+; X64-NEXT: shlq $3, %rsi
; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3]
; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,2,3]
; X64-NEXT: movq %xmm1, -{{[0-9]+}}(%rsp)
@@ -1042,7 +1042,7 @@ define void @load_2byte_chunk_of_32byte_alloca(ptr %src, i64 %byteOff, ptr %dst)
; X64: # %bb.0:
; X64-NEXT: movdqu (%rdi), %xmm0
; X64-NEXT: movdqu 16(%rdi), %xmm1
-; X64-NEXT: shll $3, %esi
+; X64-NEXT: shlq $3, %rsi
; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3]
; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,2,3]
; X64-NEXT: movq %xmm1, -{{[0-9]+}}(%rsp)
@@ -1112,7 +1112,7 @@ define void @load_4byte_chunk_of_32byte_alloca(ptr %src, i64 %byteOff, ptr %dst)
; X64: # %bb.0:
; X64-NEXT: movdqu (%rdi), %xmm0
; X64-NEXT: movdqu 16(%rdi), %xmm1
-; X64-NEXT: shll $3, %esi
+; X64-NEXT: shlq $3, %rsi
; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3]
; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,2,3]
; X64-NEXT: movq %xmm1, -{{[0-9]+}}(%rsp)
@@ -1182,7 +1182,7 @@ define void @load_8byte_chunk_of_32byte_alloca(ptr %src, i64 %byteOff, ptr %dst)
; X64: # %bb.0:
; X64-NEXT: movdqu (%rdi), %xmm0
; X64-NEXT: movdqu 16(%rdi), %xmm1
-; X64-NEXT: shll $3, %esi
+; X64-NEXT: shlq $3, %rsi
; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3]
; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,2,3]
; X64-NEXT: movq %xmm1, -{{[0-9]+}}(%rsp)
@@ -1254,7 +1254,7 @@ define void @load_16byte_chunk_of_32byte_alloca(ptr %src, i64 %byteOff, ptr %dst
; X64: # %bb.0:
; X64-NEXT: movdqu (%rdi), %xmm0
; X64-NEXT: movdqu 16(%rdi), %xmm1
-; X64-NEXT: shll $3, %esi
+; X64-NEXT: shlq $3, %rsi
; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3]
; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,2,3]
; X64-NEXT: movq %xmm1, -{{[0-9]+}}(%rsp)
diff --git a/llvm/test/CodeGen/X86/zext-logicop-shift-load.ll b/llvm/test/CodeGen/X86/zext-logicop-shift-load.ll
index 3f64a383abd2c..9d9e13dd8dfe5 100644
--- a/llvm/test/CodeGen/X86/zext-logicop-shift-load.ll
+++ b/llvm/test/CodeGen/X86/zext-logicop-shift-load.ll
@@ -15,7 +15,7 @@ define i64 @test1(ptr %data) {
; X64-LABEL: test1:
; X64: # %bb.0: # %entry
; X64-NEXT: movl (%rdi), %eax
-; X64-NEXT: shll $2, %eax
+; X64-NEXT: shlq $2, %rax
; X64-NEXT: andl $60, %eax
; X64-NEXT: retq
entry:
>From 93a276ff49c6fdfd4745c99e828bbcaed9ba84a4 Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Mon, 20 May 2024 20:02:48 +0800
Subject: [PATCH 2/2] [SDAG] Fix bitwidth checks.
---
llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp | 9 ++++++---
1 file changed, 6 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 32a8dfdd3f75d..acb3debf80d8b 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -596,11 +596,14 @@ bool TargetLowering::ShrinkDemandedOp(SDValue Op, unsigned BitWidth,
// Op's type. For expedience, just check power-of-2 integer types.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
unsigned DemandedSize = DemandedBits.getActiveBits();
+ // Types of LHS and RHS may differ before legalization (e.g., shl), so we
+ // need to check both.
+ unsigned MinWidth =
+ std::min(Op.getOperand(0).getValueType().getScalarSizeInBits(),
+ Op.getOperand(1).getValueType().getScalarSizeInBits());
for (unsigned SmallVTBits = llvm::bit_ceil(DemandedSize);
- SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
+ SmallVTBits < MinWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), SmallVTBits);
- // Types of LHS and RHS may differ before legalization (e.g., shl), so we
- // need to check both.
if (TLI.isTruncateFree(Op.getOperand(0).getValueType(), SmallVT) &&
TLI.isTruncateFree(Op.getOperand(1).getValueType(), SmallVT) &&
TLI.isZExtFree(SmallVT, VT)) {
More information about the llvm-commits mailing list