[llvm] [SDAG] Fix type checks in `ShrinkDemandedOp` to avoid creating invalid truncates (PR #92730)

Yingwei Zheng via llvm-commits llvm-commits at lists.llvm.org
Mon May 20 05:03:23 PDT 2024


https://github.com/dtcxzyw updated https://github.com/llvm/llvm-project/pull/92730

From f57b846f0728aca0994e557f791edaa2e9b3b94d Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Mon, 20 May 2024 17:54:10 +0800
Subject: [PATCH 1/2] [SDAG] Fix type checks in `ShrinkDemandedOp` to avoid
 creating invalid truncates.

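A minimal reproducer (added below as llvm/test/CodeGen/X86/pr92720.ll). Before
this change, ShrinkDemandedOp could build an invalid truncate of the
shift-amount operand, whose type can differ from the shifted value before
legalization:

  define i64 @pr92720(i64 %x) {
    %or = or i64 %x, 255
    %sub = sub i64 0, %or
    %shl = shl i64 1, %sub
    %sext = shl i64 %shl, 32
    ret i64 %sext
  }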
---
 .../CodeGen/SelectionDAG/TargetLowering.cpp   |  6 +-
 llvm/test/CodeGen/X86/btc_bts_btr.ll          |  6 +-
 llvm/test/CodeGen/X86/narrow-shl-cst.ll       |  6 +-
 llvm/test/CodeGen/X86/pr27202.ll              |  4 +-
 llvm/test/CodeGen/X86/pr49162.ll              |  5 +-
 llvm/test/CodeGen/X86/pr92720.ll              | 15 ++++
 .../CodeGen/X86/scheduler-backtracking.ll     | 12 +--
 llvm/test/CodeGen/X86/vector-sext.ll          | 84 ++++++++-----------
 llvm/test/CodeGen/X86/vector-zext.ll          | 66 ++++++---------
 ...ad-of-small-alloca-with-zero-upper-half.ll | 52 ++++++------
 .../CodeGen/X86/widen-load-of-small-alloca.ll | 60 ++++++-------
 .../CodeGen/X86/zext-logicop-shift-load.ll    |  2 +-
 12 files changed, 152 insertions(+), 166 deletions(-)
 create mode 100644 llvm/test/CodeGen/X86/pr92720.ll

diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 3ec6b9b795079..32a8dfdd3f75d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -599,7 +599,11 @@ bool TargetLowering::ShrinkDemandedOp(SDValue Op, unsigned BitWidth,
   for (unsigned SmallVTBits = llvm::bit_ceil(DemandedSize);
        SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
     EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), SmallVTBits);
-    if (TLI.isTruncateFree(VT, SmallVT) && TLI.isZExtFree(SmallVT, VT)) {
+    // Types of LHS and RHS may differ before legalization (e.g., shl), so we
+    // need to check both.
+    if (TLI.isTruncateFree(Op.getOperand(0).getValueType(), SmallVT) &&
+        TLI.isTruncateFree(Op.getOperand(1).getValueType(), SmallVT) &&
+        TLI.isZExtFree(SmallVT, VT)) {
       // We found a type with free casts.
       SDValue X = DAG.getNode(
           Op.getOpcode(), dl, SmallVT,
diff --git a/llvm/test/CodeGen/X86/btc_bts_btr.ll b/llvm/test/CodeGen/X86/btc_bts_btr.ll
index efd9d1105d975..e110e5c9274db 100644
--- a/llvm/test/CodeGen/X86/btc_bts_btr.ll
+++ b/llvm/test/CodeGen/X86/btc_bts_btr.ll
@@ -1021,7 +1021,7 @@ define i64 @btr_64_mask_zeros(i64 %x, i64 %n) {
 ; X64-LABEL: btr_64_mask_zeros:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movq %rdi, %rax
-; X64-NEXT:    shll $2, %esi
+; X64-NEXT:    shlq $2, %rsi
 ; X64-NEXT:    btrq %rsi, %rax
 ; X64-NEXT:    retq
 ;
@@ -1056,7 +1056,7 @@ define i64 @bts_64_mask_zeros(i64 %x, i64 %n) {
 ; X64-LABEL: bts_64_mask_zeros:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movq %rdi, %rax
-; X64-NEXT:    shll $2, %esi
+; X64-NEXT:    shlq $2, %rsi
 ; X64-NEXT:    btsq %rsi, %rax
 ; X64-NEXT:    retq
 ;
@@ -1088,7 +1088,7 @@ define i64 @btc_64_mask_zeros(i64 %x, i64 %n) {
 ; X64-LABEL: btc_64_mask_zeros:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movq %rdi, %rax
-; X64-NEXT:    shll $2, %esi
+; X64-NEXT:    shlq $2, %rsi
 ; X64-NEXT:    btcq %rsi, %rax
 ; X64-NEXT:    retq
 ;
diff --git a/llvm/test/CodeGen/X86/narrow-shl-cst.ll b/llvm/test/CodeGen/X86/narrow-shl-cst.ll
index 296ef52c3bff9..107f14a0e2d2c 100644
--- a/llvm/test/CodeGen/X86/narrow-shl-cst.ll
+++ b/llvm/test/CodeGen/X86/narrow-shl-cst.ll
@@ -151,7 +151,7 @@ define i32 @test12(i32 %x, ptr %y) nounwind {
 define i64 @test13(i64 %x, ptr %y) nounwind {
 ; CHECK-LABEL: test13:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addl %edi, %edi
+; CHECK-NEXT:    addq %rdi, %rdi
 ; CHECK-NEXT:    movzbl %dil, %eax
 ; CHECK-NEXT:    movq %rax, (%rsi)
 ; CHECK-NEXT:    retq
@@ -212,7 +212,7 @@ define i64 @test18(i64 %x) nounwind {
 ; CHECK-LABEL: test18:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movzbl %dil, %eax
-; CHECK-NEXT:    shll $10, %eax
+; CHECK-NEXT:    shlq $10, %rax
 ; CHECK-NEXT:    retq
   %and = shl i64 %x, 10
   %shl = and i64 %and, 261120
@@ -234,7 +234,7 @@ define i64 @test20(i64 %x) nounwind {
 ; CHECK-LABEL: test20:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movzwl %di, %eax
-; CHECK-NEXT:    shll $10, %eax
+; CHECK-NEXT:    shlq $10, %rax
 ; CHECK-NEXT:    retq
   %and = shl i64 %x, 10
   %shl = and i64 %and, 67107840
diff --git a/llvm/test/CodeGen/X86/pr27202.ll b/llvm/test/CodeGen/X86/pr27202.ll
index 3bd3be62fb4c8..9da22f635c266 100644
--- a/llvm/test/CodeGen/X86/pr27202.ll
+++ b/llvm/test/CodeGen/X86/pr27202.ll
@@ -45,8 +45,8 @@ define zeroext i1 @g(i32 %x) optsize {
 define i64 @PR46237(i64 %x, i64 %y, i64 %z) optsize {
 ; CHECK-LABEL: PR46237:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movl %edx, %eax
-; CHECK-NEXT:    shll $6, %eax
+; CHECK-NEXT:    movq %rdx, %rax
+; CHECK-NEXT:    shlq $6, %rax
 ; CHECK-NEXT:    movzbl %al, %ecx
 ; CHECK-NEXT:    andl $7, %esi
 ; CHECK-NEXT:    andl $7, %edx
diff --git a/llvm/test/CodeGen/X86/pr49162.ll b/llvm/test/CodeGen/X86/pr49162.ll
index 0e65e121531bf..db8cec61acd6b 100644
--- a/llvm/test/CodeGen/X86/pr49162.ll
+++ b/llvm/test/CodeGen/X86/pr49162.ll
@@ -17,10 +17,7 @@ define ptr @PR49162(ptr %base, ptr %ptr160) {
 ;
 ; X64-LABEL: PR49162:
 ; X64:       # %bb.0:
-; X64-NEXT:    movl 8(%rsi), %eax
-; X64-NEXT:    shll $16, %eax
-; X64-NEXT:    cltq
-; X64-NEXT:    sarq $16, %rax
+; X64-NEXT:    movswq 8(%rsi), %rax
 ; X64-NEXT:    leaq (%rdi,%rax,4), %rax
 ; X64-NEXT:    retq
   %load160 = load i160, ptr %ptr160, align 4
diff --git a/llvm/test/CodeGen/X86/pr92720.ll b/llvm/test/CodeGen/X86/pr92720.ll
new file mode 100644
index 0000000000000..b2543c08328c7
--- /dev/null
+++ b/llvm/test/CodeGen/X86/pr92720.ll
@@ -0,0 +1,15 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=x86_64-linux-gnu | FileCheck %s
+
+; Make sure we don't crash when shrinking the shift amount before legalization.
+define i64 @pr92720(i64 %x) {
+; CHECK-LABEL: pr92720:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movabsq $8589934592, %rax # imm = 0x200000000
+; CHECK-NEXT:    retq
+  %or = or i64 %x, 255
+  %sub = sub i64 0, %or
+  %shl = shl i64 1, %sub
+  %sext = shl i64 %shl, 32
+  ret i64 %sext
+}
diff --git a/llvm/test/CodeGen/X86/scheduler-backtracking.ll b/llvm/test/CodeGen/X86/scheduler-backtracking.ll
index 785b97d8c2402..53d3367cce4d3 100644
--- a/llvm/test/CodeGen/X86/scheduler-backtracking.ll
+++ b/llvm/test/CodeGen/X86/scheduler-backtracking.ll
@@ -13,7 +13,7 @@ define i256 @test1(i256 %a) nounwind {
 ; ILP-LABEL: test1:
 ; ILP:       # %bb.0:
 ; ILP-NEXT:    movq %rdi, %rax
-; ILP-NEXT:    leal (%rsi,%rsi), %ecx
+; ILP-NEXT:    leaq (%rsi,%rsi), %rcx
 ; ILP-NEXT:    movq $0, -{{[0-9]+}}(%rsp)
 ; ILP-NEXT:    movq $0, -{{[0-9]+}}(%rsp)
 ; ILP-NEXT:    movq $0, -{{[0-9]+}}(%rsp)
@@ -43,7 +43,7 @@ define i256 @test1(i256 %a) nounwind {
 ; ILP-NEXT:    shlq %cl, %rsi
 ; ILP-NEXT:    notb %cl
 ; ILP-NEXT:    shrq %rdx
-; ILP-NEXT:    # kill: def $cl killed $cl killed $ecx
+; ILP-NEXT:    # kill: def $cl killed $cl killed $rcx
 ; ILP-NEXT:    shrq %cl, %rdx
 ; ILP-NEXT:    orq %rsi, %rdx
 ; ILP-NEXT:    movq %rdx, 16(%rax)
@@ -60,7 +60,7 @@ define i256 @test1(i256 %a) nounwind {
 ; HYBRID-NEXT:    movq $0, -{{[0-9]+}}(%rsp)
 ; HYBRID-NEXT:    movq $0, -{{[0-9]+}}(%rsp)
 ; HYBRID-NEXT:    movq $0, -{{[0-9]+}}(%rsp)
-; HYBRID-NEXT:    addl %esi, %esi
+; HYBRID-NEXT:    addq %rsi, %rsi
 ; HYBRID-NEXT:    addb $3, %sil
 ; HYBRID-NEXT:    movl %esi, %ecx
 ; HYBRID-NEXT:    andb $7, %cl
@@ -97,7 +97,7 @@ define i256 @test1(i256 %a) nounwind {
 ; BURR-NEXT:    movq $0, -{{[0-9]+}}(%rsp)
 ; BURR-NEXT:    movq $0, -{{[0-9]+}}(%rsp)
 ; BURR-NEXT:    movq $0, -{{[0-9]+}}(%rsp)
-; BURR-NEXT:    addl %esi, %esi
+; BURR-NEXT:    addq %rsi, %rsi
 ; BURR-NEXT:    addb $3, %sil
 ; BURR-NEXT:    movl %esi, %ecx
 ; BURR-NEXT:    andb $7, %cl
@@ -126,7 +126,7 @@ define i256 @test1(i256 %a) nounwind {
 ; SRC-LABEL: test1:
 ; SRC:       # %bb.0:
 ; SRC-NEXT:    movq %rdi, %rax
-; SRC-NEXT:    addl %esi, %esi
+; SRC-NEXT:    addq %rsi, %rsi
 ; SRC-NEXT:    addb $3, %sil
 ; SRC-NEXT:    movq $0, -{{[0-9]+}}(%rsp)
 ; SRC-NEXT:    movq $0, -{{[0-9]+}}(%rsp)
@@ -167,7 +167,7 @@ define i256 @test1(i256 %a) nounwind {
 ; LIN-LABEL: test1:
 ; LIN:       # %bb.0:
 ; LIN-NEXT:    movq %rdi, %rax
-; LIN-NEXT:    leal (%rsi,%rsi), %edx
+; LIN-NEXT:    leaq (%rsi,%rsi), %rdx
 ; LIN-NEXT:    addb $3, %dl
 ; LIN-NEXT:    movl %edx, %ecx
 ; LIN-NEXT:    shrb $3, %cl
diff --git a/llvm/test/CodeGen/X86/vector-sext.ll b/llvm/test/CodeGen/X86/vector-sext.ll
index 85c1e25c29ed5..739d5b1d32e86 100644
--- a/llvm/test/CodeGen/X86/vector-sext.ll
+++ b/llvm/test/CodeGen/X86/vector-sext.ll
@@ -3615,13 +3615,10 @@ define <4 x i32> @sext_4i17_to_4i32(ptr %ptr) {
 ; SSE2-NEXT:    movd %ecx, %xmm1
 ; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
 ; SSE2-NEXT:    movl 8(%rdi), %ecx
-; SSE2-NEXT:    shll $28, %ecx
-; SSE2-NEXT:    movq %rax, %rdx
-; SSE2-NEXT:    shrq $51, %rdx
-; SSE2-NEXT:    shll $15, %edx
-; SSE2-NEXT:    orl %ecx, %edx
-; SSE2-NEXT:    sarl $15, %edx
-; SSE2-NEXT:    movd %edx, %xmm1
+; SSE2-NEXT:    shldq $13, %rax, %rcx
+; SSE2-NEXT:    shll $15, %ecx
+; SSE2-NEXT:    sarl $15, %ecx
+; SSE2-NEXT:    movd %ecx, %xmm1
 ; SSE2-NEXT:    shrq $34, %rax
 ; SSE2-NEXT:    shll $15, %eax
 ; SSE2-NEXT:    sarl $15, %eax
@@ -3644,13 +3641,10 @@ define <4 x i32> @sext_4i17_to_4i32(ptr %ptr) {
 ; SSSE3-NEXT:    movd %ecx, %xmm1
 ; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
 ; SSSE3-NEXT:    movl 8(%rdi), %ecx
-; SSSE3-NEXT:    shll $28, %ecx
-; SSSE3-NEXT:    movq %rax, %rdx
-; SSSE3-NEXT:    shrq $51, %rdx
-; SSSE3-NEXT:    shll $15, %edx
-; SSSE3-NEXT:    orl %ecx, %edx
-; SSSE3-NEXT:    sarl $15, %edx
-; SSSE3-NEXT:    movd %edx, %xmm1
+; SSSE3-NEXT:    shldq $13, %rax, %rcx
+; SSSE3-NEXT:    shll $15, %ecx
+; SSSE3-NEXT:    sarl $15, %ecx
+; SSSE3-NEXT:    movd %ecx, %xmm1
 ; SSSE3-NEXT:    shrq $34, %rax
 ; SSSE3-NEXT:    shll $15, %eax
 ; SSSE3-NEXT:    sarl $15, %eax
@@ -3662,53 +3656,47 @@ define <4 x i32> @sext_4i17_to_4i32(ptr %ptr) {
 ; SSE41-LABEL: sext_4i17_to_4i32:
 ; SSE41:       # %bb.0:
 ; SSE41-NEXT:    movq (%rdi), %rax
-; SSE41-NEXT:    movq %rax, %rcx
-; SSE41-NEXT:    shrq $17, %rcx
+; SSE41-NEXT:    movl %eax, %ecx
+; SSE41-NEXT:    movq %rax, %rdx
+; SSE41-NEXT:    movl 8(%rdi), %esi
+; SSE41-NEXT:    shldq $13, %rax, %rsi
+; SSE41-NEXT:    shrq $17, %rax
+; SSE41-NEXT:    shll $15, %eax
+; SSE41-NEXT:    sarl $15, %eax
 ; SSE41-NEXT:    shll $15, %ecx
 ; SSE41-NEXT:    sarl $15, %ecx
-; SSE41-NEXT:    movl %eax, %edx
+; SSE41-NEXT:    movd %ecx, %xmm0
+; SSE41-NEXT:    pinsrd $1, %eax, %xmm0
+; SSE41-NEXT:    shrq $34, %rdx
 ; SSE41-NEXT:    shll $15, %edx
 ; SSE41-NEXT:    sarl $15, %edx
-; SSE41-NEXT:    movd %edx, %xmm0
-; SSE41-NEXT:    pinsrd $1, %ecx, %xmm0
-; SSE41-NEXT:    movq %rax, %rcx
-; SSE41-NEXT:    shrq $34, %rcx
-; SSE41-NEXT:    shll $15, %ecx
-; SSE41-NEXT:    sarl $15, %ecx
-; SSE41-NEXT:    pinsrd $2, %ecx, %xmm0
-; SSE41-NEXT:    movl 8(%rdi), %ecx
-; SSE41-NEXT:    shll $28, %ecx
-; SSE41-NEXT:    shrq $51, %rax
-; SSE41-NEXT:    shll $15, %eax
-; SSE41-NEXT:    orl %ecx, %eax
-; SSE41-NEXT:    sarl $15, %eax
-; SSE41-NEXT:    pinsrd $3, %eax, %xmm0
+; SSE41-NEXT:    pinsrd $2, %edx, %xmm0
+; SSE41-NEXT:    shll $15, %esi
+; SSE41-NEXT:    sarl $15, %esi
+; SSE41-NEXT:    pinsrd $3, %esi, %xmm0
 ; SSE41-NEXT:    retq
 ;
 ; AVX-LABEL: sext_4i17_to_4i32:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    movq (%rdi), %rax
-; AVX-NEXT:    movq %rax, %rcx
-; AVX-NEXT:    shrq $17, %rcx
+; AVX-NEXT:    movl %eax, %ecx
+; AVX-NEXT:    movq %rax, %rdx
+; AVX-NEXT:    movl 8(%rdi), %esi
+; AVX-NEXT:    shldq $13, %rax, %rsi
+; AVX-NEXT:    shrq $17, %rax
+; AVX-NEXT:    shll $15, %eax
+; AVX-NEXT:    sarl $15, %eax
 ; AVX-NEXT:    shll $15, %ecx
 ; AVX-NEXT:    sarl $15, %ecx
-; AVX-NEXT:    movl %eax, %edx
+; AVX-NEXT:    vmovd %ecx, %xmm0
+; AVX-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0
+; AVX-NEXT:    shrq $34, %rdx
 ; AVX-NEXT:    shll $15, %edx
 ; AVX-NEXT:    sarl $15, %edx
-; AVX-NEXT:    vmovd %edx, %xmm0
-; AVX-NEXT:    vpinsrd $1, %ecx, %xmm0, %xmm0
-; AVX-NEXT:    movq %rax, %rcx
-; AVX-NEXT:    shrq $34, %rcx
-; AVX-NEXT:    shll $15, %ecx
-; AVX-NEXT:    sarl $15, %ecx
-; AVX-NEXT:    vpinsrd $2, %ecx, %xmm0, %xmm0
-; AVX-NEXT:    movl 8(%rdi), %ecx
-; AVX-NEXT:    shll $28, %ecx
-; AVX-NEXT:    shrq $51, %rax
-; AVX-NEXT:    shll $15, %eax
-; AVX-NEXT:    orl %ecx, %eax
-; AVX-NEXT:    sarl $15, %eax
-; AVX-NEXT:    vpinsrd $3, %eax, %xmm0, %xmm0
+; AVX-NEXT:    vpinsrd $2, %edx, %xmm0, %xmm0
+; AVX-NEXT:    shll $15, %esi
+; AVX-NEXT:    sarl $15, %esi
+; AVX-NEXT:    vpinsrd $3, %esi, %xmm0, %xmm0
 ; AVX-NEXT:    retq
 ;
 ; X86-SSE2-LABEL: sext_4i17_to_4i32:
diff --git a/llvm/test/CodeGen/X86/vector-zext.ll b/llvm/test/CodeGen/X86/vector-zext.ll
index 74926f46ffa43..25c438cc4c4fc 100644
--- a/llvm/test/CodeGen/X86/vector-zext.ll
+++ b/llvm/test/CodeGen/X86/vector-zext.ll
@@ -2332,11 +2332,8 @@ define <4 x i32> @zext_4i17_to_4i32(ptr %ptr) {
 ; SSE2-NEXT:    movd %ecx, %xmm1
 ; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
 ; SSE2-NEXT:    movl 8(%rdi), %ecx
-; SSE2-NEXT:    shll $13, %ecx
-; SSE2-NEXT:    movq %rax, %rdx
-; SSE2-NEXT:    shrq $51, %rdx
-; SSE2-NEXT:    orl %ecx, %edx
-; SSE2-NEXT:    movd %edx, %xmm1
+; SSE2-NEXT:    shldq $13, %rax, %rcx
+; SSE2-NEXT:    movd %ecx, %xmm1
 ; SSE2-NEXT:    shrq $34, %rax
 ; SSE2-NEXT:    movd %eax, %xmm2
 ; SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
@@ -2353,11 +2350,8 @@ define <4 x i32> @zext_4i17_to_4i32(ptr %ptr) {
 ; SSSE3-NEXT:    movd %ecx, %xmm1
 ; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
 ; SSSE3-NEXT:    movl 8(%rdi), %ecx
-; SSSE3-NEXT:    shll $13, %ecx
-; SSSE3-NEXT:    movq %rax, %rdx
-; SSSE3-NEXT:    shrq $51, %rdx
-; SSSE3-NEXT:    orl %ecx, %edx
-; SSSE3-NEXT:    movd %edx, %xmm1
+; SSSE3-NEXT:    shldq $13, %rax, %rcx
+; SSSE3-NEXT:    movd %ecx, %xmm1
 ; SSSE3-NEXT:    shrq $34, %rax
 ; SSSE3-NEXT:    movd %eax, %xmm2
 ; SSSE3-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
@@ -2367,15 +2361,12 @@ define <4 x i32> @zext_4i17_to_4i32(ptr %ptr) {
 ;
 ; SSE41-LABEL: zext_4i17_to_4i32:
 ; SSE41:       # %bb.0:
-; SSE41-NEXT:    movl 8(%rdi), %eax
-; SSE41-NEXT:    shll $13, %eax
-; SSE41-NEXT:    movq (%rdi), %rcx
-; SSE41-NEXT:    movq %rcx, %rdx
-; SSE41-NEXT:    shrq $51, %rdx
-; SSE41-NEXT:    orl %eax, %edx
-; SSE41-NEXT:    movq %rcx, %rax
+; SSE41-NEXT:    movq (%rdi), %rax
+; SSE41-NEXT:    movd %eax, %xmm0
+; SSE41-NEXT:    movq %rax, %rcx
+; SSE41-NEXT:    movl 8(%rdi), %edx
+; SSE41-NEXT:    shldq $13, %rax, %rdx
 ; SSE41-NEXT:    shrq $17, %rax
-; SSE41-NEXT:    movd %ecx, %xmm0
 ; SSE41-NEXT:    pinsrd $1, %eax, %xmm0
 ; SSE41-NEXT:    shrq $34, %rcx
 ; SSE41-NEXT:    pinsrd $2, %ecx, %xmm0
@@ -2385,15 +2376,12 @@ define <4 x i32> @zext_4i17_to_4i32(ptr %ptr) {
 ;
 ; AVX1-LABEL: zext_4i17_to_4i32:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    movl 8(%rdi), %eax
-; AVX1-NEXT:    shll $13, %eax
-; AVX1-NEXT:    movq (%rdi), %rcx
-; AVX1-NEXT:    movq %rcx, %rdx
-; AVX1-NEXT:    shrq $51, %rdx
-; AVX1-NEXT:    orl %eax, %edx
-; AVX1-NEXT:    movq %rcx, %rax
+; AVX1-NEXT:    movq (%rdi), %rax
+; AVX1-NEXT:    vmovd %eax, %xmm0
+; AVX1-NEXT:    movq %rax, %rcx
+; AVX1-NEXT:    movl 8(%rdi), %edx
+; AVX1-NEXT:    shldq $13, %rax, %rdx
 ; AVX1-NEXT:    shrq $17, %rax
-; AVX1-NEXT:    vmovd %ecx, %xmm0
 ; AVX1-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0
 ; AVX1-NEXT:    shrq $34, %rcx
 ; AVX1-NEXT:    vpinsrd $2, %ecx, %xmm0, %xmm0
@@ -2403,15 +2391,12 @@ define <4 x i32> @zext_4i17_to_4i32(ptr %ptr) {
 ;
 ; AVX2-LABEL: zext_4i17_to_4i32:
 ; AVX2:       # %bb.0:
-; AVX2-NEXT:    movl 8(%rdi), %eax
-; AVX2-NEXT:    shll $13, %eax
-; AVX2-NEXT:    movq (%rdi), %rcx
-; AVX2-NEXT:    movq %rcx, %rdx
-; AVX2-NEXT:    shrq $51, %rdx
-; AVX2-NEXT:    orl %eax, %edx
-; AVX2-NEXT:    movq %rcx, %rax
+; AVX2-NEXT:    movq (%rdi), %rax
+; AVX2-NEXT:    vmovd %eax, %xmm0
+; AVX2-NEXT:    movq %rax, %rcx
+; AVX2-NEXT:    movl 8(%rdi), %edx
+; AVX2-NEXT:    shldq $13, %rax, %rdx
 ; AVX2-NEXT:    shrq $17, %rax
-; AVX2-NEXT:    vmovd %ecx, %xmm0
 ; AVX2-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0
 ; AVX2-NEXT:    shrq $34, %rcx
 ; AVX2-NEXT:    vpinsrd $2, %ecx, %xmm0, %xmm0
@@ -2422,15 +2407,12 @@ define <4 x i32> @zext_4i17_to_4i32(ptr %ptr) {
 ;
 ; AVX512-LABEL: zext_4i17_to_4i32:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    movl 8(%rdi), %eax
-; AVX512-NEXT:    shll $13, %eax
-; AVX512-NEXT:    movq (%rdi), %rcx
-; AVX512-NEXT:    movq %rcx, %rdx
-; AVX512-NEXT:    shrq $51, %rdx
-; AVX512-NEXT:    orl %eax, %edx
-; AVX512-NEXT:    movq %rcx, %rax
+; AVX512-NEXT:    movq (%rdi), %rax
+; AVX512-NEXT:    vmovd %eax, %xmm0
+; AVX512-NEXT:    movq %rax, %rcx
+; AVX512-NEXT:    movl 8(%rdi), %edx
+; AVX512-NEXT:    shldq $13, %rax, %rdx
 ; AVX512-NEXT:    shrq $17, %rax
-; AVX512-NEXT:    vmovd %ecx, %xmm0
 ; AVX512-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0
 ; AVX512-NEXT:    shrq $34, %rcx
 ; AVX512-NEXT:    vpinsrd $2, %ecx, %xmm0, %xmm0
diff --git a/llvm/test/CodeGen/X86/widen-load-of-small-alloca-with-zero-upper-half.ll b/llvm/test/CodeGen/X86/widen-load-of-small-alloca-with-zero-upper-half.ll
index 9ae1f270e8833..2611399458c27 100644
--- a/llvm/test/CodeGen/X86/widen-load-of-small-alloca-with-zero-upper-half.ll
+++ b/llvm/test/CodeGen/X86/widen-load-of-small-alloca-with-zero-upper-half.ll
@@ -171,16 +171,16 @@ define void @load_2byte_chunk_of_4byte_alloca_with_zero_upper_half(ptr %src, i64
 define void @load_1byte_chunk_of_8byte_alloca_with_zero_upper_half(ptr %src, i64 %byteOff, ptr %dst) nounwind {
 ; X64-NO-BMI2-LABEL: load_1byte_chunk_of_8byte_alloca_with_zero_upper_half:
 ; X64-NO-BMI2:       # %bb.0:
-; X64-NO-BMI2-NEXT:    leal (,%rsi,8), %ecx
+; X64-NO-BMI2-NEXT:    leaq (,%rsi,8), %rcx
 ; X64-NO-BMI2-NEXT:    movl (%rdi), %eax
-; X64-NO-BMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NO-BMI2-NEXT:    # kill: def $cl killed $cl killed $rcx
 ; X64-NO-BMI2-NEXT:    shrq %cl, %rax
 ; X64-NO-BMI2-NEXT:    movb %al, (%rdx)
 ; X64-NO-BMI2-NEXT:    retq
 ;
 ; X64-BMI2-LABEL: load_1byte_chunk_of_8byte_alloca_with_zero_upper_half:
 ; X64-BMI2:       # %bb.0:
-; X64-BMI2-NEXT:    shll $3, %esi
+; X64-BMI2-NEXT:    shlq $3, %rsi
 ; X64-BMI2-NEXT:    movl (%rdi), %eax
 ; X64-BMI2-NEXT:    shrxq %rsi, %rax, %rax
 ; X64-BMI2-NEXT:    movb %al, (%rdx)
@@ -248,16 +248,16 @@ define void @load_1byte_chunk_of_8byte_alloca_with_zero_upper_half(ptr %src, i64
 define void @load_2byte_chunk_of_8byte_alloca_with_zero_upper_half(ptr %src, i64 %byteOff, ptr %dst) nounwind {
 ; X64-NO-BMI2-LABEL: load_2byte_chunk_of_8byte_alloca_with_zero_upper_half:
 ; X64-NO-BMI2:       # %bb.0:
-; X64-NO-BMI2-NEXT:    leal (,%rsi,8), %ecx
+; X64-NO-BMI2-NEXT:    leaq (,%rsi,8), %rcx
 ; X64-NO-BMI2-NEXT:    movl (%rdi), %eax
-; X64-NO-BMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NO-BMI2-NEXT:    # kill: def $cl killed $cl killed $rcx
 ; X64-NO-BMI2-NEXT:    shrq %cl, %rax
 ; X64-NO-BMI2-NEXT:    movw %ax, (%rdx)
 ; X64-NO-BMI2-NEXT:    retq
 ;
 ; X64-BMI2-LABEL: load_2byte_chunk_of_8byte_alloca_with_zero_upper_half:
 ; X64-BMI2:       # %bb.0:
-; X64-BMI2-NEXT:    shll $3, %esi
+; X64-BMI2-NEXT:    shlq $3, %rsi
 ; X64-BMI2-NEXT:    movl (%rdi), %eax
 ; X64-BMI2-NEXT:    shrxq %rsi, %rax, %rax
 ; X64-BMI2-NEXT:    movw %ax, (%rdx)
@@ -324,16 +324,16 @@ define void @load_2byte_chunk_of_8byte_alloca_with_zero_upper_half(ptr %src, i64
 define void @load_4byte_chunk_of_8byte_alloca_with_zero_upper_half(ptr %src, i64 %byteOff, ptr %dst) nounwind {
 ; X64-NO-BMI2-LABEL: load_4byte_chunk_of_8byte_alloca_with_zero_upper_half:
 ; X64-NO-BMI2:       # %bb.0:
-; X64-NO-BMI2-NEXT:    leal (,%rsi,8), %ecx
+; X64-NO-BMI2-NEXT:    leaq (,%rsi,8), %rcx
 ; X64-NO-BMI2-NEXT:    movl (%rdi), %eax
-; X64-NO-BMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NO-BMI2-NEXT:    # kill: def $cl killed $cl killed $rcx
 ; X64-NO-BMI2-NEXT:    shrq %cl, %rax
 ; X64-NO-BMI2-NEXT:    movl %eax, (%rdx)
 ; X64-NO-BMI2-NEXT:    retq
 ;
 ; X64-BMI2-LABEL: load_4byte_chunk_of_8byte_alloca_with_zero_upper_half:
 ; X64-BMI2:       # %bb.0:
-; X64-BMI2-NEXT:    shll $3, %esi
+; X64-BMI2-NEXT:    shlq $3, %rsi
 ; X64-BMI2-NEXT:    movl (%rdi), %eax
 ; X64-BMI2-NEXT:    shrxq %rsi, %rax, %rax
 ; X64-BMI2-NEXT:    movl %eax, (%rdx)
@@ -402,7 +402,7 @@ define void @load_1byte_chunk_of_16byte_alloca_with_zero_upper_half(ptr %src, i6
 ; X64-NO-BMI2-NO-SHLD:       # %bb.0:
 ; X64-NO-BMI2-NO-SHLD-NEXT:    movq %rsi, %rcx
 ; X64-NO-BMI2-NO-SHLD-NEXT:    movq (%rdi), %rax
-; X64-NO-BMI2-NO-SHLD-NEXT:    shll $3, %ecx
+; X64-NO-BMI2-NO-SHLD-NEXT:    shlq $3, %rcx
 ; X64-NO-BMI2-NO-SHLD-NEXT:    shrq %cl, %rax
 ; X64-NO-BMI2-NO-SHLD-NEXT:    xorl %esi, %esi
 ; X64-NO-BMI2-NO-SHLD-NEXT:    testb $64, %cl
@@ -414,7 +414,7 @@ define void @load_1byte_chunk_of_16byte_alloca_with_zero_upper_half(ptr %src, i6
 ; X64-SHLD:       # %bb.0:
 ; X64-SHLD-NEXT:    movq %rsi, %rcx
 ; X64-SHLD-NEXT:    movq (%rdi), %rax
-; X64-SHLD-NEXT:    shll $3, %ecx
+; X64-SHLD-NEXT:    shlq $3, %rcx
 ; X64-SHLD-NEXT:    xorl %esi, %esi
 ; X64-SHLD-NEXT:    shrdq %cl, %rsi, %rax
 ; X64-SHLD-NEXT:    testb $64, %cl
@@ -424,7 +424,7 @@ define void @load_1byte_chunk_of_16byte_alloca_with_zero_upper_half(ptr %src, i6
 ;
 ; X64-HAVE-BMI2-NO-SHLD-LABEL: load_1byte_chunk_of_16byte_alloca_with_zero_upper_half:
 ; X64-HAVE-BMI2-NO-SHLD:       # %bb.0:
-; X64-HAVE-BMI2-NO-SHLD-NEXT:    shll $3, %esi
+; X64-HAVE-BMI2-NO-SHLD-NEXT:    shlq $3, %rsi
 ; X64-HAVE-BMI2-NO-SHLD-NEXT:    shrxq %rsi, (%rdi), %rax
 ; X64-HAVE-BMI2-NO-SHLD-NEXT:    xorl %ecx, %ecx
 ; X64-HAVE-BMI2-NO-SHLD-NEXT:    testb $64, %sil
@@ -475,7 +475,7 @@ define void @load_2byte_chunk_of_16byte_alloca_with_zero_upper_half(ptr %src, i6
 ; X64-NO-BMI2-NO-SHLD:       # %bb.0:
 ; X64-NO-BMI2-NO-SHLD-NEXT:    movq %rsi, %rcx
 ; X64-NO-BMI2-NO-SHLD-NEXT:    movq (%rdi), %rax
-; X64-NO-BMI2-NO-SHLD-NEXT:    shll $3, %ecx
+; X64-NO-BMI2-NO-SHLD-NEXT:    shlq $3, %rcx
 ; X64-NO-BMI2-NO-SHLD-NEXT:    shrq %cl, %rax
 ; X64-NO-BMI2-NO-SHLD-NEXT:    xorl %esi, %esi
 ; X64-NO-BMI2-NO-SHLD-NEXT:    testb $64, %cl
@@ -487,7 +487,7 @@ define void @load_2byte_chunk_of_16byte_alloca_with_zero_upper_half(ptr %src, i6
 ; X64-SHLD:       # %bb.0:
 ; X64-SHLD-NEXT:    movq %rsi, %rcx
 ; X64-SHLD-NEXT:    movq (%rdi), %rax
-; X64-SHLD-NEXT:    shll $3, %ecx
+; X64-SHLD-NEXT:    shlq $3, %rcx
 ; X64-SHLD-NEXT:    xorl %esi, %esi
 ; X64-SHLD-NEXT:    shrdq %cl, %rsi, %rax
 ; X64-SHLD-NEXT:    testb $64, %cl
@@ -497,7 +497,7 @@ define void @load_2byte_chunk_of_16byte_alloca_with_zero_upper_half(ptr %src, i6
 ;
 ; X64-HAVE-BMI2-NO-SHLD-LABEL: load_2byte_chunk_of_16byte_alloca_with_zero_upper_half:
 ; X64-HAVE-BMI2-NO-SHLD:       # %bb.0:
-; X64-HAVE-BMI2-NO-SHLD-NEXT:    shll $3, %esi
+; X64-HAVE-BMI2-NO-SHLD-NEXT:    shlq $3, %rsi
 ; X64-HAVE-BMI2-NO-SHLD-NEXT:    shrxq %rsi, (%rdi), %rax
 ; X64-HAVE-BMI2-NO-SHLD-NEXT:    xorl %ecx, %ecx
 ; X64-HAVE-BMI2-NO-SHLD-NEXT:    testb $64, %sil
@@ -547,7 +547,7 @@ define void @load_4byte_chunk_of_16byte_alloca_with_zero_upper_half(ptr %src, i6
 ; X64-NO-BMI2-NO-SHLD:       # %bb.0:
 ; X64-NO-BMI2-NO-SHLD-NEXT:    movq %rsi, %rcx
 ; X64-NO-BMI2-NO-SHLD-NEXT:    movq (%rdi), %rax
-; X64-NO-BMI2-NO-SHLD-NEXT:    shll $3, %ecx
+; X64-NO-BMI2-NO-SHLD-NEXT:    shlq $3, %rcx
 ; X64-NO-BMI2-NO-SHLD-NEXT:    shrq %cl, %rax
 ; X64-NO-BMI2-NO-SHLD-NEXT:    xorl %esi, %esi
 ; X64-NO-BMI2-NO-SHLD-NEXT:    testb $64, %cl
@@ -559,7 +559,7 @@ define void @load_4byte_chunk_of_16byte_alloca_with_zero_upper_half(ptr %src, i6
 ; X64-SHLD:       # %bb.0:
 ; X64-SHLD-NEXT:    movq %rsi, %rcx
 ; X64-SHLD-NEXT:    movq (%rdi), %rax
-; X64-SHLD-NEXT:    shll $3, %ecx
+; X64-SHLD-NEXT:    shlq $3, %rcx
 ; X64-SHLD-NEXT:    xorl %esi, %esi
 ; X64-SHLD-NEXT:    shrdq %cl, %rsi, %rax
 ; X64-SHLD-NEXT:    testb $64, %cl
@@ -569,7 +569,7 @@ define void @load_4byte_chunk_of_16byte_alloca_with_zero_upper_half(ptr %src, i6
 ;
 ; X64-HAVE-BMI2-NO-SHLD-LABEL: load_4byte_chunk_of_16byte_alloca_with_zero_upper_half:
 ; X64-HAVE-BMI2-NO-SHLD:       # %bb.0:
-; X64-HAVE-BMI2-NO-SHLD-NEXT:    shll $3, %esi
+; X64-HAVE-BMI2-NO-SHLD-NEXT:    shlq $3, %rsi
 ; X64-HAVE-BMI2-NO-SHLD-NEXT:    shrxq %rsi, (%rdi), %rax
 ; X64-HAVE-BMI2-NO-SHLD-NEXT:    xorl %ecx, %ecx
 ; X64-HAVE-BMI2-NO-SHLD-NEXT:    testb $64, %sil
@@ -619,7 +619,7 @@ define void @load_8byte_chunk_of_16byte_alloca_with_zero_upper_half(ptr %src, i6
 ; X64-NO-BMI2-NO-SHLD:       # %bb.0:
 ; X64-NO-BMI2-NO-SHLD-NEXT:    movq %rsi, %rcx
 ; X64-NO-BMI2-NO-SHLD-NEXT:    movq (%rdi), %rax
-; X64-NO-BMI2-NO-SHLD-NEXT:    shll $3, %ecx
+; X64-NO-BMI2-NO-SHLD-NEXT:    shlq $3, %rcx
 ; X64-NO-BMI2-NO-SHLD-NEXT:    shrq %cl, %rax
 ; X64-NO-BMI2-NO-SHLD-NEXT:    xorl %esi, %esi
 ; X64-NO-BMI2-NO-SHLD-NEXT:    testb $64, %cl
@@ -631,7 +631,7 @@ define void @load_8byte_chunk_of_16byte_alloca_with_zero_upper_half(ptr %src, i6
 ; X64-SHLD:       # %bb.0:
 ; X64-SHLD-NEXT:    movq %rsi, %rcx
 ; X64-SHLD-NEXT:    movq (%rdi), %rax
-; X64-SHLD-NEXT:    shll $3, %ecx
+; X64-SHLD-NEXT:    shlq $3, %rcx
 ; X64-SHLD-NEXT:    xorl %esi, %esi
 ; X64-SHLD-NEXT:    shrdq %cl, %rsi, %rax
 ; X64-SHLD-NEXT:    testb $64, %cl
@@ -641,7 +641,7 @@ define void @load_8byte_chunk_of_16byte_alloca_with_zero_upper_half(ptr %src, i6
 ;
 ; X64-HAVE-BMI2-NO-SHLD-LABEL: load_8byte_chunk_of_16byte_alloca_with_zero_upper_half:
 ; X64-HAVE-BMI2-NO-SHLD:       # %bb.0:
-; X64-HAVE-BMI2-NO-SHLD-NEXT:    shll $3, %esi
+; X64-HAVE-BMI2-NO-SHLD-NEXT:    shlq $3, %rsi
 ; X64-HAVE-BMI2-NO-SHLD-NEXT:    shrxq %rsi, (%rdi), %rax
 ; X64-HAVE-BMI2-NO-SHLD-NEXT:    xorl %ecx, %ecx
 ; X64-HAVE-BMI2-NO-SHLD-NEXT:    testb $64, %sil
@@ -692,7 +692,7 @@ define void @load_1byte_chunk_of_32byte_alloca_with_zero_upper_half(ptr %src, i6
 ; X64-LABEL: load_1byte_chunk_of_32byte_alloca_with_zero_upper_half:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movdqu (%rdi), %xmm0
-; X64-NEXT:    shll $3, %esi
+; X64-NEXT:    shlq $3, %rsi
 ; X64-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
 ; X64-NEXT:    movq %xmm0, -{{[0-9]+}}(%rsp)
 ; X64-NEXT:    movq %xmm1, -{{[0-9]+}}(%rsp)
@@ -759,7 +759,7 @@ define void @load_2byte_chunk_of_32byte_alloca_with_zero_upper_half(ptr %src, i6
 ; X64-LABEL: load_2byte_chunk_of_32byte_alloca_with_zero_upper_half:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movdqu (%rdi), %xmm0
-; X64-NEXT:    shll $3, %esi
+; X64-NEXT:    shlq $3, %rsi
 ; X64-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
 ; X64-NEXT:    movq %xmm0, -{{[0-9]+}}(%rsp)
 ; X64-NEXT:    movq %xmm1, -{{[0-9]+}}(%rsp)
@@ -825,7 +825,7 @@ define void @load_4byte_chunk_of_32byte_alloca_with_zero_upper_half(ptr %src, i6
 ; X64-LABEL: load_4byte_chunk_of_32byte_alloca_with_zero_upper_half:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movdqu (%rdi), %xmm0
-; X64-NEXT:    shll $3, %esi
+; X64-NEXT:    shlq $3, %rsi
 ; X64-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
 ; X64-NEXT:    movq %xmm0, -{{[0-9]+}}(%rsp)
 ; X64-NEXT:    movq %xmm1, -{{[0-9]+}}(%rsp)
@@ -891,7 +891,7 @@ define void @load_8byte_chunk_of_32byte_alloca_with_zero_upper_half(ptr %src, i6
 ; X64-LABEL: load_8byte_chunk_of_32byte_alloca_with_zero_upper_half:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movdqu (%rdi), %xmm0
-; X64-NEXT:    shll $3, %esi
+; X64-NEXT:    shlq $3, %rsi
 ; X64-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
 ; X64-NEXT:    movq %xmm0, -{{[0-9]+}}(%rsp)
 ; X64-NEXT:    movq %xmm1, -{{[0-9]+}}(%rsp)
@@ -959,7 +959,7 @@ define void @load_16byte_chunk_of_32byte_alloca_with_zero_upper_half(ptr %src, i
 ; X64-LABEL: load_16byte_chunk_of_32byte_alloca_with_zero_upper_half:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movdqu (%rdi), %xmm0
-; X64-NEXT:    shll $3, %esi
+; X64-NEXT:    shlq $3, %rsi
 ; X64-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
 ; X64-NEXT:    movq %xmm0, -{{[0-9]+}}(%rsp)
 ; X64-NEXT:    movq %xmm1, -{{[0-9]+}}(%rsp)
diff --git a/llvm/test/CodeGen/X86/widen-load-of-small-alloca.ll b/llvm/test/CodeGen/X86/widen-load-of-small-alloca.ll
index 929671d674e5e..38a3a0ca7ab05 100644
--- a/llvm/test/CodeGen/X86/widen-load-of-small-alloca.ll
+++ b/llvm/test/CodeGen/X86/widen-load-of-small-alloca.ll
@@ -169,15 +169,15 @@ define void @load_1byte_chunk_of_8byte_alloca(ptr %src, i64 %byteOff, ptr %dst)
 ; X64-NO-BMI2-LABEL: load_1byte_chunk_of_8byte_alloca:
 ; X64-NO-BMI2:       # %bb.0:
 ; X64-NO-BMI2-NEXT:    movq (%rdi), %rax
-; X64-NO-BMI2-NEXT:    leal (,%rsi,8), %ecx
-; X64-NO-BMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NO-BMI2-NEXT:    leaq (,%rsi,8), %rcx
+; X64-NO-BMI2-NEXT:    # kill: def $cl killed $cl killed $rcx
 ; X64-NO-BMI2-NEXT:    shrq %cl, %rax
 ; X64-NO-BMI2-NEXT:    movb %al, (%rdx)
 ; X64-NO-BMI2-NEXT:    retq
 ;
 ; X64-BMI2-LABEL: load_1byte_chunk_of_8byte_alloca:
 ; X64-BMI2:       # %bb.0:
-; X64-BMI2-NEXT:    shll $3, %esi
+; X64-BMI2-NEXT:    shlq $3, %rsi
 ; X64-BMI2-NEXT:    shrxq %rsi, (%rdi), %rax
 ; X64-BMI2-NEXT:    movb %al, (%rdx)
 ; X64-BMI2-NEXT:    retq
@@ -293,15 +293,15 @@ define void @load_2byte_chunk_of_8byte_alloca(ptr %src, i64 %byteOff, ptr %dst)
 ; X64-NO-BMI2-LABEL: load_2byte_chunk_of_8byte_alloca:
 ; X64-NO-BMI2:       # %bb.0:
 ; X64-NO-BMI2-NEXT:    movq (%rdi), %rax
-; X64-NO-BMI2-NEXT:    leal (,%rsi,8), %ecx
-; X64-NO-BMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NO-BMI2-NEXT:    leaq (,%rsi,8), %rcx
+; X64-NO-BMI2-NEXT:    # kill: def $cl killed $cl killed $rcx
 ; X64-NO-BMI2-NEXT:    shrq %cl, %rax
 ; X64-NO-BMI2-NEXT:    movw %ax, (%rdx)
 ; X64-NO-BMI2-NEXT:    retq
 ;
 ; X64-BMI2-LABEL: load_2byte_chunk_of_8byte_alloca:
 ; X64-BMI2:       # %bb.0:
-; X64-BMI2-NEXT:    shll $3, %esi
+; X64-BMI2-NEXT:    shlq $3, %rsi
 ; X64-BMI2-NEXT:    shrxq %rsi, (%rdi), %rax
 ; X64-BMI2-NEXT:    movw %ax, (%rdx)
 ; X64-BMI2-NEXT:    retq
@@ -414,15 +414,15 @@ define void @load_4byte_chunk_of_8byte_alloca(ptr %src, i64 %byteOff, ptr %dst)
 ; X64-NO-BMI2-LABEL: load_4byte_chunk_of_8byte_alloca:
 ; X64-NO-BMI2:       # %bb.0:
 ; X64-NO-BMI2-NEXT:    movq (%rdi), %rax
-; X64-NO-BMI2-NEXT:    leal (,%rsi,8), %ecx
-; X64-NO-BMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NO-BMI2-NEXT:    leaq (,%rsi,8), %rcx
+; X64-NO-BMI2-NEXT:    # kill: def $cl killed $cl killed $rcx
 ; X64-NO-BMI2-NEXT:    shrq %cl, %rax
 ; X64-NO-BMI2-NEXT:    movl %eax, (%rdx)
 ; X64-NO-BMI2-NEXT:    retq
 ;
 ; X64-BMI2-LABEL: load_4byte_chunk_of_8byte_alloca:
 ; X64-BMI2:       # %bb.0:
-; X64-BMI2-NEXT:    shll $3, %esi
+; X64-BMI2-NEXT:    shlq $3, %rsi
 ; X64-BMI2-NEXT:    shrxq %rsi, (%rdi), %rax
 ; X64-BMI2-NEXT:    movl %eax, (%rdx)
 ; X64-BMI2-NEXT:    retq
@@ -537,7 +537,7 @@ define void @load_1byte_chunk_of_16byte_alloca(ptr %src, i64 %byteOff, ptr %dst)
 ; X64-NO-BMI2-NO-SHLD-LABEL: load_1byte_chunk_of_16byte_alloca:
 ; X64-NO-BMI2-NO-SHLD:       # %bb.0:
 ; X64-NO-BMI2-NO-SHLD-NEXT:    movdqu (%rdi), %xmm0
-; X64-NO-BMI2-NO-SHLD-NEXT:    shll $3, %esi
+; X64-NO-BMI2-NO-SHLD-NEXT:    shlq $3, %rsi
 ; X64-NO-BMI2-NO-SHLD-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
 ; X64-NO-BMI2-NO-SHLD-NEXT:    movq %xmm1, %rax
 ; X64-NO-BMI2-NO-SHLD-NEXT:    movq %xmm0, %rdi
@@ -558,7 +558,7 @@ define void @load_1byte_chunk_of_16byte_alloca(ptr %src, i64 %byteOff, ptr %dst)
 ; X64-NO-BMI2-HAVE-SHLD:       # %bb.0:
 ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq %rsi, %rcx
 ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movdqu (%rdi), %xmm0
-; X64-NO-BMI2-HAVE-SHLD-NEXT:    shll $3, %ecx
+; X64-NO-BMI2-HAVE-SHLD-NEXT:    shlq $3, %rcx
 ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq %xmm0, %rax
 ; X64-NO-BMI2-HAVE-SHLD-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
 ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq %xmm0, %rsi
@@ -572,7 +572,7 @@ define void @load_1byte_chunk_of_16byte_alloca(ptr %src, i64 %byteOff, ptr %dst)
 ; X64-HAVE-BMI2-NO-SHLD-LABEL: load_1byte_chunk_of_16byte_alloca:
 ; X64-HAVE-BMI2-NO-SHLD:       # %bb.0:
 ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movdqu (%rdi), %xmm0
-; X64-HAVE-BMI2-NO-SHLD-NEXT:    shll $3, %esi
+; X64-HAVE-BMI2-NO-SHLD-NEXT:    shlq $3, %rsi
 ; X64-HAVE-BMI2-NO-SHLD-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
 ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq %xmm1, %rax
 ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq %xmm0, %rcx
@@ -592,7 +592,7 @@ define void @load_1byte_chunk_of_16byte_alloca(ptr %src, i64 %byteOff, ptr %dst)
 ; X64-HAVE-BMI2-HAVE-SHLD:       # %bb.0:
 ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq %rsi, %rcx
 ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movdqu (%rdi), %xmm0
-; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    shll $3, %ecx
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    shlq $3, %rcx
 ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq %xmm0, %rax
 ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
 ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq %xmm0, %rsi
@@ -645,7 +645,7 @@ define void @load_2byte_chunk_of_16byte_alloca(ptr %src, i64 %byteOff, ptr %dst)
 ; X64-NO-BMI2-NO-SHLD-LABEL: load_2byte_chunk_of_16byte_alloca:
 ; X64-NO-BMI2-NO-SHLD:       # %bb.0:
 ; X64-NO-BMI2-NO-SHLD-NEXT:    movdqu (%rdi), %xmm0
-; X64-NO-BMI2-NO-SHLD-NEXT:    shll $3, %esi
+; X64-NO-BMI2-NO-SHLD-NEXT:    shlq $3, %rsi
 ; X64-NO-BMI2-NO-SHLD-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
 ; X64-NO-BMI2-NO-SHLD-NEXT:    movq %xmm1, %rax
 ; X64-NO-BMI2-NO-SHLD-NEXT:    movq %xmm0, %rdi
@@ -666,7 +666,7 @@ define void @load_2byte_chunk_of_16byte_alloca(ptr %src, i64 %byteOff, ptr %dst)
 ; X64-NO-BMI2-HAVE-SHLD:       # %bb.0:
 ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq %rsi, %rcx
 ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movdqu (%rdi), %xmm0
-; X64-NO-BMI2-HAVE-SHLD-NEXT:    shll $3, %ecx
+; X64-NO-BMI2-HAVE-SHLD-NEXT:    shlq $3, %rcx
 ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq %xmm0, %rax
 ; X64-NO-BMI2-HAVE-SHLD-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
 ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq %xmm0, %rsi
@@ -680,7 +680,7 @@ define void @load_2byte_chunk_of_16byte_alloca(ptr %src, i64 %byteOff, ptr %dst)
 ; X64-HAVE-BMI2-NO-SHLD-LABEL: load_2byte_chunk_of_16byte_alloca:
 ; X64-HAVE-BMI2-NO-SHLD:       # %bb.0:
 ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movdqu (%rdi), %xmm0
-; X64-HAVE-BMI2-NO-SHLD-NEXT:    shll $3, %esi
+; X64-HAVE-BMI2-NO-SHLD-NEXT:    shlq $3, %rsi
 ; X64-HAVE-BMI2-NO-SHLD-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
 ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq %xmm1, %rax
 ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq %xmm0, %rcx
@@ -700,7 +700,7 @@ define void @load_2byte_chunk_of_16byte_alloca(ptr %src, i64 %byteOff, ptr %dst)
 ; X64-HAVE-BMI2-HAVE-SHLD:       # %bb.0:
 ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq %rsi, %rcx
 ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movdqu (%rdi), %xmm0
-; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    shll $3, %ecx
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    shlq $3, %rcx
 ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq %xmm0, %rax
 ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
 ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq %xmm0, %rsi
@@ -752,7 +752,7 @@ define void @load_4byte_chunk_of_16byte_alloca(ptr %src, i64 %byteOff, ptr %dst)
 ; X64-NO-BMI2-NO-SHLD-LABEL: load_4byte_chunk_of_16byte_alloca:
 ; X64-NO-BMI2-NO-SHLD:       # %bb.0:
 ; X64-NO-BMI2-NO-SHLD-NEXT:    movdqu (%rdi), %xmm0
-; X64-NO-BMI2-NO-SHLD-NEXT:    shll $3, %esi
+; X64-NO-BMI2-NO-SHLD-NEXT:    shlq $3, %rsi
 ; X64-NO-BMI2-NO-SHLD-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
 ; X64-NO-BMI2-NO-SHLD-NEXT:    movq %xmm1, %rax
 ; X64-NO-BMI2-NO-SHLD-NEXT:    movq %xmm0, %rdi
@@ -773,7 +773,7 @@ define void @load_4byte_chunk_of_16byte_alloca(ptr %src, i64 %byteOff, ptr %dst)
 ; X64-NO-BMI2-HAVE-SHLD:       # %bb.0:
 ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq %rsi, %rcx
 ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movdqu (%rdi), %xmm0
-; X64-NO-BMI2-HAVE-SHLD-NEXT:    shll $3, %ecx
+; X64-NO-BMI2-HAVE-SHLD-NEXT:    shlq $3, %rcx
 ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq %xmm0, %rax
 ; X64-NO-BMI2-HAVE-SHLD-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
 ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq %xmm0, %rsi
@@ -787,7 +787,7 @@ define void @load_4byte_chunk_of_16byte_alloca(ptr %src, i64 %byteOff, ptr %dst)
 ; X64-HAVE-BMI2-NO-SHLD-LABEL: load_4byte_chunk_of_16byte_alloca:
 ; X64-HAVE-BMI2-NO-SHLD:       # %bb.0:
 ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movdqu (%rdi), %xmm0
-; X64-HAVE-BMI2-NO-SHLD-NEXT:    shll $3, %esi
+; X64-HAVE-BMI2-NO-SHLD-NEXT:    shlq $3, %rsi
 ; X64-HAVE-BMI2-NO-SHLD-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
 ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq %xmm1, %rax
 ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq %xmm0, %rcx
@@ -807,7 +807,7 @@ define void @load_4byte_chunk_of_16byte_alloca(ptr %src, i64 %byteOff, ptr %dst)
 ; X64-HAVE-BMI2-HAVE-SHLD:       # %bb.0:
 ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq %rsi, %rcx
 ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movdqu (%rdi), %xmm0
-; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    shll $3, %ecx
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    shlq $3, %rcx
 ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq %xmm0, %rax
 ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
 ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq %xmm0, %rsi
@@ -859,7 +859,7 @@ define void @load_8byte_chunk_of_16byte_alloca(ptr %src, i64 %byteOff, ptr %dst)
 ; X64-NO-BMI2-NO-SHLD-LABEL: load_8byte_chunk_of_16byte_alloca:
 ; X64-NO-BMI2-NO-SHLD:       # %bb.0:
 ; X64-NO-BMI2-NO-SHLD-NEXT:    movdqu (%rdi), %xmm0
-; X64-NO-BMI2-NO-SHLD-NEXT:    shll $3, %esi
+; X64-NO-BMI2-NO-SHLD-NEXT:    shlq $3, %rsi
 ; X64-NO-BMI2-NO-SHLD-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
 ; X64-NO-BMI2-NO-SHLD-NEXT:    movq %xmm1, %rax
 ; X64-NO-BMI2-NO-SHLD-NEXT:    movq %xmm0, %rdi
@@ -880,7 +880,7 @@ define void @load_8byte_chunk_of_16byte_alloca(ptr %src, i64 %byteOff, ptr %dst)
 ; X64-NO-BMI2-HAVE-SHLD:       # %bb.0:
 ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq %rsi, %rcx
 ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movdqu (%rdi), %xmm0
-; X64-NO-BMI2-HAVE-SHLD-NEXT:    shll $3, %ecx
+; X64-NO-BMI2-HAVE-SHLD-NEXT:    shlq $3, %rcx
 ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq %xmm0, %rax
 ; X64-NO-BMI2-HAVE-SHLD-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
 ; X64-NO-BMI2-HAVE-SHLD-NEXT:    movq %xmm0, %rsi
@@ -894,7 +894,7 @@ define void @load_8byte_chunk_of_16byte_alloca(ptr %src, i64 %byteOff, ptr %dst)
 ; X64-HAVE-BMI2-NO-SHLD-LABEL: load_8byte_chunk_of_16byte_alloca:
 ; X64-HAVE-BMI2-NO-SHLD:       # %bb.0:
 ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movdqu (%rdi), %xmm0
-; X64-HAVE-BMI2-NO-SHLD-NEXT:    shll $3, %esi
+; X64-HAVE-BMI2-NO-SHLD-NEXT:    shlq $3, %rsi
 ; X64-HAVE-BMI2-NO-SHLD-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
 ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq %xmm1, %rax
 ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq %xmm0, %rcx
@@ -914,7 +914,7 @@ define void @load_8byte_chunk_of_16byte_alloca(ptr %src, i64 %byteOff, ptr %dst)
 ; X64-HAVE-BMI2-HAVE-SHLD:       # %bb.0:
 ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq %rsi, %rcx
 ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movdqu (%rdi), %xmm0
-; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    shll $3, %ecx
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    shlq $3, %rcx
 ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq %xmm0, %rax
 ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
 ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq %xmm0, %rsi
@@ -971,7 +971,7 @@ define void @load_1byte_chunk_of_32byte_alloca(ptr %src, i64 %byteOff, ptr %dst)
 ; X64:       # %bb.0:
 ; X64-NEXT:    movdqu (%rdi), %xmm0
 ; X64-NEXT:    movdqu 16(%rdi), %xmm1
-; X64-NEXT:    shll $3, %esi
+; X64-NEXT:    shlq $3, %rsi
 ; X64-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3]
 ; X64-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[2,3,2,3]
 ; X64-NEXT:    movq %xmm1, -{{[0-9]+}}(%rsp)
@@ -1042,7 +1042,7 @@ define void @load_2byte_chunk_of_32byte_alloca(ptr %src, i64 %byteOff, ptr %dst)
 ; X64:       # %bb.0:
 ; X64-NEXT:    movdqu (%rdi), %xmm0
 ; X64-NEXT:    movdqu 16(%rdi), %xmm1
-; X64-NEXT:    shll $3, %esi
+; X64-NEXT:    shlq $3, %rsi
 ; X64-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3]
 ; X64-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[2,3,2,3]
 ; X64-NEXT:    movq %xmm1, -{{[0-9]+}}(%rsp)
@@ -1112,7 +1112,7 @@ define void @load_4byte_chunk_of_32byte_alloca(ptr %src, i64 %byteOff, ptr %dst)
 ; X64:       # %bb.0:
 ; X64-NEXT:    movdqu (%rdi), %xmm0
 ; X64-NEXT:    movdqu 16(%rdi), %xmm1
-; X64-NEXT:    shll $3, %esi
+; X64-NEXT:    shlq $3, %rsi
 ; X64-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3]
 ; X64-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[2,3,2,3]
 ; X64-NEXT:    movq %xmm1, -{{[0-9]+}}(%rsp)
@@ -1182,7 +1182,7 @@ define void @load_8byte_chunk_of_32byte_alloca(ptr %src, i64 %byteOff, ptr %dst)
 ; X64:       # %bb.0:
 ; X64-NEXT:    movdqu (%rdi), %xmm0
 ; X64-NEXT:    movdqu 16(%rdi), %xmm1
-; X64-NEXT:    shll $3, %esi
+; X64-NEXT:    shlq $3, %rsi
 ; X64-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3]
 ; X64-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[2,3,2,3]
 ; X64-NEXT:    movq %xmm1, -{{[0-9]+}}(%rsp)
@@ -1254,7 +1254,7 @@ define void @load_16byte_chunk_of_32byte_alloca(ptr %src, i64 %byteOff, ptr %dst
 ; X64:       # %bb.0:
 ; X64-NEXT:    movdqu (%rdi), %xmm0
 ; X64-NEXT:    movdqu 16(%rdi), %xmm1
-; X64-NEXT:    shll $3, %esi
+; X64-NEXT:    shlq $3, %rsi
 ; X64-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3]
 ; X64-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[2,3,2,3]
 ; X64-NEXT:    movq %xmm1, -{{[0-9]+}}(%rsp)
diff --git a/llvm/test/CodeGen/X86/zext-logicop-shift-load.ll b/llvm/test/CodeGen/X86/zext-logicop-shift-load.ll
index 3f64a383abd2c..9d9e13dd8dfe5 100644
--- a/llvm/test/CodeGen/X86/zext-logicop-shift-load.ll
+++ b/llvm/test/CodeGen/X86/zext-logicop-shift-load.ll
@@ -15,7 +15,7 @@ define i64 @test1(ptr %data) {
 ; X64-LABEL: test1:
 ; X64:       # %bb.0: # %entry
 ; X64-NEXT:    movl (%rdi), %eax
-; X64-NEXT:    shll $2, %eax
+; X64-NEXT:    shlq $2, %rax
 ; X64-NEXT:    andl $60, %eax
 ; X64-NEXT:    retq
 entry:

From 93a276ff49c6fdfd4745c99e828bbcaed9ba84a4 Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Mon, 20 May 2024 20:02:48 +0800
Subject: [PATCH 2/2] [SDAG] Fix bitwidth checks.

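For illustration only (an assumed pre-legalization node where a combine has
already given the shift amount the target's narrower shift-amount type, e.g.
i8 on x86):

  t3: i64 = shl t2, Constant:i8<32>

Here BitWidth is 64 but operand 1 is only 8 bits wide, so picking a 16- or
32-bit SmallVT would require a widening "truncate" of the shift amount.
Bounding the loop by the narrower operand width (MinWidth) keeps SmallVT no
wider than either operand.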
---
 llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 32a8dfdd3f75d..acb3debf80d8b 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -596,11 +596,14 @@ bool TargetLowering::ShrinkDemandedOp(SDValue Op, unsigned BitWidth,
   // Op's type. For expedience, just check power-of-2 integer types.
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   unsigned DemandedSize = DemandedBits.getActiveBits();
+  // Types of LHS and RHS may differ before legalization (e.g., shl), so we
+  // need to check both.
+  unsigned MinWidth =
+      std::min(Op.getOperand(0).getValueType().getScalarSizeInBits(),
+               Op.getOperand(1).getValueType().getScalarSizeInBits());
   for (unsigned SmallVTBits = llvm::bit_ceil(DemandedSize);
-       SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
+       SmallVTBits < MinWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
     EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), SmallVTBits);
-    // Types of LHS and RHS may differ before legalization (e.g., shl), so we
-    // need to check both.
     if (TLI.isTruncateFree(Op.getOperand(0).getValueType(), SmallVT) &&
         TLI.isTruncateFree(Op.getOperand(1).getValueType(), SmallVT) &&
         TLI.isZExtFree(SmallVT, VT)) {


