[llvm] [SDAG] Fix type checks in `ShrinkDemandedOp` to avoid creating invalid truncates (PR #92730)

via llvm-commits llvm-commits at lists.llvm.org
Mon May 20 03:06:38 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-x86

Author: Yingwei Zheng (dtcxzyw)

<details>
<summary>Changes</summary>

In `TargetLowering::ShrinkDemandedOp`, the types of the LHS and RHS may differ before legalization, so we should check both of them.

In the reported case, `VT` is `i64` and `SmallVT` is `i32`, but the type of the RHS (the shift amount) is `i8`.

See the description of `ISD::SHL` for further information:
> After legalization, the type of the shift amount is known to be TLI.getShiftAmountTy().  Before legalization the shift amount can be any type, but care must be taken to ensure it is large enough.

https://github.com/llvm/llvm-project/blob/605ae4e93be8976095c7eedf5c08bfdb9ff71257/llvm/include/llvm/CodeGen/ISDOpcodes.h#L691-L712

Fixes https://github.com/llvm/llvm-project/issues/92720.
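
For reference, here is a minimal sketch of the guarded check, reusing the `Op`/`VT`/`SmallVTBits`/`DAG`/`TLI`/`dl` names from the surrounding `ShrinkDemandedOp` code (the exact change is in the hunk below; the body of the `if` follows the existing upstream code, which the truncated hunk only partially shows). The comments annotate the crash case:

```cpp
// Sketch only. The crash case from issue 92720 is a pre-legalization
// (shl i64 %x, i8 %amt): VT is i64 and the candidate SmallVT is i32, but
// Op.getOperand(1) is only i8, so emitting an ISD::TRUNCATE from i8 to i32
// would be a widening -- i.e. invalid -- truncate.
EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), SmallVTBits);
// Checking only isTruncateFree(VT, SmallVT) misses the narrow shift amount;
// querying each operand's own type rejects SmallVT unless every operand can
// actually be truncated to it.
if (TLI.isTruncateFree(Op.getOperand(0).getValueType(), SmallVT) &&
    TLI.isTruncateFree(Op.getOperand(1).getValueType(), SmallVT) &&
    TLI.isZExtFree(SmallVT, VT)) {
  // Both operand types are now known to be truncatable to SmallVT, so the
  // truncates feeding the narrowed node are well-formed.
  SDValue X = DAG.getNode(
      Op.getOpcode(), dl, SmallVT,
      DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(0)),
      DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(1)));
  // ... the existing code then zero-extends X back to VT and reports the
  // simplified value.
}
```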

---

Patch is 39.86 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/92730.diff


12 Files Affected:

- (modified) llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp (+5-1) 
- (modified) llvm/test/CodeGen/X86/btc_bts_btr.ll (+3-3) 
- (modified) llvm/test/CodeGen/X86/narrow-shl-cst.ll (+3-3) 
- (modified) llvm/test/CodeGen/X86/pr27202.ll (+2-2) 
- (modified) llvm/test/CodeGen/X86/pr49162.ll (+1-4) 
- (added) llvm/test/CodeGen/X86/pr92720.ll (+15) 
- (modified) llvm/test/CodeGen/X86/scheduler-backtracking.ll (+6-6) 
- (modified) llvm/test/CodeGen/X86/vector-sext.ll (+36-48) 
- (modified) llvm/test/CodeGen/X86/vector-zext.ll (+24-42) 
- (modified) llvm/test/CodeGen/X86/widen-load-of-small-alloca-with-zero-upper-half.ll (+26-26) 
- (modified) llvm/test/CodeGen/X86/widen-load-of-small-alloca.ll (+30-30) 
- (modified) llvm/test/CodeGen/X86/zext-logicop-shift-load.ll (+1-1) 


``````````diff
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 3ec6b9b795079..32a8dfdd3f75d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -599,7 +599,11 @@ bool TargetLowering::ShrinkDemandedOp(SDValue Op, unsigned BitWidth,
   for (unsigned SmallVTBits = llvm::bit_ceil(DemandedSize);
        SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
     EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), SmallVTBits);
-    if (TLI.isTruncateFree(VT, SmallVT) && TLI.isZExtFree(SmallVT, VT)) {
+    // Types of LHS and RHS may differ before legalization (e.g., shl), so we
+    // need to check both.
+    if (TLI.isTruncateFree(Op.getOperand(0).getValueType(), SmallVT) &&
+        TLI.isTruncateFree(Op.getOperand(1).getValueType(), SmallVT) &&
+        TLI.isZExtFree(SmallVT, VT)) {
       // We found a type with free casts.
       SDValue X = DAG.getNode(
           Op.getOpcode(), dl, SmallVT,
diff --git a/llvm/test/CodeGen/X86/btc_bts_btr.ll b/llvm/test/CodeGen/X86/btc_bts_btr.ll
index efd9d1105d975..e110e5c9274db 100644
--- a/llvm/test/CodeGen/X86/btc_bts_btr.ll
+++ b/llvm/test/CodeGen/X86/btc_bts_btr.ll
@@ -1021,7 +1021,7 @@ define i64 @btr_64_mask_zeros(i64 %x, i64 %n) {
 ; X64-LABEL: btr_64_mask_zeros:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movq %rdi, %rax
-; X64-NEXT:    shll $2, %esi
+; X64-NEXT:    shlq $2, %rsi
 ; X64-NEXT:    btrq %rsi, %rax
 ; X64-NEXT:    retq
 ;
@@ -1056,7 +1056,7 @@ define i64 @bts_64_mask_zeros(i64 %x, i64 %n) {
 ; X64-LABEL: bts_64_mask_zeros:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movq %rdi, %rax
-; X64-NEXT:    shll $2, %esi
+; X64-NEXT:    shlq $2, %rsi
 ; X64-NEXT:    btsq %rsi, %rax
 ; X64-NEXT:    retq
 ;
@@ -1088,7 +1088,7 @@ define i64 @btc_64_mask_zeros(i64 %x, i64 %n) {
 ; X64-LABEL: btc_64_mask_zeros:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movq %rdi, %rax
-; X64-NEXT:    shll $2, %esi
+; X64-NEXT:    shlq $2, %rsi
 ; X64-NEXT:    btcq %rsi, %rax
 ; X64-NEXT:    retq
 ;
diff --git a/llvm/test/CodeGen/X86/narrow-shl-cst.ll b/llvm/test/CodeGen/X86/narrow-shl-cst.ll
index 296ef52c3bff9..107f14a0e2d2c 100644
--- a/llvm/test/CodeGen/X86/narrow-shl-cst.ll
+++ b/llvm/test/CodeGen/X86/narrow-shl-cst.ll
@@ -151,7 +151,7 @@ define i32 @test12(i32 %x, ptr %y) nounwind {
 define i64 @test13(i64 %x, ptr %y) nounwind {
 ; CHECK-LABEL: test13:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addl %edi, %edi
+; CHECK-NEXT:    addq %rdi, %rdi
 ; CHECK-NEXT:    movzbl %dil, %eax
 ; CHECK-NEXT:    movq %rax, (%rsi)
 ; CHECK-NEXT:    retq
@@ -212,7 +212,7 @@ define i64 @test18(i64 %x) nounwind {
 ; CHECK-LABEL: test18:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movzbl %dil, %eax
-; CHECK-NEXT:    shll $10, %eax
+; CHECK-NEXT:    shlq $10, %rax
 ; CHECK-NEXT:    retq
   %and = shl i64 %x, 10
   %shl = and i64 %and, 261120
@@ -234,7 +234,7 @@ define i64 @test20(i64 %x) nounwind {
 ; CHECK-LABEL: test20:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movzwl %di, %eax
-; CHECK-NEXT:    shll $10, %eax
+; CHECK-NEXT:    shlq $10, %rax
 ; CHECK-NEXT:    retq
   %and = shl i64 %x, 10
   %shl = and i64 %and, 67107840
diff --git a/llvm/test/CodeGen/X86/pr27202.ll b/llvm/test/CodeGen/X86/pr27202.ll
index 3bd3be62fb4c8..9da22f635c266 100644
--- a/llvm/test/CodeGen/X86/pr27202.ll
+++ b/llvm/test/CodeGen/X86/pr27202.ll
@@ -45,8 +45,8 @@ define zeroext i1 @g(i32 %x) optsize {
 define i64 @PR46237(i64 %x, i64 %y, i64 %z) optsize {
 ; CHECK-LABEL: PR46237:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movl %edx, %eax
-; CHECK-NEXT:    shll $6, %eax
+; CHECK-NEXT:    movq %rdx, %rax
+; CHECK-NEXT:    shlq $6, %rax
 ; CHECK-NEXT:    movzbl %al, %ecx
 ; CHECK-NEXT:    andl $7, %esi
 ; CHECK-NEXT:    andl $7, %edx
diff --git a/llvm/test/CodeGen/X86/pr49162.ll b/llvm/test/CodeGen/X86/pr49162.ll
index 0e65e121531bf..db8cec61acd6b 100644
--- a/llvm/test/CodeGen/X86/pr49162.ll
+++ b/llvm/test/CodeGen/X86/pr49162.ll
@@ -17,10 +17,7 @@ define ptr @PR49162(ptr %base, ptr %ptr160) {
 ;
 ; X64-LABEL: PR49162:
 ; X64:       # %bb.0:
-; X64-NEXT:    movl 8(%rsi), %eax
-; X64-NEXT:    shll $16, %eax
-; X64-NEXT:    cltq
-; X64-NEXT:    sarq $16, %rax
+; X64-NEXT:    movswq 8(%rsi), %rax
 ; X64-NEXT:    leaq (%rdi,%rax,4), %rax
 ; X64-NEXT:    retq
   %load160 = load i160, ptr %ptr160, align 4
diff --git a/llvm/test/CodeGen/X86/pr92720.ll b/llvm/test/CodeGen/X86/pr92720.ll
new file mode 100644
index 0000000000000..b2543c08328c7
--- /dev/null
+++ b/llvm/test/CodeGen/X86/pr92720.ll
@@ -0,0 +1,15 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=x86_64-linux-gnu | FileCheck %s
+
+; Make sure we don't crash when shrinking the shift amount before legalization.
+define i64 @pr92720(i64 %x) {
+; CHECK-LABEL: pr92720:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movabsq $8589934592, %rax # imm = 0x200000000
+; CHECK-NEXT:    retq
+  %or = or i64 %x, 255
+  %sub = sub i64 0, %or
+  %shl = shl i64 1, %sub
+  %sext = shl i64 %shl, 32
+  ret i64 %sext
+}
diff --git a/llvm/test/CodeGen/X86/scheduler-backtracking.ll b/llvm/test/CodeGen/X86/scheduler-backtracking.ll
index 785b97d8c2402..53d3367cce4d3 100644
--- a/llvm/test/CodeGen/X86/scheduler-backtracking.ll
+++ b/llvm/test/CodeGen/X86/scheduler-backtracking.ll
@@ -13,7 +13,7 @@ define i256 @test1(i256 %a) nounwind {
 ; ILP-LABEL: test1:
 ; ILP:       # %bb.0:
 ; ILP-NEXT:    movq %rdi, %rax
-; ILP-NEXT:    leal (%rsi,%rsi), %ecx
+; ILP-NEXT:    leaq (%rsi,%rsi), %rcx
 ; ILP-NEXT:    movq $0, -{{[0-9]+}}(%rsp)
 ; ILP-NEXT:    movq $0, -{{[0-9]+}}(%rsp)
 ; ILP-NEXT:    movq $0, -{{[0-9]+}}(%rsp)
@@ -43,7 +43,7 @@ define i256 @test1(i256 %a) nounwind {
 ; ILP-NEXT:    shlq %cl, %rsi
 ; ILP-NEXT:    notb %cl
 ; ILP-NEXT:    shrq %rdx
-; ILP-NEXT:    # kill: def $cl killed $cl killed $ecx
+; ILP-NEXT:    # kill: def $cl killed $cl killed $rcx
 ; ILP-NEXT:    shrq %cl, %rdx
 ; ILP-NEXT:    orq %rsi, %rdx
 ; ILP-NEXT:    movq %rdx, 16(%rax)
@@ -60,7 +60,7 @@ define i256 @test1(i256 %a) nounwind {
 ; HYBRID-NEXT:    movq $0, -{{[0-9]+}}(%rsp)
 ; HYBRID-NEXT:    movq $0, -{{[0-9]+}}(%rsp)
 ; HYBRID-NEXT:    movq $0, -{{[0-9]+}}(%rsp)
-; HYBRID-NEXT:    addl %esi, %esi
+; HYBRID-NEXT:    addq %rsi, %rsi
 ; HYBRID-NEXT:    addb $3, %sil
 ; HYBRID-NEXT:    movl %esi, %ecx
 ; HYBRID-NEXT:    andb $7, %cl
@@ -97,7 +97,7 @@ define i256 @test1(i256 %a) nounwind {
 ; BURR-NEXT:    movq $0, -{{[0-9]+}}(%rsp)
 ; BURR-NEXT:    movq $0, -{{[0-9]+}}(%rsp)
 ; BURR-NEXT:    movq $0, -{{[0-9]+}}(%rsp)
-; BURR-NEXT:    addl %esi, %esi
+; BURR-NEXT:    addq %rsi, %rsi
 ; BURR-NEXT:    addb $3, %sil
 ; BURR-NEXT:    movl %esi, %ecx
 ; BURR-NEXT:    andb $7, %cl
@@ -126,7 +126,7 @@ define i256 @test1(i256 %a) nounwind {
 ; SRC-LABEL: test1:
 ; SRC:       # %bb.0:
 ; SRC-NEXT:    movq %rdi, %rax
-; SRC-NEXT:    addl %esi, %esi
+; SRC-NEXT:    addq %rsi, %rsi
 ; SRC-NEXT:    addb $3, %sil
 ; SRC-NEXT:    movq $0, -{{[0-9]+}}(%rsp)
 ; SRC-NEXT:    movq $0, -{{[0-9]+}}(%rsp)
@@ -167,7 +167,7 @@ define i256 @test1(i256 %a) nounwind {
 ; LIN-LABEL: test1:
 ; LIN:       # %bb.0:
 ; LIN-NEXT:    movq %rdi, %rax
-; LIN-NEXT:    leal (%rsi,%rsi), %edx
+; LIN-NEXT:    leaq (%rsi,%rsi), %rdx
 ; LIN-NEXT:    addb $3, %dl
 ; LIN-NEXT:    movl %edx, %ecx
 ; LIN-NEXT:    shrb $3, %cl
diff --git a/llvm/test/CodeGen/X86/vector-sext.ll b/llvm/test/CodeGen/X86/vector-sext.ll
index 85c1e25c29ed5..739d5b1d32e86 100644
--- a/llvm/test/CodeGen/X86/vector-sext.ll
+++ b/llvm/test/CodeGen/X86/vector-sext.ll
@@ -3615,13 +3615,10 @@ define <4 x i32> @sext_4i17_to_4i32(ptr %ptr) {
 ; SSE2-NEXT:    movd %ecx, %xmm1
 ; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
 ; SSE2-NEXT:    movl 8(%rdi), %ecx
-; SSE2-NEXT:    shll $28, %ecx
-; SSE2-NEXT:    movq %rax, %rdx
-; SSE2-NEXT:    shrq $51, %rdx
-; SSE2-NEXT:    shll $15, %edx
-; SSE2-NEXT:    orl %ecx, %edx
-; SSE2-NEXT:    sarl $15, %edx
-; SSE2-NEXT:    movd %edx, %xmm1
+; SSE2-NEXT:    shldq $13, %rax, %rcx
+; SSE2-NEXT:    shll $15, %ecx
+; SSE2-NEXT:    sarl $15, %ecx
+; SSE2-NEXT:    movd %ecx, %xmm1
 ; SSE2-NEXT:    shrq $34, %rax
 ; SSE2-NEXT:    shll $15, %eax
 ; SSE2-NEXT:    sarl $15, %eax
@@ -3644,13 +3641,10 @@ define <4 x i32> @sext_4i17_to_4i32(ptr %ptr) {
 ; SSSE3-NEXT:    movd %ecx, %xmm1
 ; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
 ; SSSE3-NEXT:    movl 8(%rdi), %ecx
-; SSSE3-NEXT:    shll $28, %ecx
-; SSSE3-NEXT:    movq %rax, %rdx
-; SSSE3-NEXT:    shrq $51, %rdx
-; SSSE3-NEXT:    shll $15, %edx
-; SSSE3-NEXT:    orl %ecx, %edx
-; SSSE3-NEXT:    sarl $15, %edx
-; SSSE3-NEXT:    movd %edx, %xmm1
+; SSSE3-NEXT:    shldq $13, %rax, %rcx
+; SSSE3-NEXT:    shll $15, %ecx
+; SSSE3-NEXT:    sarl $15, %ecx
+; SSSE3-NEXT:    movd %ecx, %xmm1
 ; SSSE3-NEXT:    shrq $34, %rax
 ; SSSE3-NEXT:    shll $15, %eax
 ; SSSE3-NEXT:    sarl $15, %eax
@@ -3662,53 +3656,47 @@ define <4 x i32> @sext_4i17_to_4i32(ptr %ptr) {
 ; SSE41-LABEL: sext_4i17_to_4i32:
 ; SSE41:       # %bb.0:
 ; SSE41-NEXT:    movq (%rdi), %rax
-; SSE41-NEXT:    movq %rax, %rcx
-; SSE41-NEXT:    shrq $17, %rcx
+; SSE41-NEXT:    movl %eax, %ecx
+; SSE41-NEXT:    movq %rax, %rdx
+; SSE41-NEXT:    movl 8(%rdi), %esi
+; SSE41-NEXT:    shldq $13, %rax, %rsi
+; SSE41-NEXT:    shrq $17, %rax
+; SSE41-NEXT:    shll $15, %eax
+; SSE41-NEXT:    sarl $15, %eax
 ; SSE41-NEXT:    shll $15, %ecx
 ; SSE41-NEXT:    sarl $15, %ecx
-; SSE41-NEXT:    movl %eax, %edx
+; SSE41-NEXT:    movd %ecx, %xmm0
+; SSE41-NEXT:    pinsrd $1, %eax, %xmm0
+; SSE41-NEXT:    shrq $34, %rdx
 ; SSE41-NEXT:    shll $15, %edx
 ; SSE41-NEXT:    sarl $15, %edx
-; SSE41-NEXT:    movd %edx, %xmm0
-; SSE41-NEXT:    pinsrd $1, %ecx, %xmm0
-; SSE41-NEXT:    movq %rax, %rcx
-; SSE41-NEXT:    shrq $34, %rcx
-; SSE41-NEXT:    shll $15, %ecx
-; SSE41-NEXT:    sarl $15, %ecx
-; SSE41-NEXT:    pinsrd $2, %ecx, %xmm0
-; SSE41-NEXT:    movl 8(%rdi), %ecx
-; SSE41-NEXT:    shll $28, %ecx
-; SSE41-NEXT:    shrq $51, %rax
-; SSE41-NEXT:    shll $15, %eax
-; SSE41-NEXT:    orl %ecx, %eax
-; SSE41-NEXT:    sarl $15, %eax
-; SSE41-NEXT:    pinsrd $3, %eax, %xmm0
+; SSE41-NEXT:    pinsrd $2, %edx, %xmm0
+; SSE41-NEXT:    shll $15, %esi
+; SSE41-NEXT:    sarl $15, %esi
+; SSE41-NEXT:    pinsrd $3, %esi, %xmm0
 ; SSE41-NEXT:    retq
 ;
 ; AVX-LABEL: sext_4i17_to_4i32:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    movq (%rdi), %rax
-; AVX-NEXT:    movq %rax, %rcx
-; AVX-NEXT:    shrq $17, %rcx
+; AVX-NEXT:    movl %eax, %ecx
+; AVX-NEXT:    movq %rax, %rdx
+; AVX-NEXT:    movl 8(%rdi), %esi
+; AVX-NEXT:    shldq $13, %rax, %rsi
+; AVX-NEXT:    shrq $17, %rax
+; AVX-NEXT:    shll $15, %eax
+; AVX-NEXT:    sarl $15, %eax
 ; AVX-NEXT:    shll $15, %ecx
 ; AVX-NEXT:    sarl $15, %ecx
-; AVX-NEXT:    movl %eax, %edx
+; AVX-NEXT:    vmovd %ecx, %xmm0
+; AVX-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0
+; AVX-NEXT:    shrq $34, %rdx
 ; AVX-NEXT:    shll $15, %edx
 ; AVX-NEXT:    sarl $15, %edx
-; AVX-NEXT:    vmovd %edx, %xmm0
-; AVX-NEXT:    vpinsrd $1, %ecx, %xmm0, %xmm0
-; AVX-NEXT:    movq %rax, %rcx
-; AVX-NEXT:    shrq $34, %rcx
-; AVX-NEXT:    shll $15, %ecx
-; AVX-NEXT:    sarl $15, %ecx
-; AVX-NEXT:    vpinsrd $2, %ecx, %xmm0, %xmm0
-; AVX-NEXT:    movl 8(%rdi), %ecx
-; AVX-NEXT:    shll $28, %ecx
-; AVX-NEXT:    shrq $51, %rax
-; AVX-NEXT:    shll $15, %eax
-; AVX-NEXT:    orl %ecx, %eax
-; AVX-NEXT:    sarl $15, %eax
-; AVX-NEXT:    vpinsrd $3, %eax, %xmm0, %xmm0
+; AVX-NEXT:    vpinsrd $2, %edx, %xmm0, %xmm0
+; AVX-NEXT:    shll $15, %esi
+; AVX-NEXT:    sarl $15, %esi
+; AVX-NEXT:    vpinsrd $3, %esi, %xmm0, %xmm0
 ; AVX-NEXT:    retq
 ;
 ; X86-SSE2-LABEL: sext_4i17_to_4i32:
diff --git a/llvm/test/CodeGen/X86/vector-zext.ll b/llvm/test/CodeGen/X86/vector-zext.ll
index 74926f46ffa43..25c438cc4c4fc 100644
--- a/llvm/test/CodeGen/X86/vector-zext.ll
+++ b/llvm/test/CodeGen/X86/vector-zext.ll
@@ -2332,11 +2332,8 @@ define <4 x i32> @zext_4i17_to_4i32(ptr %ptr) {
 ; SSE2-NEXT:    movd %ecx, %xmm1
 ; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
 ; SSE2-NEXT:    movl 8(%rdi), %ecx
-; SSE2-NEXT:    shll $13, %ecx
-; SSE2-NEXT:    movq %rax, %rdx
-; SSE2-NEXT:    shrq $51, %rdx
-; SSE2-NEXT:    orl %ecx, %edx
-; SSE2-NEXT:    movd %edx, %xmm1
+; SSE2-NEXT:    shldq $13, %rax, %rcx
+; SSE2-NEXT:    movd %ecx, %xmm1
 ; SSE2-NEXT:    shrq $34, %rax
 ; SSE2-NEXT:    movd %eax, %xmm2
 ; SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
@@ -2353,11 +2350,8 @@ define <4 x i32> @zext_4i17_to_4i32(ptr %ptr) {
 ; SSSE3-NEXT:    movd %ecx, %xmm1
 ; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
 ; SSSE3-NEXT:    movl 8(%rdi), %ecx
-; SSSE3-NEXT:    shll $13, %ecx
-; SSSE3-NEXT:    movq %rax, %rdx
-; SSSE3-NEXT:    shrq $51, %rdx
-; SSSE3-NEXT:    orl %ecx, %edx
-; SSSE3-NEXT:    movd %edx, %xmm1
+; SSSE3-NEXT:    shldq $13, %rax, %rcx
+; SSSE3-NEXT:    movd %ecx, %xmm1
 ; SSSE3-NEXT:    shrq $34, %rax
 ; SSSE3-NEXT:    movd %eax, %xmm2
 ; SSSE3-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
@@ -2367,15 +2361,12 @@ define <4 x i32> @zext_4i17_to_4i32(ptr %ptr) {
 ;
 ; SSE41-LABEL: zext_4i17_to_4i32:
 ; SSE41:       # %bb.0:
-; SSE41-NEXT:    movl 8(%rdi), %eax
-; SSE41-NEXT:    shll $13, %eax
-; SSE41-NEXT:    movq (%rdi), %rcx
-; SSE41-NEXT:    movq %rcx, %rdx
-; SSE41-NEXT:    shrq $51, %rdx
-; SSE41-NEXT:    orl %eax, %edx
-; SSE41-NEXT:    movq %rcx, %rax
+; SSE41-NEXT:    movq (%rdi), %rax
+; SSE41-NEXT:    movd %eax, %xmm0
+; SSE41-NEXT:    movq %rax, %rcx
+; SSE41-NEXT:    movl 8(%rdi), %edx
+; SSE41-NEXT:    shldq $13, %rax, %rdx
 ; SSE41-NEXT:    shrq $17, %rax
-; SSE41-NEXT:    movd %ecx, %xmm0
 ; SSE41-NEXT:    pinsrd $1, %eax, %xmm0
 ; SSE41-NEXT:    shrq $34, %rcx
 ; SSE41-NEXT:    pinsrd $2, %ecx, %xmm0
@@ -2385,15 +2376,12 @@ define <4 x i32> @zext_4i17_to_4i32(ptr %ptr) {
 ;
 ; AVX1-LABEL: zext_4i17_to_4i32:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    movl 8(%rdi), %eax
-; AVX1-NEXT:    shll $13, %eax
-; AVX1-NEXT:    movq (%rdi), %rcx
-; AVX1-NEXT:    movq %rcx, %rdx
-; AVX1-NEXT:    shrq $51, %rdx
-; AVX1-NEXT:    orl %eax, %edx
-; AVX1-NEXT:    movq %rcx, %rax
+; AVX1-NEXT:    movq (%rdi), %rax
+; AVX1-NEXT:    vmovd %eax, %xmm0
+; AVX1-NEXT:    movq %rax, %rcx
+; AVX1-NEXT:    movl 8(%rdi), %edx
+; AVX1-NEXT:    shldq $13, %rax, %rdx
 ; AVX1-NEXT:    shrq $17, %rax
-; AVX1-NEXT:    vmovd %ecx, %xmm0
 ; AVX1-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0
 ; AVX1-NEXT:    shrq $34, %rcx
 ; AVX1-NEXT:    vpinsrd $2, %ecx, %xmm0, %xmm0
@@ -2403,15 +2391,12 @@ define <4 x i32> @zext_4i17_to_4i32(ptr %ptr) {
 ;
 ; AVX2-LABEL: zext_4i17_to_4i32:
 ; AVX2:       # %bb.0:
-; AVX2-NEXT:    movl 8(%rdi), %eax
-; AVX2-NEXT:    shll $13, %eax
-; AVX2-NEXT:    movq (%rdi), %rcx
-; AVX2-NEXT:    movq %rcx, %rdx
-; AVX2-NEXT:    shrq $51, %rdx
-; AVX2-NEXT:    orl %eax, %edx
-; AVX2-NEXT:    movq %rcx, %rax
+; AVX2-NEXT:    movq (%rdi), %rax
+; AVX2-NEXT:    vmovd %eax, %xmm0
+; AVX2-NEXT:    movq %rax, %rcx
+; AVX2-NEXT:    movl 8(%rdi), %edx
+; AVX2-NEXT:    shldq $13, %rax, %rdx
 ; AVX2-NEXT:    shrq $17, %rax
-; AVX2-NEXT:    vmovd %ecx, %xmm0
 ; AVX2-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0
 ; AVX2-NEXT:    shrq $34, %rcx
 ; AVX2-NEXT:    vpinsrd $2, %ecx, %xmm0, %xmm0
@@ -2422,15 +2407,12 @@ define <4 x i32> @zext_4i17_to_4i32(ptr %ptr) {
 ;
 ; AVX512-LABEL: zext_4i17_to_4i32:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    movl 8(%rdi), %eax
-; AVX512-NEXT:    shll $13, %eax
-; AVX512-NEXT:    movq (%rdi), %rcx
-; AVX512-NEXT:    movq %rcx, %rdx
-; AVX512-NEXT:    shrq $51, %rdx
-; AVX512-NEXT:    orl %eax, %edx
-; AVX512-NEXT:    movq %rcx, %rax
+; AVX512-NEXT:    movq (%rdi), %rax
+; AVX512-NEXT:    vmovd %eax, %xmm0
+; AVX512-NEXT:    movq %rax, %rcx
+; AVX512-NEXT:    movl 8(%rdi), %edx
+; AVX512-NEXT:    shldq $13, %rax, %rdx
 ; AVX512-NEXT:    shrq $17, %rax
-; AVX512-NEXT:    vmovd %ecx, %xmm0
 ; AVX512-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0
 ; AVX512-NEXT:    shrq $34, %rcx
 ; AVX512-NEXT:    vpinsrd $2, %ecx, %xmm0, %xmm0
diff --git a/llvm/test/CodeGen/X86/widen-load-of-small-alloca-with-zero-upper-half.ll b/llvm/test/CodeGen/X86/widen-load-of-small-alloca-with-zero-upper-half.ll
index 9ae1f270e8833..2611399458c27 100644
--- a/llvm/test/CodeGen/X86/widen-load-of-small-alloca-with-zero-upper-half.ll
+++ b/llvm/test/CodeGen/X86/widen-load-of-small-alloca-with-zero-upper-half.ll
@@ -171,16 +171,16 @@ define void @load_2byte_chunk_of_4byte_alloca_with_zero_upper_half(ptr %src, i64
 define void @load_1byte_chunk_of_8byte_alloca_with_zero_upper_half(ptr %src, i64 %byteOff, ptr %dst) nounwind {
 ; X64-NO-BMI2-LABEL: load_1byte_chunk_of_8byte_alloca_with_zero_upper_half:
 ; X64-NO-BMI2:       # %bb.0:
-; X64-NO-BMI2-NEXT:    leal (,%rsi,8), %ecx
+; X64-NO-BMI2-NEXT:    leaq (,%rsi,8), %rcx
 ; X64-NO-BMI2-NEXT:    movl (%rdi), %eax
-; X64-NO-BMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NO-BMI2-NEXT:    # kill: def $cl killed $cl killed $rcx
 ; X64-NO-BMI2-NEXT:    shrq %cl, %rax
 ; X64-NO-BMI2-NEXT:    movb %al, (%rdx)
 ; X64-NO-BMI2-NEXT:    retq
 ;
 ; X64-BMI2-LABEL: load_1byte_chunk_of_8byte_alloca_with_zero_upper_half:
 ; X64-BMI2:       # %bb.0:
-; X64-BMI2-NEXT:    shll $3, %esi
+; X64-BMI2-NEXT:    shlq $3, %rsi
 ; X64-BMI2-NEXT:    movl (%rdi), %eax
 ; X64-BMI2-NEXT:    shrxq %rsi, %rax, %rax
 ; X64-BMI2-NEXT:    movb %al, (%rdx)
@@ -248,16 +248,16 @@ define void @load_1byte_chunk_of_8byte_alloca_with_zero_upper_half(ptr %src, i64
 define void @load_2byte_chunk_of_8byte_alloca_with_zero_upper_half(ptr %src, i64 %byteOff, ptr %dst) nounwind {
 ; X64-NO-BMI2-LABEL: load_2byte_chunk_of_8byte_alloca_with_zero_upper_half:
 ; X64-NO-BMI2:       # %bb.0:
-; X64-NO-BMI2-NEXT:    leal (,%rsi,8), %ecx
+; X64-NO-BMI2-NEXT:    leaq (,%rsi,8), %rcx
 ; X64-NO-BMI2-NEXT:    movl (%rdi), %eax
-; X64-NO-BMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NO-BMI2-NEXT:    # kill: def $cl killed $cl killed $rcx
 ; X64-NO-BMI2-NEXT:    shrq %cl, %rax
 ; X64-NO-BMI2-NEXT:    movw %ax, (%rdx)
 ; X64-NO-BMI2-NEXT:    retq
 ;
 ; X64-BMI2-LABEL: load_2byte_chunk_of_8byte_alloca_with_zero_upper_half:
 ; X64-BMI2:       # %bb.0:
-; X64-BMI2-NEXT:    shll $3, %esi
+; X64-BMI2-NEXT:    shlq $3, %rsi
 ; X64-BMI2-NEXT:    movl (%rdi), %eax
 ; X64-BMI2-NEXT:    shrxq %rsi, %rax, %rax
 ; X64-BMI2-NEXT:    movw %ax, (%rdx)
@@ -324,16 +324,16 @@ define void @load_2byte_chunk_of_8byte_alloca_with_zero_upper_half(ptr %src, i64
 define void @load_4byte_chunk_of_8byte_alloca_with_zero_upper_half(ptr %src, i64 %byteOff, ptr %dst) nounwind {
 ; X64-NO-BMI2-LABEL: load_4byte_chunk_of_8byte_alloca_with_zero_upper_half:
 ; X64-NO-BMI2:       # %bb.0:
-; X64-NO-BMI2-NEXT:    leal (,%rsi,8), %ecx
+; X64-NO-BMI2-NEXT:    leaq (,%rsi,8), %rcx
 ; X64-NO-BMI2-NEXT:    movl (%rdi), %eax
-; X64-NO-BMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NO-BMI2-NEXT:    # kill: def $cl killed $cl killed $rcx
 ; X64-NO-BMI2-NEXT:    shrq %cl, %rax
 ; X64-NO-BMI2-NEXT:    movl %eax, (%rdx)
 ; X64-NO-BMI2-NEXT:    retq
 ;
 ; X64-BMI2-LABEL: load_4byte_chunk_of_8byte_alloca_with_zero_upper_half:
 ; X64-BMI2:       # %bb.0:
-; X64-BMI2-NEXT:    shll $3, %esi
+; X64-BMI2-NEXT:    shlq $3, %rsi
 ; X64-BMI2-NEXT:    movl (%rdi), %eax
 ; X64-BMI2-NEXT:    shrxq %rsi, %rax, %rax
 ; X64-BMI2-NEXT:    movl %eax, (%rdx)
@@ -402,7 +402,7 @@ define void @load_1byte_chunk_of_16byte_alloca_with_zero_upper_half(ptr %src, i6
 ; X64-NO-BMI2-NO-SHLD:       # %bb.0:
 ; X64-NO-BMI2-NO-SHLD-NEXT:    movq %rsi, %rcx
 ; X64-NO-BMI2-NO-SHLD-NEXT:    movq (%rdi), %rax
-; X64-NO-BMI2-NO-SHLD-NEXT:    shll $3, %ecx
+; X64-NO-BMI2-NO-SHLD-NEXT:    shlq $3, %rcx
 ; X64-NO-BMI2-NO-SHLD-NEXT:    shrq %cl, %rax
 ; X64-NO-BMI2-NO-SHLD-NEXT:    xorl %esi, %esi
 ; X64-NO-BMI2-NO-SHLD-NEXT:    testb $64, %cl
@@ -414,7 +414,7 @@ define void @load_1byte_chunk_of_16byte_alloca_with_zero_upper_half(ptr %src, i6
 ; X64-SHLD:       # %bb.0:
 ; X64-SHLD-NEXT:    movq %rsi, %rcx
 ; X64-SHLD-NEXT:    movq (%rdi), %rax
-; X64-SHLD-NEXT:    shll $3, %ecx
+; X64-SHLD-NEXT:    shlq $3, %rcx
 ; X64-SHLD-NEXT:    xorl %esi, %esi
 ; X64-SHLD-NEXT:    shrdq %cl, %rsi, %rax
 ; X64-SHLD-NEXT:    testb $64, %cl
@@ -424,7 +424,7 @@ define void @load_1byte_chunk_of_16byte_alloca_with_zero_upper_half(ptr %src, i6
 ;
 ; X...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/92730


More information about the llvm-commits mailing list