[llvm] [SDAG] Fix type checks in `ShrinkDemandedOp` to avoid creating invalid truncates (PR #92730)
via llvm-commits
llvm-commits at lists.llvm.org
Mon May 20 03:06:38 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-x86
Author: Yingwei Zheng (dtcxzyw)
<details>
<summary>Changes</summary>
In `TargetLowering::ShrinkDemandedOp`, the types of the LHS and RHS may differ before legalization, so we should check both.
In the original case, `VT` is `i64` and `SmallVT` is `i32`, but the type of rhs is `i8`.
See the description about ISD::SHL for further information:
> After legalization, the type of the shift amount is known to be TLI.getShiftAmountTy(). Before legalization the shift amount can be any type, but care must be taken to ensure it is large enough.
https://github.com/llvm/llvm-project/blob/605ae4e93be8976095c7eedf5c08bfdb9ff71257/llvm/include/llvm/CodeGen/ISDOpcodes.h#L691-L712
Fixes https://github.com/llvm/llvm-project/issues/92720.
---
Patch is 39.86 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/92730.diff
12 Files Affected:
- (modified) llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp (+5-1)
- (modified) llvm/test/CodeGen/X86/btc_bts_btr.ll (+3-3)
- (modified) llvm/test/CodeGen/X86/narrow-shl-cst.ll (+3-3)
- (modified) llvm/test/CodeGen/X86/pr27202.ll (+2-2)
- (modified) llvm/test/CodeGen/X86/pr49162.ll (+1-4)
- (added) llvm/test/CodeGen/X86/pr92720.ll (+15)
- (modified) llvm/test/CodeGen/X86/scheduler-backtracking.ll (+6-6)
- (modified) llvm/test/CodeGen/X86/vector-sext.ll (+36-48)
- (modified) llvm/test/CodeGen/X86/vector-zext.ll (+24-42)
- (modified) llvm/test/CodeGen/X86/widen-load-of-small-alloca-with-zero-upper-half.ll (+26-26)
- (modified) llvm/test/CodeGen/X86/widen-load-of-small-alloca.ll (+30-30)
- (modified) llvm/test/CodeGen/X86/zext-logicop-shift-load.ll (+1-1)
``````````diff
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 3ec6b9b795079..32a8dfdd3f75d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -599,7 +599,11 @@ bool TargetLowering::ShrinkDemandedOp(SDValue Op, unsigned BitWidth,
for (unsigned SmallVTBits = llvm::bit_ceil(DemandedSize);
SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), SmallVTBits);
- if (TLI.isTruncateFree(VT, SmallVT) && TLI.isZExtFree(SmallVT, VT)) {
+ // Types of LHS and RHS may differ before legalization (e.g., shl), so we
+ // need to check both.
+ if (TLI.isTruncateFree(Op.getOperand(0).getValueType(), SmallVT) &&
+ TLI.isTruncateFree(Op.getOperand(1).getValueType(), SmallVT) &&
+ TLI.isZExtFree(SmallVT, VT)) {
// We found a type with free casts.
SDValue X = DAG.getNode(
Op.getOpcode(), dl, SmallVT,
diff --git a/llvm/test/CodeGen/X86/btc_bts_btr.ll b/llvm/test/CodeGen/X86/btc_bts_btr.ll
index efd9d1105d975..e110e5c9274db 100644
--- a/llvm/test/CodeGen/X86/btc_bts_btr.ll
+++ b/llvm/test/CodeGen/X86/btc_bts_btr.ll
@@ -1021,7 +1021,7 @@ define i64 @btr_64_mask_zeros(i64 %x, i64 %n) {
; X64-LABEL: btr_64_mask_zeros:
; X64: # %bb.0:
; X64-NEXT: movq %rdi, %rax
-; X64-NEXT: shll $2, %esi
+; X64-NEXT: shlq $2, %rsi
; X64-NEXT: btrq %rsi, %rax
; X64-NEXT: retq
;
@@ -1056,7 +1056,7 @@ define i64 @bts_64_mask_zeros(i64 %x, i64 %n) {
; X64-LABEL: bts_64_mask_zeros:
; X64: # %bb.0:
; X64-NEXT: movq %rdi, %rax
-; X64-NEXT: shll $2, %esi
+; X64-NEXT: shlq $2, %rsi
; X64-NEXT: btsq %rsi, %rax
; X64-NEXT: retq
;
@@ -1088,7 +1088,7 @@ define i64 @btc_64_mask_zeros(i64 %x, i64 %n) {
; X64-LABEL: btc_64_mask_zeros:
; X64: # %bb.0:
; X64-NEXT: movq %rdi, %rax
-; X64-NEXT: shll $2, %esi
+; X64-NEXT: shlq $2, %rsi
; X64-NEXT: btcq %rsi, %rax
; X64-NEXT: retq
;
diff --git a/llvm/test/CodeGen/X86/narrow-shl-cst.ll b/llvm/test/CodeGen/X86/narrow-shl-cst.ll
index 296ef52c3bff9..107f14a0e2d2c 100644
--- a/llvm/test/CodeGen/X86/narrow-shl-cst.ll
+++ b/llvm/test/CodeGen/X86/narrow-shl-cst.ll
@@ -151,7 +151,7 @@ define i32 @test12(i32 %x, ptr %y) nounwind {
define i64 @test13(i64 %x, ptr %y) nounwind {
; CHECK-LABEL: test13:
; CHECK: # %bb.0:
-; CHECK-NEXT: addl %edi, %edi
+; CHECK-NEXT: addq %rdi, %rdi
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: movq %rax, (%rsi)
; CHECK-NEXT: retq
@@ -212,7 +212,7 @@ define i64 @test18(i64 %x) nounwind {
; CHECK-LABEL: test18:
; CHECK: # %bb.0:
; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: shll $10, %eax
+; CHECK-NEXT: shlq $10, %rax
; CHECK-NEXT: retq
%and = shl i64 %x, 10
%shl = and i64 %and, 261120
@@ -234,7 +234,7 @@ define i64 @test20(i64 %x) nounwind {
; CHECK-LABEL: test20:
; CHECK: # %bb.0:
; CHECK-NEXT: movzwl %di, %eax
-; CHECK-NEXT: shll $10, %eax
+; CHECK-NEXT: shlq $10, %rax
; CHECK-NEXT: retq
%and = shl i64 %x, 10
%shl = and i64 %and, 67107840
diff --git a/llvm/test/CodeGen/X86/pr27202.ll b/llvm/test/CodeGen/X86/pr27202.ll
index 3bd3be62fb4c8..9da22f635c266 100644
--- a/llvm/test/CodeGen/X86/pr27202.ll
+++ b/llvm/test/CodeGen/X86/pr27202.ll
@@ -45,8 +45,8 @@ define zeroext i1 @g(i32 %x) optsize {
define i64 @PR46237(i64 %x, i64 %y, i64 %z) optsize {
; CHECK-LABEL: PR46237:
; CHECK: # %bb.0:
-; CHECK-NEXT: movl %edx, %eax
-; CHECK-NEXT: shll $6, %eax
+; CHECK-NEXT: movq %rdx, %rax
+; CHECK-NEXT: shlq $6, %rax
; CHECK-NEXT: movzbl %al, %ecx
; CHECK-NEXT: andl $7, %esi
; CHECK-NEXT: andl $7, %edx
diff --git a/llvm/test/CodeGen/X86/pr49162.ll b/llvm/test/CodeGen/X86/pr49162.ll
index 0e65e121531bf..db8cec61acd6b 100644
--- a/llvm/test/CodeGen/X86/pr49162.ll
+++ b/llvm/test/CodeGen/X86/pr49162.ll
@@ -17,10 +17,7 @@ define ptr @PR49162(ptr %base, ptr %ptr160) {
;
; X64-LABEL: PR49162:
; X64: # %bb.0:
-; X64-NEXT: movl 8(%rsi), %eax
-; X64-NEXT: shll $16, %eax
-; X64-NEXT: cltq
-; X64-NEXT: sarq $16, %rax
+; X64-NEXT: movswq 8(%rsi), %rax
; X64-NEXT: leaq (%rdi,%rax,4), %rax
; X64-NEXT: retq
%load160 = load i160, ptr %ptr160, align 4
diff --git a/llvm/test/CodeGen/X86/pr92720.ll b/llvm/test/CodeGen/X86/pr92720.ll
new file mode 100644
index 0000000000000..b2543c08328c7
--- /dev/null
+++ b/llvm/test/CodeGen/X86/pr92720.ll
@@ -0,0 +1,15 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=x86_64-linux-gnu | FileCheck %s
+
+; Make sure we don't crash when shrinking the shift amount before legalization.
+define i64 @pr92720(i64 %x) {
+; CHECK-LABEL: pr92720:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movabsq $8589934592, %rax # imm = 0x200000000
+; CHECK-NEXT: retq
+ %or = or i64 %x, 255
+ %sub = sub i64 0, %or
+ %shl = shl i64 1, %sub
+ %sext = shl i64 %shl, 32
+ ret i64 %sext
+}
diff --git a/llvm/test/CodeGen/X86/scheduler-backtracking.ll b/llvm/test/CodeGen/X86/scheduler-backtracking.ll
index 785b97d8c2402..53d3367cce4d3 100644
--- a/llvm/test/CodeGen/X86/scheduler-backtracking.ll
+++ b/llvm/test/CodeGen/X86/scheduler-backtracking.ll
@@ -13,7 +13,7 @@ define i256 @test1(i256 %a) nounwind {
; ILP-LABEL: test1:
; ILP: # %bb.0:
; ILP-NEXT: movq %rdi, %rax
-; ILP-NEXT: leal (%rsi,%rsi), %ecx
+; ILP-NEXT: leaq (%rsi,%rsi), %rcx
; ILP-NEXT: movq $0, -{{[0-9]+}}(%rsp)
; ILP-NEXT: movq $0, -{{[0-9]+}}(%rsp)
; ILP-NEXT: movq $0, -{{[0-9]+}}(%rsp)
@@ -43,7 +43,7 @@ define i256 @test1(i256 %a) nounwind {
; ILP-NEXT: shlq %cl, %rsi
; ILP-NEXT: notb %cl
; ILP-NEXT: shrq %rdx
-; ILP-NEXT: # kill: def $cl killed $cl killed $ecx
+; ILP-NEXT: # kill: def $cl killed $cl killed $rcx
; ILP-NEXT: shrq %cl, %rdx
; ILP-NEXT: orq %rsi, %rdx
; ILP-NEXT: movq %rdx, 16(%rax)
@@ -60,7 +60,7 @@ define i256 @test1(i256 %a) nounwind {
; HYBRID-NEXT: movq $0, -{{[0-9]+}}(%rsp)
; HYBRID-NEXT: movq $0, -{{[0-9]+}}(%rsp)
; HYBRID-NEXT: movq $0, -{{[0-9]+}}(%rsp)
-; HYBRID-NEXT: addl %esi, %esi
+; HYBRID-NEXT: addq %rsi, %rsi
; HYBRID-NEXT: addb $3, %sil
; HYBRID-NEXT: movl %esi, %ecx
; HYBRID-NEXT: andb $7, %cl
@@ -97,7 +97,7 @@ define i256 @test1(i256 %a) nounwind {
; BURR-NEXT: movq $0, -{{[0-9]+}}(%rsp)
; BURR-NEXT: movq $0, -{{[0-9]+}}(%rsp)
; BURR-NEXT: movq $0, -{{[0-9]+}}(%rsp)
-; BURR-NEXT: addl %esi, %esi
+; BURR-NEXT: addq %rsi, %rsi
; BURR-NEXT: addb $3, %sil
; BURR-NEXT: movl %esi, %ecx
; BURR-NEXT: andb $7, %cl
@@ -126,7 +126,7 @@ define i256 @test1(i256 %a) nounwind {
; SRC-LABEL: test1:
; SRC: # %bb.0:
; SRC-NEXT: movq %rdi, %rax
-; SRC-NEXT: addl %esi, %esi
+; SRC-NEXT: addq %rsi, %rsi
; SRC-NEXT: addb $3, %sil
; SRC-NEXT: movq $0, -{{[0-9]+}}(%rsp)
; SRC-NEXT: movq $0, -{{[0-9]+}}(%rsp)
@@ -167,7 +167,7 @@ define i256 @test1(i256 %a) nounwind {
; LIN-LABEL: test1:
; LIN: # %bb.0:
; LIN-NEXT: movq %rdi, %rax
-; LIN-NEXT: leal (%rsi,%rsi), %edx
+; LIN-NEXT: leaq (%rsi,%rsi), %rdx
; LIN-NEXT: addb $3, %dl
; LIN-NEXT: movl %edx, %ecx
; LIN-NEXT: shrb $3, %cl
diff --git a/llvm/test/CodeGen/X86/vector-sext.ll b/llvm/test/CodeGen/X86/vector-sext.ll
index 85c1e25c29ed5..739d5b1d32e86 100644
--- a/llvm/test/CodeGen/X86/vector-sext.ll
+++ b/llvm/test/CodeGen/X86/vector-sext.ll
@@ -3615,13 +3615,10 @@ define <4 x i32> @sext_4i17_to_4i32(ptr %ptr) {
; SSE2-NEXT: movd %ecx, %xmm1
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: movl 8(%rdi), %ecx
-; SSE2-NEXT: shll $28, %ecx
-; SSE2-NEXT: movq %rax, %rdx
-; SSE2-NEXT: shrq $51, %rdx
-; SSE2-NEXT: shll $15, %edx
-; SSE2-NEXT: orl %ecx, %edx
-; SSE2-NEXT: sarl $15, %edx
-; SSE2-NEXT: movd %edx, %xmm1
+; SSE2-NEXT: shldq $13, %rax, %rcx
+; SSE2-NEXT: shll $15, %ecx
+; SSE2-NEXT: sarl $15, %ecx
+; SSE2-NEXT: movd %ecx, %xmm1
; SSE2-NEXT: shrq $34, %rax
; SSE2-NEXT: shll $15, %eax
; SSE2-NEXT: sarl $15, %eax
@@ -3644,13 +3641,10 @@ define <4 x i32> @sext_4i17_to_4i32(ptr %ptr) {
; SSSE3-NEXT: movd %ecx, %xmm1
; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSSE3-NEXT: movl 8(%rdi), %ecx
-; SSSE3-NEXT: shll $28, %ecx
-; SSSE3-NEXT: movq %rax, %rdx
-; SSSE3-NEXT: shrq $51, %rdx
-; SSSE3-NEXT: shll $15, %edx
-; SSSE3-NEXT: orl %ecx, %edx
-; SSSE3-NEXT: sarl $15, %edx
-; SSSE3-NEXT: movd %edx, %xmm1
+; SSSE3-NEXT: shldq $13, %rax, %rcx
+; SSSE3-NEXT: shll $15, %ecx
+; SSSE3-NEXT: sarl $15, %ecx
+; SSSE3-NEXT: movd %ecx, %xmm1
; SSSE3-NEXT: shrq $34, %rax
; SSSE3-NEXT: shll $15, %eax
; SSSE3-NEXT: sarl $15, %eax
@@ -3662,53 +3656,47 @@ define <4 x i32> @sext_4i17_to_4i32(ptr %ptr) {
; SSE41-LABEL: sext_4i17_to_4i32:
; SSE41: # %bb.0:
; SSE41-NEXT: movq (%rdi), %rax
-; SSE41-NEXT: movq %rax, %rcx
-; SSE41-NEXT: shrq $17, %rcx
+; SSE41-NEXT: movl %eax, %ecx
+; SSE41-NEXT: movq %rax, %rdx
+; SSE41-NEXT: movl 8(%rdi), %esi
+; SSE41-NEXT: shldq $13, %rax, %rsi
+; SSE41-NEXT: shrq $17, %rax
+; SSE41-NEXT: shll $15, %eax
+; SSE41-NEXT: sarl $15, %eax
; SSE41-NEXT: shll $15, %ecx
; SSE41-NEXT: sarl $15, %ecx
-; SSE41-NEXT: movl %eax, %edx
+; SSE41-NEXT: movd %ecx, %xmm0
+; SSE41-NEXT: pinsrd $1, %eax, %xmm0
+; SSE41-NEXT: shrq $34, %rdx
; SSE41-NEXT: shll $15, %edx
; SSE41-NEXT: sarl $15, %edx
-; SSE41-NEXT: movd %edx, %xmm0
-; SSE41-NEXT: pinsrd $1, %ecx, %xmm0
-; SSE41-NEXT: movq %rax, %rcx
-; SSE41-NEXT: shrq $34, %rcx
-; SSE41-NEXT: shll $15, %ecx
-; SSE41-NEXT: sarl $15, %ecx
-; SSE41-NEXT: pinsrd $2, %ecx, %xmm0
-; SSE41-NEXT: movl 8(%rdi), %ecx
-; SSE41-NEXT: shll $28, %ecx
-; SSE41-NEXT: shrq $51, %rax
-; SSE41-NEXT: shll $15, %eax
-; SSE41-NEXT: orl %ecx, %eax
-; SSE41-NEXT: sarl $15, %eax
-; SSE41-NEXT: pinsrd $3, %eax, %xmm0
+; SSE41-NEXT: pinsrd $2, %edx, %xmm0
+; SSE41-NEXT: shll $15, %esi
+; SSE41-NEXT: sarl $15, %esi
+; SSE41-NEXT: pinsrd $3, %esi, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: sext_4i17_to_4i32:
; AVX: # %bb.0:
; AVX-NEXT: movq (%rdi), %rax
-; AVX-NEXT: movq %rax, %rcx
-; AVX-NEXT: shrq $17, %rcx
+; AVX-NEXT: movl %eax, %ecx
+; AVX-NEXT: movq %rax, %rdx
+; AVX-NEXT: movl 8(%rdi), %esi
+; AVX-NEXT: shldq $13, %rax, %rsi
+; AVX-NEXT: shrq $17, %rax
+; AVX-NEXT: shll $15, %eax
+; AVX-NEXT: sarl $15, %eax
; AVX-NEXT: shll $15, %ecx
; AVX-NEXT: sarl $15, %ecx
-; AVX-NEXT: movl %eax, %edx
+; AVX-NEXT: vmovd %ecx, %xmm0
+; AVX-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
+; AVX-NEXT: shrq $34, %rdx
; AVX-NEXT: shll $15, %edx
; AVX-NEXT: sarl $15, %edx
-; AVX-NEXT: vmovd %edx, %xmm0
-; AVX-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0
-; AVX-NEXT: movq %rax, %rcx
-; AVX-NEXT: shrq $34, %rcx
-; AVX-NEXT: shll $15, %ecx
-; AVX-NEXT: sarl $15, %ecx
-; AVX-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0
-; AVX-NEXT: movl 8(%rdi), %ecx
-; AVX-NEXT: shll $28, %ecx
-; AVX-NEXT: shrq $51, %rax
-; AVX-NEXT: shll $15, %eax
-; AVX-NEXT: orl %ecx, %eax
-; AVX-NEXT: sarl $15, %eax
-; AVX-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0
+; AVX-NEXT: vpinsrd $2, %edx, %xmm0, %xmm0
+; AVX-NEXT: shll $15, %esi
+; AVX-NEXT: sarl $15, %esi
+; AVX-NEXT: vpinsrd $3, %esi, %xmm0, %xmm0
; AVX-NEXT: retq
;
; X86-SSE2-LABEL: sext_4i17_to_4i32:
diff --git a/llvm/test/CodeGen/X86/vector-zext.ll b/llvm/test/CodeGen/X86/vector-zext.ll
index 74926f46ffa43..25c438cc4c4fc 100644
--- a/llvm/test/CodeGen/X86/vector-zext.ll
+++ b/llvm/test/CodeGen/X86/vector-zext.ll
@@ -2332,11 +2332,8 @@ define <4 x i32> @zext_4i17_to_4i32(ptr %ptr) {
; SSE2-NEXT: movd %ecx, %xmm1
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: movl 8(%rdi), %ecx
-; SSE2-NEXT: shll $13, %ecx
-; SSE2-NEXT: movq %rax, %rdx
-; SSE2-NEXT: shrq $51, %rdx
-; SSE2-NEXT: orl %ecx, %edx
-; SSE2-NEXT: movd %edx, %xmm1
+; SSE2-NEXT: shldq $13, %rax, %rcx
+; SSE2-NEXT: movd %ecx, %xmm1
; SSE2-NEXT: shrq $34, %rax
; SSE2-NEXT: movd %eax, %xmm2
; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
@@ -2353,11 +2350,8 @@ define <4 x i32> @zext_4i17_to_4i32(ptr %ptr) {
; SSSE3-NEXT: movd %ecx, %xmm1
; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSSE3-NEXT: movl 8(%rdi), %ecx
-; SSSE3-NEXT: shll $13, %ecx
-; SSSE3-NEXT: movq %rax, %rdx
-; SSSE3-NEXT: shrq $51, %rdx
-; SSSE3-NEXT: orl %ecx, %edx
-; SSSE3-NEXT: movd %edx, %xmm1
+; SSSE3-NEXT: shldq $13, %rax, %rcx
+; SSSE3-NEXT: movd %ecx, %xmm1
; SSSE3-NEXT: shrq $34, %rax
; SSSE3-NEXT: movd %eax, %xmm2
; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
@@ -2367,15 +2361,12 @@ define <4 x i32> @zext_4i17_to_4i32(ptr %ptr) {
;
; SSE41-LABEL: zext_4i17_to_4i32:
; SSE41: # %bb.0:
-; SSE41-NEXT: movl 8(%rdi), %eax
-; SSE41-NEXT: shll $13, %eax
-; SSE41-NEXT: movq (%rdi), %rcx
-; SSE41-NEXT: movq %rcx, %rdx
-; SSE41-NEXT: shrq $51, %rdx
-; SSE41-NEXT: orl %eax, %edx
-; SSE41-NEXT: movq %rcx, %rax
+; SSE41-NEXT: movq (%rdi), %rax
+; SSE41-NEXT: movd %eax, %xmm0
+; SSE41-NEXT: movq %rax, %rcx
+; SSE41-NEXT: movl 8(%rdi), %edx
+; SSE41-NEXT: shldq $13, %rax, %rdx
; SSE41-NEXT: shrq $17, %rax
-; SSE41-NEXT: movd %ecx, %xmm0
; SSE41-NEXT: pinsrd $1, %eax, %xmm0
; SSE41-NEXT: shrq $34, %rcx
; SSE41-NEXT: pinsrd $2, %ecx, %xmm0
@@ -2385,15 +2376,12 @@ define <4 x i32> @zext_4i17_to_4i32(ptr %ptr) {
;
; AVX1-LABEL: zext_4i17_to_4i32:
; AVX1: # %bb.0:
-; AVX1-NEXT: movl 8(%rdi), %eax
-; AVX1-NEXT: shll $13, %eax
-; AVX1-NEXT: movq (%rdi), %rcx
-; AVX1-NEXT: movq %rcx, %rdx
-; AVX1-NEXT: shrq $51, %rdx
-; AVX1-NEXT: orl %eax, %edx
-; AVX1-NEXT: movq %rcx, %rax
+; AVX1-NEXT: movq (%rdi), %rax
+; AVX1-NEXT: vmovd %eax, %xmm0
+; AVX1-NEXT: movq %rax, %rcx
+; AVX1-NEXT: movl 8(%rdi), %edx
+; AVX1-NEXT: shldq $13, %rax, %rdx
; AVX1-NEXT: shrq $17, %rax
-; AVX1-NEXT: vmovd %ecx, %xmm0
; AVX1-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
; AVX1-NEXT: shrq $34, %rcx
; AVX1-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0
@@ -2403,15 +2391,12 @@ define <4 x i32> @zext_4i17_to_4i32(ptr %ptr) {
;
; AVX2-LABEL: zext_4i17_to_4i32:
; AVX2: # %bb.0:
-; AVX2-NEXT: movl 8(%rdi), %eax
-; AVX2-NEXT: shll $13, %eax
-; AVX2-NEXT: movq (%rdi), %rcx
-; AVX2-NEXT: movq %rcx, %rdx
-; AVX2-NEXT: shrq $51, %rdx
-; AVX2-NEXT: orl %eax, %edx
-; AVX2-NEXT: movq %rcx, %rax
+; AVX2-NEXT: movq (%rdi), %rax
+; AVX2-NEXT: vmovd %eax, %xmm0
+; AVX2-NEXT: movq %rax, %rcx
+; AVX2-NEXT: movl 8(%rdi), %edx
+; AVX2-NEXT: shldq $13, %rax, %rdx
; AVX2-NEXT: shrq $17, %rax
-; AVX2-NEXT: vmovd %ecx, %xmm0
; AVX2-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
; AVX2-NEXT: shrq $34, %rcx
; AVX2-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0
@@ -2422,15 +2407,12 @@ define <4 x i32> @zext_4i17_to_4i32(ptr %ptr) {
;
; AVX512-LABEL: zext_4i17_to_4i32:
; AVX512: # %bb.0:
-; AVX512-NEXT: movl 8(%rdi), %eax
-; AVX512-NEXT: shll $13, %eax
-; AVX512-NEXT: movq (%rdi), %rcx
-; AVX512-NEXT: movq %rcx, %rdx
-; AVX512-NEXT: shrq $51, %rdx
-; AVX512-NEXT: orl %eax, %edx
-; AVX512-NEXT: movq %rcx, %rax
+; AVX512-NEXT: movq (%rdi), %rax
+; AVX512-NEXT: vmovd %eax, %xmm0
+; AVX512-NEXT: movq %rax, %rcx
+; AVX512-NEXT: movl 8(%rdi), %edx
+; AVX512-NEXT: shldq $13, %rax, %rdx
; AVX512-NEXT: shrq $17, %rax
-; AVX512-NEXT: vmovd %ecx, %xmm0
; AVX512-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
; AVX512-NEXT: shrq $34, %rcx
; AVX512-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0
diff --git a/llvm/test/CodeGen/X86/widen-load-of-small-alloca-with-zero-upper-half.ll b/llvm/test/CodeGen/X86/widen-load-of-small-alloca-with-zero-upper-half.ll
index 9ae1f270e8833..2611399458c27 100644
--- a/llvm/test/CodeGen/X86/widen-load-of-small-alloca-with-zero-upper-half.ll
+++ b/llvm/test/CodeGen/X86/widen-load-of-small-alloca-with-zero-upper-half.ll
@@ -171,16 +171,16 @@ define void @load_2byte_chunk_of_4byte_alloca_with_zero_upper_half(ptr %src, i64
define void @load_1byte_chunk_of_8byte_alloca_with_zero_upper_half(ptr %src, i64 %byteOff, ptr %dst) nounwind {
; X64-NO-BMI2-LABEL: load_1byte_chunk_of_8byte_alloca_with_zero_upper_half:
; X64-NO-BMI2: # %bb.0:
-; X64-NO-BMI2-NEXT: leal (,%rsi,8), %ecx
+; X64-NO-BMI2-NEXT: leaq (,%rsi,8), %rcx
; X64-NO-BMI2-NEXT: movl (%rdi), %eax
-; X64-NO-BMI2-NEXT: # kill: def $cl killed $cl killed $ecx
+; X64-NO-BMI2-NEXT: # kill: def $cl killed $cl killed $rcx
; X64-NO-BMI2-NEXT: shrq %cl, %rax
; X64-NO-BMI2-NEXT: movb %al, (%rdx)
; X64-NO-BMI2-NEXT: retq
;
; X64-BMI2-LABEL: load_1byte_chunk_of_8byte_alloca_with_zero_upper_half:
; X64-BMI2: # %bb.0:
-; X64-BMI2-NEXT: shll $3, %esi
+; X64-BMI2-NEXT: shlq $3, %rsi
; X64-BMI2-NEXT: movl (%rdi), %eax
; X64-BMI2-NEXT: shrxq %rsi, %rax, %rax
; X64-BMI2-NEXT: movb %al, (%rdx)
@@ -248,16 +248,16 @@ define void @load_1byte_chunk_of_8byte_alloca_with_zero_upper_half(ptr %src, i64
define void @load_2byte_chunk_of_8byte_alloca_with_zero_upper_half(ptr %src, i64 %byteOff, ptr %dst) nounwind {
; X64-NO-BMI2-LABEL: load_2byte_chunk_of_8byte_alloca_with_zero_upper_half:
; X64-NO-BMI2: # %bb.0:
-; X64-NO-BMI2-NEXT: leal (,%rsi,8), %ecx
+; X64-NO-BMI2-NEXT: leaq (,%rsi,8), %rcx
; X64-NO-BMI2-NEXT: movl (%rdi), %eax
-; X64-NO-BMI2-NEXT: # kill: def $cl killed $cl killed $ecx
+; X64-NO-BMI2-NEXT: # kill: def $cl killed $cl killed $rcx
; X64-NO-BMI2-NEXT: shrq %cl, %rax
; X64-NO-BMI2-NEXT: movw %ax, (%rdx)
; X64-NO-BMI2-NEXT: retq
;
; X64-BMI2-LABEL: load_2byte_chunk_of_8byte_alloca_with_zero_upper_half:
; X64-BMI2: # %bb.0:
-; X64-BMI2-NEXT: shll $3, %esi
+; X64-BMI2-NEXT: shlq $3, %rsi
; X64-BMI2-NEXT: movl (%rdi), %eax
; X64-BMI2-NEXT: shrxq %rsi, %rax, %rax
; X64-BMI2-NEXT: movw %ax, (%rdx)
@@ -324,16 +324,16 @@ define void @load_2byte_chunk_of_8byte_alloca_with_zero_upper_half(ptr %src, i64
define void @load_4byte_chunk_of_8byte_alloca_with_zero_upper_half(ptr %src, i64 %byteOff, ptr %dst) nounwind {
; X64-NO-BMI2-LABEL: load_4byte_chunk_of_8byte_alloca_with_zero_upper_half:
; X64-NO-BMI2: # %bb.0:
-; X64-NO-BMI2-NEXT: leal (,%rsi,8), %ecx
+; X64-NO-BMI2-NEXT: leaq (,%rsi,8), %rcx
; X64-NO-BMI2-NEXT: movl (%rdi), %eax
-; X64-NO-BMI2-NEXT: # kill: def $cl killed $cl killed $ecx
+; X64-NO-BMI2-NEXT: # kill: def $cl killed $cl killed $rcx
; X64-NO-BMI2-NEXT: shrq %cl, %rax
; X64-NO-BMI2-NEXT: movl %eax, (%rdx)
; X64-NO-BMI2-NEXT: retq
;
; X64-BMI2-LABEL: load_4byte_chunk_of_8byte_alloca_with_zero_upper_half:
; X64-BMI2: # %bb.0:
-; X64-BMI2-NEXT: shll $3, %esi
+; X64-BMI2-NEXT: shlq $3, %rsi
; X64-BMI2-NEXT: movl (%rdi), %eax
; X64-BMI2-NEXT: shrxq %rsi, %rax, %rax
; X64-BMI2-NEXT: movl %eax, (%rdx)
@@ -402,7 +402,7 @@ define void @load_1byte_chunk_of_16byte_alloca_with_zero_upper_half(ptr %src, i6
; X64-NO-BMI2-NO-SHLD: # %bb.0:
; X64-NO-BMI2-NO-SHLD-NEXT: movq %rsi, %rcx
; X64-NO-BMI2-NO-SHLD-NEXT: movq (%rdi), %rax
-; X64-NO-BMI2-NO-SHLD-NEXT: shll $3, %ecx
+; X64-NO-BMI2-NO-SHLD-NEXT: shlq $3, %rcx
; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rax
; X64-NO-BMI2-NO-SHLD-NEXT: xorl %esi, %esi
; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %cl
@@ -414,7 +414,7 @@ define void @load_1byte_chunk_of_16byte_alloca_with_zero_upper_half(ptr %src, i6
; X64-SHLD: # %bb.0:
; X64-SHLD-NEXT: movq %rsi, %rcx
; X64-SHLD-NEXT: movq (%rdi), %rax
-; X64-SHLD-NEXT: shll $3, %ecx
+; X64-SHLD-NEXT: shlq $3, %rcx
; X64-SHLD-NEXT: xorl %esi, %esi
; X64-SHLD-NEXT: shrdq %cl, %rsi, %rax
; X64-SHLD-NEXT: testb $64, %cl
@@ -424,7 +424,7 @@ define void @load_1byte_chunk_of_16byte_alloca_with_zero_upper_half(ptr %src, i6
;
; X...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/92730
More information about the llvm-commits
mailing list