[llvm] [SLP] Avoid -passes=instcombine stages in SLP tests (PR #146257)
via llvm-commits
llvm-commits at lists.llvm.org
Sat Jun 28 18:53:01 PDT 2025
llvmbot wrote:
@llvm/pr-subscribers-llvm-transforms
Author: Hanyang (Eric) Xu (ericxu233)
Addresses: #145511
Note that two tests still use `-passes=slp-vectorizer,instcombine`: they appear to be written specifically to exercise SLP vectorization followed by instcombine, so dropping instcombine would defeat their original objective (a representative RUN-line change for the other tests is sketched after this list). They are left unchanged:
[llvm/test/Transforms/SLPVectorizer/arith-div-undef.ll](https://github.com/llvm/llvm-project/blob/main/llvm/test/Transforms/SLPVectorizer/arith-div-undef.ll)
[llvm/test/Transforms/SLPVectorizer/slp-hr-with-reuse.ll](https://github.com/llvm/llvm-project/blob/main/llvm/test/Transforms/SLPVectorizer/slp-hr-with-reuse.ll)
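For the remaining tests, the change is a straightforward RUN-line simplification plus regenerated CHECK lines. A minimal before/after sketch (illustrative only; the exact flags and check prefixes vary per test, as the diff below shows):

```llvm
; Before: SLP output was further canonicalized by instcombine, so the
; autogenerated CHECK lines also depended on instcombine's behavior.
; RUN: opt < %s -S -passes=slp-vectorizer,instcombine | FileCheck %s

; After: only slp-vectorizer runs, and the CHECK lines are regenerated with
; utils/update_test_checks.py to match the raw SLP output.
; RUN: opt < %s -S -passes=slp-vectorizer | FileCheck %s
```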
---
Patch is 538.74 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/146257.diff
38 Files Affected:
- (modified) llvm/test/Transforms/SLPVectorizer/AArch64/gather-cost.ll (+22-18)
- (modified) llvm/test/Transforms/SLPVectorizer/AArch64/gather-reduce.ll (+60-76)
- (modified) llvm/test/Transforms/SLPVectorizer/AArch64/getelementptr.ll (+26-34)
- (modified) llvm/test/Transforms/SLPVectorizer/AArch64/getelementptr2.ll (+13-13)
- (modified) llvm/test/Transforms/SLPVectorizer/AArch64/loadorder.ll (+273-82)
- (modified) llvm/test/Transforms/SLPVectorizer/AArch64/transpose-inseltpoison.ll (+4-2)
- (modified) llvm/test/Transforms/SLPVectorizer/AArch64/transpose.ll (+4-2)
- (modified) llvm/test/Transforms/SLPVectorizer/AMDGPU/add_sub_sat-inseltpoison.ll (+25-11)
- (modified) llvm/test/Transforms/SLPVectorizer/AMDGPU/add_sub_sat.ll (+35-21)
- (modified) llvm/test/Transforms/SLPVectorizer/AMDGPU/min_max.ll (+33-25)
- (modified) llvm/test/Transforms/SLPVectorizer/WebAssembly/no-vectorize-rotate.ll (+6-6)
- (modified) llvm/test/Transforms/SLPVectorizer/X86/alternate-calls-inseltpoison.ll (+52-52)
- (modified) llvm/test/Transforms/SLPVectorizer/X86/alternate-calls.ll (+58-58)
- (modified) llvm/test/Transforms/SLPVectorizer/X86/alternate-cast-inseltpoison.ll (+51-29)
- (modified) llvm/test/Transforms/SLPVectorizer/X86/alternate-cast.ll (+51-29)
- (modified) llvm/test/Transforms/SLPVectorizer/X86/alternate-fp-inseltpoison.ll (+80-43)
- (modified) llvm/test/Transforms/SLPVectorizer/X86/alternate-fp.ll (+80-43)
- (modified) llvm/test/Transforms/SLPVectorizer/X86/alternate-int-inseltpoison.ll (+147-92)
- (modified) llvm/test/Transforms/SLPVectorizer/X86/alternate-int.ll (+147-92)
- (modified) llvm/test/Transforms/SLPVectorizer/X86/blending-shuffle-inseltpoison.ll (+17-17)
- (modified) llvm/test/Transforms/SLPVectorizer/X86/blending-shuffle.ll (+25-19)
- (modified) llvm/test/Transforms/SLPVectorizer/X86/cmp_commute-inseltpoison.ll (+2-2)
- (modified) llvm/test/Transforms/SLPVectorizer/X86/cmp_commute.ll (+2-2)
- (modified) llvm/test/Transforms/SLPVectorizer/X86/hadd-inseltpoison.ll (+39-39)
- (modified) llvm/test/Transforms/SLPVectorizer/X86/hadd.ll (+45-45)
- (modified) llvm/test/Transforms/SLPVectorizer/X86/hsub-inseltpoison.ll (+36-36)
- (modified) llvm/test/Transforms/SLPVectorizer/X86/hsub.ll (+36-36)
- (modified) llvm/test/Transforms/SLPVectorizer/X86/minimum-sizes.ll (+37-37)
- (modified) llvm/test/Transforms/SLPVectorizer/X86/operandorder.ll (+40-46)
- (modified) llvm/test/Transforms/SLPVectorizer/X86/pr40522.ll (+17-17)
- (modified) llvm/test/Transforms/SLPVectorizer/X86/pr46983.ll (+16-14)
- (modified) llvm/test/Transforms/SLPVectorizer/X86/pr47623.ll (+13-13)
- (modified) llvm/test/Transforms/SLPVectorizer/X86/pr47629-inseltpoison.ll (+235-218)
- (modified) llvm/test/Transforms/SLPVectorizer/X86/pr47629.ll (+235-218)
- (modified) llvm/test/Transforms/SLPVectorizer/X86/pr47642.ll (+5-5)
- (modified) llvm/test/Transforms/SLPVectorizer/X86/pr49081.ll (+4-3)
- (modified) llvm/test/Transforms/SLPVectorizer/X86/reverse_extract_elements.ll (+1-1)
- (modified) llvm/test/Transforms/SLPVectorizer/revec-shufflevector.ll (+69-24)
``````````diff
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/gather-cost.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/gather-cost.ll
index e460f558f4723..8dc88f7b96716 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/gather-cost.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/gather-cost.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -S -passes=slp-vectorizer,instcombine -pass-remarks-output=%t | FileCheck %s
+; RUN: opt < %s -S -passes=slp-vectorizer -pass-remarks-output=%t | FileCheck %s
; RUN: cat %t | FileCheck -check-prefix=REMARK %s
-; RUN: opt < %s -S -aa-pipeline=basic-aa -passes='slp-vectorizer,instcombine' -pass-remarks-output=%t | FileCheck %s
+; RUN: opt < %s -S -aa-pipeline=basic-aa -passes='slp-vectorizer' -pass-remarks-output=%t | FileCheck %s
; RUN: cat %t | FileCheck -check-prefix=REMARK %s
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
@@ -16,10 +16,10 @@ target triple = "aarch64--linux-gnu"
define internal i32 @gather_multiple_use(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: @gather_multiple_use(
-; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[C:%.*]], i64 0
-; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[A:%.*]], i64 1
-; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[B:%.*]], i64 2
-; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[D:%.*]], i64 3
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[C:%.*]], i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[A:%.*]], i32 1
+; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[B:%.*]], i32 2
+; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[D:%.*]], i32 3
; CHECK-NEXT: [[TMP5:%.*]] = lshr <4 x i32> [[TMP4]], splat (i32 15)
; CHECK-NEXT: [[TMP6:%.*]] = and <4 x i32> [[TMP5]], splat (i32 65537)
; CHECK-NEXT: [[TMP7:%.*]] = mul nuw <4 x i32> [[TMP6]], splat (i32 65535)
@@ -57,22 +57,26 @@ define internal i32 @gather_multiple_use(i32 %a, i32 %b, i32 %c, i32 %d) {
@data = global [6 x [258 x i8]] zeroinitializer, align 1
define void @gather_load(ptr noalias %ptr) {
; CHECK-LABEL: @gather_load(
-; CHECK-NEXT: [[ARRAYIDX182:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR:%.*]], i64 2
-; CHECK-NEXT: [[ARRAYIDX183:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR]], i64 4
-; CHECK-NEXT: [[ARRAYIDX184:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR]], i64 6
-; CHECK-NEXT: [[ARRAYIDX185:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR]], i64 8
-; CHECK-NEXT: [[L0:%.*]] = load i8, ptr getelementptr inbounds nuw (i8, ptr @data, i64 258), align 1
+; CHECK-NEXT: [[ARRAYIDX182:%.*]] = getelementptr inbounds i16, ptr [[PTR:%.*]], i64 1
+; CHECK-NEXT: [[ARRAYIDX183:%.*]] = getelementptr inbounds i16, ptr [[PTR]], i64 2
+; CHECK-NEXT: [[ARRAYIDX184:%.*]] = getelementptr inbounds i16, ptr [[PTR]], i64 3
+; CHECK-NEXT: [[ARRAYIDX185:%.*]] = getelementptr inbounds i16, ptr [[PTR]], i64 4
+; CHECK-NEXT: [[ARRAYIDX149:%.*]] = getelementptr inbounds [6 x [258 x i8]], ptr @data, i64 0, i64 1, i64 0
+; CHECK-NEXT: [[L0:%.*]] = load i8, ptr [[ARRAYIDX149]], align 1
; CHECK-NEXT: [[CONV150:%.*]] = zext i8 [[L0]] to i16
-; CHECK-NEXT: [[ADD152:%.*]] = add nuw nsw i16 [[CONV150]], 10
-; CHECK-NEXT: [[L1:%.*]] = load i8, ptr getelementptr inbounds nuw (i8, ptr @data, i64 517), align 1
+; CHECK-NEXT: [[ADD152:%.*]] = add i16 10, [[CONV150]]
+; CHECK-NEXT: [[ARRAYIDX155:%.*]] = getelementptr inbounds [6 x [258 x i8]], ptr @data, i64 0, i64 2, i64 1
+; CHECK-NEXT: [[L1:%.*]] = load i8, ptr [[ARRAYIDX155]], align 1
; CHECK-NEXT: [[CONV156:%.*]] = zext i8 [[L1]] to i16
-; CHECK-NEXT: [[ADD158:%.*]] = add nuw nsw i16 [[CONV156]], 20
-; CHECK-NEXT: [[L2:%.*]] = load i8, ptr getelementptr inbounds nuw (i8, ptr @data, i64 776), align 1
+; CHECK-NEXT: [[ADD158:%.*]] = add i16 20, [[CONV156]]
+; CHECK-NEXT: [[ARRAYIDX161:%.*]] = getelementptr inbounds [6 x [258 x i8]], ptr @data, i64 0, i64 3, i64 2
+; CHECK-NEXT: [[L2:%.*]] = load i8, ptr [[ARRAYIDX161]], align 1
; CHECK-NEXT: [[CONV162:%.*]] = zext i8 [[L2]] to i16
-; CHECK-NEXT: [[ADD164:%.*]] = add nuw nsw i16 [[CONV162]], 30
-; CHECK-NEXT: [[L3:%.*]] = load i8, ptr getelementptr inbounds nuw (i8, ptr @data, i64 1035), align 1
+; CHECK-NEXT: [[ADD164:%.*]] = add i16 30, [[CONV162]]
+; CHECK-NEXT: [[ARRAYIDX167:%.*]] = getelementptr inbounds [6 x [258 x i8]], ptr @data, i64 0, i64 4, i64 3
+; CHECK-NEXT: [[L3:%.*]] = load i8, ptr [[ARRAYIDX167]], align 1
; CHECK-NEXT: [[CONV168:%.*]] = zext i8 [[L3]] to i16
-; CHECK-NEXT: [[ADD170:%.*]] = add nuw nsw i16 [[CONV168]], 40
+; CHECK-NEXT: [[ADD170:%.*]] = add i16 40, [[CONV168]]
; CHECK-NEXT: store i16 [[ADD152]], ptr [[ARRAYIDX182]], align 2
; CHECK-NEXT: store i16 [[ADD158]], ptr [[ARRAYIDX183]], align 2
; CHECK-NEXT: store i16 [[ADD164]], ptr [[ARRAYIDX184]], align 2
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/gather-reduce.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/gather-reduce.ll
index 900d5f293b5b8..4f8fc339fb64b 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/gather-reduce.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/gather-reduce.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -S -passes=slp-vectorizer,dce,instcombine < %s | FileCheck %s --check-prefix=GENERIC
-; RUN: opt -S -mcpu=kryo -passes=slp-vectorizer,dce,instcombine < %s | FileCheck %s --check-prefix=KRYO
+; RUN: opt -S -passes=slp-vectorizer,dce < %s | FileCheck %s --check-prefix=GENERIC
+; RUN: opt -S -mcpu=kryo -passes=slp-vectorizer,dce < %s | FileCheck %s --check-prefix=KRYO
target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
target triple = "aarch64--linux-gnu"
@@ -36,57 +36,49 @@ define i32 @gather_reduce_8x16_i32(ptr nocapture readonly %a, ptr nocapture read
; GENERIC-NEXT: [[I_0103:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
; GENERIC-NEXT: [[SUM_0102:%.*]] = phi i32 [ [[ADD66]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
; GENERIC-NEXT: [[A_ADDR_0101:%.*]] = phi ptr [ [[INCDEC_PTR58:%.*]], [[FOR_BODY]] ], [ [[A:%.*]], [[FOR_BODY_PREHEADER]] ]
-; GENERIC-NEXT: [[INCDEC_PTR58]] = getelementptr inbounds nuw i8, ptr [[A_ADDR_0101]], i64 16
+; GENERIC-NEXT: [[INCDEC_PTR58]] = getelementptr inbounds i16, ptr [[A_ADDR_0101]], i64 8
; GENERIC-NEXT: [[TMP0:%.*]] = load <8 x i16>, ptr [[A_ADDR_0101]], align 2
; GENERIC-NEXT: [[TMP1:%.*]] = zext <8 x i16> [[TMP0]] to <8 x i32>
; GENERIC-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr [[B:%.*]], align 2
; GENERIC-NEXT: [[TMP3:%.*]] = zext <8 x i16> [[TMP2]] to <8 x i32>
; GENERIC-NEXT: [[TMP4:%.*]] = sub nsw <8 x i32> [[TMP1]], [[TMP3]]
-; GENERIC-NEXT: [[TMP5:%.*]] = extractelement <8 x i32> [[TMP4]], i64 0
-; GENERIC-NEXT: [[TMP6:%.*]] = sext i32 [[TMP5]] to i64
-; GENERIC-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[G:%.*]], i64 [[TMP6]]
+; GENERIC-NEXT: [[TMP5:%.*]] = extractelement <8 x i32> [[TMP4]], i32 0
+; GENERIC-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[G:%.*]], i32 [[TMP5]]
; GENERIC-NEXT: [[TMP7:%.*]] = load i16, ptr [[ARRAYIDX]], align 2
; GENERIC-NEXT: [[CONV3:%.*]] = zext i16 [[TMP7]] to i32
-; GENERIC-NEXT: [[ADD:%.*]] = add nsw i32 [[SUM_0102]], [[CONV3]]
-; GENERIC-NEXT: [[TMP8:%.*]] = extractelement <8 x i32> [[TMP4]], i64 1
-; GENERIC-NEXT: [[TMP9:%.*]] = sext i32 [[TMP8]] to i64
-; GENERIC-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[TMP9]]
+; GENERIC-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV3]], [[SUM_0102]]
+; GENERIC-NEXT: [[TMP8:%.*]] = extractelement <8 x i32> [[TMP4]], i32 1
+; GENERIC-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i16, ptr [[G]], i32 [[TMP8]]
; GENERIC-NEXT: [[TMP10:%.*]] = load i16, ptr [[ARRAYIDX10]], align 2
; GENERIC-NEXT: [[CONV11:%.*]] = zext i16 [[TMP10]] to i32
; GENERIC-NEXT: [[ADD12:%.*]] = add nsw i32 [[ADD]], [[CONV11]]
-; GENERIC-NEXT: [[TMP11:%.*]] = extractelement <8 x i32> [[TMP4]], i64 2
-; GENERIC-NEXT: [[TMP12:%.*]] = sext i32 [[TMP11]] to i64
-; GENERIC-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[TMP12]]
+; GENERIC-NEXT: [[TMP9:%.*]] = extractelement <8 x i32> [[TMP4]], i32 2
+; GENERIC-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds i16, ptr [[G]], i32 [[TMP9]]
; GENERIC-NEXT: [[TMP13:%.*]] = load i16, ptr [[ARRAYIDX19]], align 2
; GENERIC-NEXT: [[CONV20:%.*]] = zext i16 [[TMP13]] to i32
; GENERIC-NEXT: [[ADD21:%.*]] = add nsw i32 [[ADD12]], [[CONV20]]
-; GENERIC-NEXT: [[TMP14:%.*]] = extractelement <8 x i32> [[TMP4]], i64 3
-; GENERIC-NEXT: [[TMP15:%.*]] = sext i32 [[TMP14]] to i64
-; GENERIC-NEXT: [[ARRAYIDX28:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[TMP15]]
+; GENERIC-NEXT: [[TMP11:%.*]] = extractelement <8 x i32> [[TMP4]], i32 3
+; GENERIC-NEXT: [[ARRAYIDX28:%.*]] = getelementptr inbounds i16, ptr [[G]], i32 [[TMP11]]
; GENERIC-NEXT: [[TMP16:%.*]] = load i16, ptr [[ARRAYIDX28]], align 2
; GENERIC-NEXT: [[CONV29:%.*]] = zext i16 [[TMP16]] to i32
; GENERIC-NEXT: [[ADD30:%.*]] = add nsw i32 [[ADD21]], [[CONV29]]
-; GENERIC-NEXT: [[TMP17:%.*]] = extractelement <8 x i32> [[TMP4]], i64 4
-; GENERIC-NEXT: [[TMP18:%.*]] = sext i32 [[TMP17]] to i64
-; GENERIC-NEXT: [[ARRAYIDX37:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[TMP18]]
+; GENERIC-NEXT: [[TMP14:%.*]] = extractelement <8 x i32> [[TMP4]], i32 4
+; GENERIC-NEXT: [[ARRAYIDX37:%.*]] = getelementptr inbounds i16, ptr [[G]], i32 [[TMP14]]
; GENERIC-NEXT: [[TMP19:%.*]] = load i16, ptr [[ARRAYIDX37]], align 2
; GENERIC-NEXT: [[CONV38:%.*]] = zext i16 [[TMP19]] to i32
; GENERIC-NEXT: [[ADD39:%.*]] = add nsw i32 [[ADD30]], [[CONV38]]
-; GENERIC-NEXT: [[TMP20:%.*]] = extractelement <8 x i32> [[TMP4]], i64 5
-; GENERIC-NEXT: [[TMP21:%.*]] = sext i32 [[TMP20]] to i64
-; GENERIC-NEXT: [[ARRAYIDX46:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[TMP21]]
+; GENERIC-NEXT: [[TMP15:%.*]] = extractelement <8 x i32> [[TMP4]], i32 5
+; GENERIC-NEXT: [[ARRAYIDX46:%.*]] = getelementptr inbounds i16, ptr [[G]], i32 [[TMP15]]
; GENERIC-NEXT: [[TMP22:%.*]] = load i16, ptr [[ARRAYIDX46]], align 2
; GENERIC-NEXT: [[CONV47:%.*]] = zext i16 [[TMP22]] to i32
; GENERIC-NEXT: [[ADD48:%.*]] = add nsw i32 [[ADD39]], [[CONV47]]
-; GENERIC-NEXT: [[TMP23:%.*]] = extractelement <8 x i32> [[TMP4]], i64 6
-; GENERIC-NEXT: [[TMP24:%.*]] = sext i32 [[TMP23]] to i64
-; GENERIC-NEXT: [[ARRAYIDX55:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[TMP24]]
+; GENERIC-NEXT: [[TMP17:%.*]] = extractelement <8 x i32> [[TMP4]], i32 6
+; GENERIC-NEXT: [[ARRAYIDX55:%.*]] = getelementptr inbounds i16, ptr [[G]], i32 [[TMP17]]
; GENERIC-NEXT: [[TMP25:%.*]] = load i16, ptr [[ARRAYIDX55]], align 2
; GENERIC-NEXT: [[CONV56:%.*]] = zext i16 [[TMP25]] to i32
; GENERIC-NEXT: [[ADD57:%.*]] = add nsw i32 [[ADD48]], [[CONV56]]
-; GENERIC-NEXT: [[TMP26:%.*]] = extractelement <8 x i32> [[TMP4]], i64 7
-; GENERIC-NEXT: [[TMP27:%.*]] = sext i32 [[TMP26]] to i64
-; GENERIC-NEXT: [[ARRAYIDX64:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[TMP27]]
+; GENERIC-NEXT: [[TMP20:%.*]] = extractelement <8 x i32> [[TMP4]], i32 7
+; GENERIC-NEXT: [[ARRAYIDX64:%.*]] = getelementptr inbounds i16, ptr [[G]], i32 [[TMP20]]
; GENERIC-NEXT: [[TMP28:%.*]] = load i16, ptr [[ARRAYIDX64]], align 2
; GENERIC-NEXT: [[CONV65:%.*]] = zext i16 [[TMP28]] to i32
; GENERIC-NEXT: [[ADD66]] = add nsw i32 [[ADD57]], [[CONV65]]
@@ -109,57 +101,49 @@ define i32 @gather_reduce_8x16_i32(ptr nocapture readonly %a, ptr nocapture read
; KRYO-NEXT: [[I_0103:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
; KRYO-NEXT: [[SUM_0102:%.*]] = phi i32 [ [[ADD66]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
; KRYO-NEXT: [[A_ADDR_0101:%.*]] = phi ptr [ [[INCDEC_PTR58:%.*]], [[FOR_BODY]] ], [ [[A:%.*]], [[FOR_BODY_PREHEADER]] ]
-; KRYO-NEXT: [[INCDEC_PTR58]] = getelementptr inbounds nuw i8, ptr [[A_ADDR_0101]], i64 16
+; KRYO-NEXT: [[INCDEC_PTR58]] = getelementptr inbounds i16, ptr [[A_ADDR_0101]], i64 8
; KRYO-NEXT: [[TMP0:%.*]] = load <8 x i16>, ptr [[A_ADDR_0101]], align 2
; KRYO-NEXT: [[TMP1:%.*]] = zext <8 x i16> [[TMP0]] to <8 x i32>
; KRYO-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr [[B:%.*]], align 2
; KRYO-NEXT: [[TMP3:%.*]] = zext <8 x i16> [[TMP2]] to <8 x i32>
; KRYO-NEXT: [[TMP4:%.*]] = sub nsw <8 x i32> [[TMP1]], [[TMP3]]
-; KRYO-NEXT: [[TMP5:%.*]] = extractelement <8 x i32> [[TMP4]], i64 0
-; KRYO-NEXT: [[TMP6:%.*]] = sext i32 [[TMP5]] to i64
-; KRYO-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[G:%.*]], i64 [[TMP6]]
+; KRYO-NEXT: [[TMP5:%.*]] = extractelement <8 x i32> [[TMP4]], i32 0
+; KRYO-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[G:%.*]], i32 [[TMP5]]
; KRYO-NEXT: [[TMP7:%.*]] = load i16, ptr [[ARRAYIDX]], align 2
; KRYO-NEXT: [[CONV3:%.*]] = zext i16 [[TMP7]] to i32
-; KRYO-NEXT: [[ADD:%.*]] = add nsw i32 [[SUM_0102]], [[CONV3]]
-; KRYO-NEXT: [[TMP8:%.*]] = extractelement <8 x i32> [[TMP4]], i64 1
-; KRYO-NEXT: [[TMP9:%.*]] = sext i32 [[TMP8]] to i64
-; KRYO-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[TMP9]]
+; KRYO-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV3]], [[SUM_0102]]
+; KRYO-NEXT: [[TMP8:%.*]] = extractelement <8 x i32> [[TMP4]], i32 1
+; KRYO-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i16, ptr [[G]], i32 [[TMP8]]
; KRYO-NEXT: [[TMP10:%.*]] = load i16, ptr [[ARRAYIDX10]], align 2
; KRYO-NEXT: [[CONV11:%.*]] = zext i16 [[TMP10]] to i32
; KRYO-NEXT: [[ADD12:%.*]] = add nsw i32 [[ADD]], [[CONV11]]
-; KRYO-NEXT: [[TMP11:%.*]] = extractelement <8 x i32> [[TMP4]], i64 2
-; KRYO-NEXT: [[TMP12:%.*]] = sext i32 [[TMP11]] to i64
-; KRYO-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[TMP12]]
+; KRYO-NEXT: [[TMP9:%.*]] = extractelement <8 x i32> [[TMP4]], i32 2
+; KRYO-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds i16, ptr [[G]], i32 [[TMP9]]
; KRYO-NEXT: [[TMP13:%.*]] = load i16, ptr [[ARRAYIDX19]], align 2
; KRYO-NEXT: [[CONV20:%.*]] = zext i16 [[TMP13]] to i32
; KRYO-NEXT: [[ADD21:%.*]] = add nsw i32 [[ADD12]], [[CONV20]]
-; KRYO-NEXT: [[TMP14:%.*]] = extractelement <8 x i32> [[TMP4]], i64 3
-; KRYO-NEXT: [[TMP15:%.*]] = sext i32 [[TMP14]] to i64
-; KRYO-NEXT: [[ARRAYIDX28:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[TMP15]]
+; KRYO-NEXT: [[TMP11:%.*]] = extractelement <8 x i32> [[TMP4]], i32 3
+; KRYO-NEXT: [[ARRAYIDX28:%.*]] = getelementptr inbounds i16, ptr [[G]], i32 [[TMP11]]
; KRYO-NEXT: [[TMP16:%.*]] = load i16, ptr [[ARRAYIDX28]], align 2
; KRYO-NEXT: [[CONV29:%.*]] = zext i16 [[TMP16]] to i32
; KRYO-NEXT: [[ADD30:%.*]] = add nsw i32 [[ADD21]], [[CONV29]]
-; KRYO-NEXT: [[TMP17:%.*]] = extractelement <8 x i32> [[TMP4]], i64 4
-; KRYO-NEXT: [[TMP18:%.*]] = sext i32 [[TMP17]] to i64
-; KRYO-NEXT: [[ARRAYIDX37:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[TMP18]]
+; KRYO-NEXT: [[TMP14:%.*]] = extractelement <8 x i32> [[TMP4]], i32 4
+; KRYO-NEXT: [[ARRAYIDX37:%.*]] = getelementptr inbounds i16, ptr [[G]], i32 [[TMP14]]
; KRYO-NEXT: [[TMP19:%.*]] = load i16, ptr [[ARRAYIDX37]], align 2
; KRYO-NEXT: [[CONV38:%.*]] = zext i16 [[TMP19]] to i32
; KRYO-NEXT: [[ADD39:%.*]] = add nsw i32 [[ADD30]], [[CONV38]]
-; KRYO-NEXT: [[TMP20:%.*]] = extractelement <8 x i32> [[TMP4]], i64 5
-; KRYO-NEXT: [[TMP21:%.*]] = sext i32 [[TMP20]] to i64
-; KRYO-NEXT: [[ARRAYIDX46:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[TMP21]]
+; KRYO-NEXT: [[TMP15:%.*]] = extractelement <8 x i32> [[TMP4]], i32 5
+; KRYO-NEXT: [[ARRAYIDX46:%.*]] = getelementptr inbounds i16, ptr [[G]], i32 [[TMP15]]
; KRYO-NEXT: [[TMP22:%.*]] = load i16, ptr [[ARRAYIDX46]], align 2
; KRYO-NEXT: [[CONV47:%.*]] = zext i16 [[TMP22]] to i32
; KRYO-NEXT: [[ADD48:%.*]] = add nsw i32 [[ADD39]], [[CONV47]]
-; KRYO-NEXT: [[TMP23:%.*]] = extractelement <8 x i32> [[TMP4]], i64 6
-; KRYO-NEXT: [[TMP24:%.*]] = sext i32 [[TMP23]] to i64
-; KRYO-NEXT: [[ARRAYIDX55:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[TMP24]]
+; KRYO-NEXT: [[TMP17:%.*]] = extractelement <8 x i32> [[TMP4]], i32 6
+; KRYO-NEXT: [[ARRAYIDX55:%.*]] = getelementptr inbounds i16, ptr [[G]], i32 [[TMP17]]
; KRYO-NEXT: [[TMP25:%.*]] = load i16, ptr [[ARRAYIDX55]], align 2
; KRYO-NEXT: [[CONV56:%.*]] = zext i16 [[TMP25]] to i32
; KRYO-NEXT: [[ADD57:%.*]] = add nsw i32 [[ADD48]], [[CONV56]]
-; KRYO-NEXT: [[TMP26:%.*]] = extractelement <8 x i32> [[TMP4]], i64 7
-; KRYO-NEXT: [[TMP27:%.*]] = sext i32 [[TMP26]] to i64
-; KRYO-NEXT: [[ARRAYIDX64:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[TMP27]]
+; KRYO-NEXT: [[TMP20:%.*]] = extractelement <8 x i32> [[TMP4]], i32 7
+; KRYO-NEXT: [[ARRAYIDX64:%.*]] = getelementptr inbounds i16, ptr [[G]], i32 [[TMP20]]
; KRYO-NEXT: [[TMP28:%.*]] = load i16, ptr [[ARRAYIDX64]], align 2
; KRYO-NEXT: [[CONV65:%.*]] = zext i16 [[TMP28]] to i32
; KRYO-NEXT: [[ADD66]] = add nsw i32 [[ADD57]], [[CONV65]]
@@ -293,55 +277,55 @@ define i32 @gather_reduce_8x16_i64(ptr nocapture readonly %a, ptr nocapture read
; GENERIC-NEXT: [[I_0103:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
; GENERIC-NEXT: [[SUM_0102:%.*]] = phi i32 [ [[ADD66]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
; GENERIC-NEXT: [[A_ADDR_0101:%.*]] = phi ptr [ [[INCDEC_PTR58:%.*]], [[FOR_BODY]] ], [ [[A:%.*]], [[FOR_BODY_PREHEADER]] ]
-; GENERIC-NEXT: [[INCDEC_PTR58]] = getelementptr inbounds nuw i8, ptr [[A_ADDR_0101]], i64 16
+; GENERIC-NEXT: [[INCDEC_PTR58]] = getelementptr inbounds i16, ptr [[A_ADDR_0101]], i64 8
; GENERIC-NEXT: [[TMP0:%.*]] = load <8 x i16>, ptr [[A_ADDR_0101]], align 2
; GENERIC-NEXT: [[TMP1:%.*]] = zext <8 x i16> [[TMP0]] to <8 x i32>
; GENERIC-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr [[B:%.*]], align 2
; GENERIC-NEXT: [[TMP3:%.*]] = zext <8 x i16> [[TMP2]] to <8 x i32>
-; GENERIC-NEXT: [[TMP4:%.*]] = sub nsw <8 x i32> [[TMP1]], [[TMP3]]
-; GENERIC-NEXT: [[TMP5:%.*]] = extractelement <8 x i32> [[TMP4]], i64 0
+; GENERIC-NEXT: [[TMP4:%.*]] = sub <8 x i32> [[TMP1]], [[TMP3]]
+; GENERIC-NEXT: [[TMP5:%.*]] = extractelement <8 x i32> [[TMP4]], i32 0
; GENERIC-NEXT: [[TMP6:%.*]] = sext i32 [[TMP5]] to i64
; GENERIC-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[G:%.*]], i64 [[TMP6]]
; GENERIC-NEXT: [[TMP7:%.*]] = load i16, ptr [[ARRAYIDX]], align 2
; GENERIC-NEXT: [[CONV3:%.*]] = zext i16 [[TMP7]] to i32
-; GENERIC-NEXT: [[ADD:%.*]] = add nsw i32 [[SUM_0102]], [[CONV3]]
-; GENERIC-NEXT: [[TMP8:%.*]] = extractelement <8 x i32> [[TMP4]], i64 1
+; GENERIC-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV3]], [[SUM_0102]]
+; GENERIC-NEXT: [[TMP8:%.*]] = extractelement <8 x i32> [[TMP4]], i32 1
; GENERIC-NEXT: [[TMP9:%.*]] = sext i32 [[TMP8]] to i64
; GENERIC-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[TMP9]]
; GENERIC-NEXT: [[TMP10:%.*]] = load i16, ptr [[ARRAYIDX10]], align 2
; GENERIC-NEXT: [[CONV11:%.*]] = zext i16 [[TMP10]] to i32
; GENERIC-NEXT: [[ADD12:%.*]] = add nsw i32 [[ADD]], [[CONV11]]
-; GENERIC-NEXT: [[TMP11:%.*]] = extractelement <8 x i32> [[TMP4]], i64 2
+; GENERIC-NEXT: [[TMP11:%.*]] = extractelement <8 x i32> [[TMP4]], i32 2
; GENERIC-NEXT: [[TMP12:%.*]] = sext i32 [[TMP11]] to i64
; GENERIC-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[TMP12]]
; GENERIC-NEXT: [[TMP13:%.*]] = load i16, ptr [[ARRAYIDX19]], align 2
; GENERIC-NEXT: [[CONV20:%.*]] = zext i16 [[TMP13]] to i32
; GENERIC-NEXT: [[ADD21:%.*]] = add nsw i32 [[ADD12]], [[CONV20]]
-; GENERIC-NEXT: [[TMP14:%.*]] = extractelement <8 x i32> [[TMP4]], i64 3
+; GENERIC-NEXT: [[TMP14:%.*]] = extractelement <8 x i32> [[TMP4]], i32 3
; GENERI...
[truncated]
``````````
https://github.com/llvm/llvm-project/pull/146257
More information about the llvm-commits mailing list