[llvm] be6aed9 - [SLP]Use number of scalars as a vector length for minbw cast
Alexey Bataev via llvm-commits
llvm-commits at lists.llvm.org
Thu Sep 26 13:20:33 PDT 2024
Author: Alexey Bataev
Date: 2024-09-26T13:06:19-07:00
New Revision: be6aed90c70b7ef718c6c9217158933c8dd57372
URL: https://github.com/llvm/llvm-project/commit/be6aed90c70b7ef718c6c9217158933c8dd57372
DIFF: https://github.com/llvm/llvm-project/commit/be6aed90c70b7ef718c6c9217158933c8dd57372.diff
LOG: [SLP]Use number of scalars as a vector length for minbw cast
When estimating the cost of a minbitwidth cast, the widened user vector type
must be built from the number of scalars in the node, not from the node's
vector factor. Otherwise the cast can be estimated incorrectly, leading to a
compiler crash.
Added:
llvm/test/Transforms/SLPVectorizer/AArch64/minbitwidth-cast-node-length.ll
Modified:
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 53d7ae606ffeea..48a8627ab63e07 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -10357,8 +10357,7 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
unsigned BWSz = DL->getTypeSizeInBits(ScalarTy);
unsigned SrcBWSz = DL->getTypeSizeInBits(UserScalarTy);
unsigned VecOpcode;
- auto *UserVecTy =
- getWidenedType(UserScalarTy, E->getVectorFactor());
+ auto *UserVecTy = getWidenedType(UserScalarTy, E->Scalars.size());
if (BWSz > SrcBWSz)
VecOpcode = Instruction::Trunc;
else
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/minbitwidth-cast-node-length.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/minbitwidth-cast-node-length.ll
new file mode 100644
index 00000000000000..877c06ab7bcde1
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/minbitwidth-cast-node-length.ll
@@ -0,0 +1,274 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S --passes=slp-vectorizer -mtriple=arm64-apple-macosx11.0.0 < %s | FileCheck %s
+
+define void @test(ptr %pDst, i32 %stride, i8 %0, ptr %p1, ptr %p2, ptr %p4, ptr %p3) {
+; CHECK-LABEL: define void @test(
+; CHECK-SAME: ptr [[PDST:%.*]], i32 [[STRIDE:%.*]], i8 [[TMP0:%.*]], ptr [[P1:%.*]], ptr [[P2:%.*]], ptr [[P4:%.*]], ptr [[P3:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[MUL100:%.*]] = mul i32 [[STRIDE]], 9
+; CHECK-NEXT: [[MUL101:%.*]] = mul i32 [[STRIDE]], 7
+; CHECK-NEXT: [[MUL102:%.*]] = mul i32 [[STRIDE]], 5
+; CHECK-NEXT: [[MUL103:%.*]] = mul i32 [[STRIDE]], 3
+; CHECK-NEXT: [[CONV111:%.*]] = zext i8 [[TMP0]] to i32
+; CHECK-NEXT: [[MUL112:%.*]] = mul i32 [[CONV111]], 14
+; CHECK-NEXT: [[CONV117:%.*]] = zext i8 [[TMP0]] to i32
+; CHECK-NEXT: [[MUL118:%.*]] = mul i32 [[CONV117]], 14
+; CHECK-NEXT: [[CONV124:%.*]] = zext i8 [[TMP0]] to i32
+; CHECK-NEXT: [[MUL125:%.*]] = mul i32 [[CONV124]], 14
+; CHECK-NEXT: [[CONV131:%.*]] = zext i8 [[TMP0]] to i32
+; CHECK-NEXT: [[MUL132:%.*]] = mul i32 [[CONV131]], 14
+; CHECK-NEXT: [[CMP139:%.*]] = icmp uge i32 [[MUL112]], [[MUL100]]
+; CHECK-NEXT: [[CMP142:%.*]] = icmp uge i32 [[MUL112]], [[MUL101]]
+; CHECK-NEXT: [[ADD1441:%.*]] = or i1 [[CMP139]], [[CMP142]]
+; CHECK-NEXT: [[CMP145:%.*]] = icmp uge i32 [[MUL112]], [[MUL102]]
+; CHECK-NEXT: [[ADD1472:%.*]] = or i1 [[ADD1441]], [[CMP145]]
+; CHECK-NEXT: [[CMP148:%.*]] = icmp uge i32 [[MUL112]], [[MUL103]]
+; CHECK-NEXT: [[ADD1504:%.*]] = or i1 [[ADD1472]], [[CMP148]]
+; CHECK-NEXT: [[ADD151:%.*]] = zext i1 [[ADD1504]] to i64
+; CHECK-NEXT: [[ARRAYIDX156:%.*]] = getelementptr [8 x i32], ptr [[P1]], i64 0, i64 [[ADD151]]
+; CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[ARRAYIDX156]], align 4
+; CHECK-NEXT: [[CMP165:%.*]] = icmp uge i32 [[MUL118]], [[MUL101]]
+; CHECK-NEXT: [[CMP171:%.*]] = icmp uge i32 [[MUL118]], [[MUL103]]
+; CHECK-NEXT: [[ADD1734:%.*]] = or i1 [[CMP165]], [[CMP171]]
+; CHECK-NEXT: [[ADD173:%.*]] = zext i1 [[ADD1734]] to i64
+; CHECK-NEXT: [[ARRAYIDX178:%.*]] = getelementptr [8 x i32], ptr [[P2]], i64 0, i64 [[ADD173]]
+; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX178]], align 4
+; CHECK-NEXT: [[CMP185:%.*]] = icmp uge i32 [[MUL125]], [[MUL100]]
+; CHECK-NEXT: [[CMP188:%.*]] = icmp uge i32 [[MUL125]], [[MUL101]]
+; CHECK-NEXT: [[ADD1905:%.*]] = or i1 [[CMP185]], [[CMP188]]
+; CHECK-NEXT: [[CMP191:%.*]] = icmp uge i32 [[MUL125]], [[MUL102]]
+; CHECK-NEXT: [[ADD1936:%.*]] = or i1 [[ADD1905]], [[CMP191]]
+; CHECK-NEXT: [[ADD193:%.*]] = zext i1 [[ADD1936]] to i64
+; CHECK-NEXT: [[ARRAYIDX201:%.*]] = getelementptr [8 x i32], ptr [[P4]], i64 0, i64 [[ADD193]]
+; CHECK-NEXT: [[TMP19:%.*]] = load i32, ptr [[ARRAYIDX201]], align 4
+; CHECK-NEXT: [[CMP208:%.*]] = icmp uge i32 [[MUL132]], [[MUL100]]
+; CHECK-NEXT: [[CMP211:%.*]] = icmp uge i32 [[MUL132]], [[MUL101]]
+; CHECK-NEXT: [[ADD2137:%.*]] = or i1 [[CMP208]], [[CMP211]]
+; CHECK-NEXT: [[CMP214:%.*]] = icmp uge i32 [[MUL132]], [[MUL102]]
+; CHECK-NEXT: [[ADD2168:%.*]] = or i1 [[ADD2137]], [[CMP214]]
+; CHECK-NEXT: [[CMP217:%.*]] = icmp uge i32 [[MUL132]], [[MUL103]]
+; CHECK-NEXT: [[ADD2199:%.*]] = or i1 [[ADD2168]], [[CMP217]]
+; CHECK-NEXT: [[ADD219:%.*]] = zext i1 [[ADD2199]] to i64
+; CHECK-NEXT: [[ARRAYIDX224:%.*]] = getelementptr [8 x i32], ptr [[P3]], i64 0, i64 [[ADD219]]
+; CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr [[ARRAYIDX224]], align 4
+; CHECK-NEXT: [[CONV230:%.*]] = zext i8 [[TMP0]] to i32
+; CHECK-NEXT: [[MUL231:%.*]] = mul i32 [[CONV230]], 14
+; CHECK-NEXT: [[CONV237:%.*]] = zext i8 [[TMP0]] to i32
+; CHECK-NEXT: [[MUL238:%.*]] = mul i32 [[CONV237]], 14
+; CHECK-NEXT: [[CONV244:%.*]] = zext i8 [[TMP0]] to i32
+; CHECK-NEXT: [[MUL245:%.*]] = mul i32 [[CONV244]], 14
+; CHECK-NEXT: [[CONV484:%.*]] = zext i8 [[TMP0]] to i32
+; CHECK-NEXT: [[MUL485:%.*]] = mul i32 [[CONV484]], 14
+; CHECK-NEXT: [[CMP262:%.*]] = icmp uge i32 [[MUL231]], [[MUL101]]
+; CHECK-NEXT: [[CMP268:%.*]] = icmp uge i32 [[MUL231]], [[MUL103]]
+; CHECK-NEXT: [[ADD1503:%.*]] = or i1 [[CMP262]], [[CMP268]]
+; CHECK-NEXT: [[ADD150:%.*]] = zext i1 [[ADD1503]] to i64
+; CHECK-NEXT: [[ARRAYIDX155:%.*]] = getelementptr [8 x i32], ptr [[P1]], i64 0, i64 [[ADD150]]
+; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[ARRAYIDX155]], align 4
+; CHECK-NEXT: [[OR951:%.*]] = or i32 [[TMP13]], [[TMP18]]
+; CHECK-NEXT: [[CMP282:%.*]] = icmp uge i32 [[MUL238]], [[MUL100]]
+; CHECK-NEXT: [[CMP285:%.*]] = icmp uge i32 [[MUL238]], [[MUL101]]
+; CHECK-NEXT: [[ADD28711:%.*]] = or i1 [[CMP282]], [[CMP285]]
+; CHECK-NEXT: [[CMP288:%.*]] = icmp uge i32 [[MUL238]], [[MUL102]]
+; CHECK-NEXT: [[ADD29012:%.*]] = or i1 [[ADD28711]], [[CMP288]]
+; CHECK-NEXT: [[CMP291:%.*]] = icmp uge i32 [[MUL238]], [[MUL103]]
+; CHECK-NEXT: [[ADD29313:%.*]] = or i1 [[ADD29012]], [[CMP291]]
+; CHECK-NEXT: [[ADD293:%.*]] = zext i1 [[ADD29313]] to i64
+; CHECK-NEXT: [[ARRAYIDX298:%.*]] = getelementptr [8 x i32], ptr [[P2]], i64 0, i64 [[ADD293]]
+; CHECK-NEXT: [[TMP21:%.*]] = load i32, ptr [[ARRAYIDX298]], align 4
+; CHECK-NEXT: [[OR301952:%.*]] = or i32 [[TMP21]], [[TMP12]]
+; CHECK-NEXT: [[CMP310:%.*]] = icmp uge i32 [[MUL245]], [[MUL101]]
+; CHECK-NEXT: [[CMP316:%.*]] = icmp uge i32 [[MUL245]], [[MUL103]]
+; CHECK-NEXT: [[ADD31814:%.*]] = or i1 [[CMP310]], [[CMP316]]
+; CHECK-NEXT: [[ADD318:%.*]] = zext i1 [[ADD31814]] to i64
+; CHECK-NEXT: [[ARRAYIDX323:%.*]] = getelementptr [8 x i32], ptr [[P4]], i64 0, i64 [[ADD318]]
+; CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr [[ARRAYIDX323]], align 4
+; CHECK-NEXT: [[OR326953:%.*]] = or i32 [[TMP14]], [[TMP19]]
+; CHECK-NEXT: [[CMP332:%.*]] = icmp uge i32 [[MUL485]], [[MUL100]]
+; CHECK-NEXT: [[CMP335:%.*]] = icmp uge i32 [[MUL485]], [[MUL101]]
+; CHECK-NEXT: [[ADD33715:%.*]] = or i1 [[CMP332]], [[CMP335]]
+; CHECK-NEXT: [[CMP338:%.*]] = icmp uge i32 [[MUL485]], [[MUL102]]
+; CHECK-NEXT: [[ADD34016:%.*]] = or i1 [[ADD33715]], [[CMP338]]
+; CHECK-NEXT: [[CMP341:%.*]] = icmp uge i32 [[MUL485]], [[MUL103]]
+; CHECK-NEXT: [[ADD34317:%.*]] = or i1 [[ADD34016]], [[CMP341]]
+; CHECK-NEXT: [[ADD343:%.*]] = zext i1 [[ADD34317]] to i64
+; CHECK-NEXT: [[ARRAYIDX348:%.*]] = getelementptr [8 x i32], ptr [[P3]], i64 0, i64 [[ADD343]]
+; CHECK-NEXT: [[TMP22:%.*]] = load i32, ptr [[ARRAYIDX348]], align 4
+; CHECK-NEXT: [[OR351954:%.*]] = or i32 [[TMP22]], [[TMP20]]
+; CHECK-NEXT: [[CONV485:%.*]] = zext i8 [[TMP0]] to i32
+; CHECK-NEXT: [[MUL486:%.*]] = mul i32 [[CONV485]], 14
+; CHECK-NEXT: [[CONV491:%.*]] = zext i8 [[TMP0]] to i32
+; CHECK-NEXT: [[MUL492:%.*]] = mul i32 [[CONV491]], 14
+; CHECK-NEXT: [[CONV498:%.*]] = zext i8 [[TMP0]] to i32
+; CHECK-NEXT: [[MUL499:%.*]] = mul i32 [[CONV498]], 14
+; CHECK-NEXT: [[CONV505:%.*]] = zext i8 [[TMP0]] to i32
+; CHECK-NEXT: [[MUL506:%.*]] = mul i32 [[CONV505]], 14
+; CHECK-NEXT: [[CMP519:%.*]] = icmp uge i32 [[MUL486]], [[MUL102]]
+; CHECK-NEXT: [[CMP522:%.*]] = icmp uge i32 [[MUL486]], [[MUL103]]
+; CHECK-NEXT: [[ADD52418:%.*]] = or i1 [[CMP519]], [[CMP522]]
+; CHECK-NEXT: [[ADD524:%.*]] = zext i1 [[ADD52418]] to i64
+; CHECK-NEXT: [[ARRAYIDX529:%.*]] = getelementptr [8 x i32], ptr [[P1]], i64 0, i64 [[ADD524]]
+; CHECK-NEXT: [[TMP23:%.*]] = load i32, ptr [[ARRAYIDX529]], align 4
+; CHECK-NEXT: [[CMP541:%.*]] = icmp uge i32 [[MUL492]], [[MUL101]]
+; CHECK-NEXT: [[CMP544:%.*]] = icmp uge i32 [[MUL492]], [[MUL102]]
+; CHECK-NEXT: [[ADD54619:%.*]] = or i1 [[CMP541]], [[CMP544]]
+; CHECK-NEXT: [[CMP547:%.*]] = icmp uge i32 [[MUL492]], [[MUL103]]
+; CHECK-NEXT: [[ADD54920:%.*]] = or i1 [[ADD54619]], [[CMP547]]
+; CHECK-NEXT: [[ADD549:%.*]] = zext i1 [[ADD54920]] to i64
+; CHECK-NEXT: [[ARRAYIDX554:%.*]] = getelementptr [8 x i32], ptr [[P2]], i64 0, i64 [[ADD549]]
+; CHECK-NEXT: [[TMP25:%.*]] = load i32, ptr [[ARRAYIDX554]], align 4
+; CHECK-NEXT: [[CMP572:%.*]] = icmp uge i32 [[MUL499]], [[MUL103]]
+; CHECK-NEXT: [[CONV573:%.*]] = zext i1 [[CMP572]] to i64
+; CHECK-NEXT: [[ARRAYIDX579:%.*]] = getelementptr [8 x i32], ptr [[P4]], i64 0, i64 [[CONV573]]
+; CHECK-NEXT: [[TMP27:%.*]] = load i32, ptr [[ARRAYIDX579]], align 4
+; CHECK-NEXT: [[CMP594:%.*]] = icmp uge i32 [[MUL506]], [[MUL102]]
+; CHECK-NEXT: [[CONV595:%.*]] = zext i1 [[CMP594]] to i64
+; CHECK-NEXT: [[ARRAYIDX604:%.*]] = getelementptr [8 x i32], ptr [[P3]], i64 0, i64 [[CONV595]]
+; CHECK-NEXT: [[TMP29:%.*]] = load i32, ptr [[ARRAYIDX604]], align 4
+; CHECK-NEXT: [[OR4791159:%.*]] = or i32 [[OR301952]], [[OR951]]
+; CHECK-NEXT: [[OR6071160:%.*]] = or i32 [[OR4791159]], [[OR326953]]
+; CHECK-NEXT: [[OR4541161:%.*]] = or i32 [[OR6071160]], [[OR351954]]
+; CHECK-NEXT: [[SHL58111621163:%.*]] = or i32 [[TMP27]], [[TMP29]]
+; CHECK-NEXT: [[SHL55611641165:%.*]] = or i32 [[TMP25]], [[SHL58111621163]]
+; CHECK-NEXT: [[SHL53111661167:%.*]] = or i32 [[TMP23]], [[SHL55611641165]]
+; CHECK-NEXT: [[SHL5311166:%.*]] = trunc i32 [[SHL53111661167]] to i8
+; CHECK-NEXT: [[CONV616:%.*]] = trunc i32 [[OR4541161]] to i8
+; CHECK-NEXT: [[ARRAYIDX617:%.*]] = getelementptr i8, ptr [[PDST]], i64 4
+; CHECK-NEXT: store i8 [[CONV616]], ptr [[ARRAYIDX617]], align 1
+; CHECK-NEXT: store i8 [[SHL5311166]], ptr [[PDST]], align 1
+; CHECK-NEXT: ret void
+;
+entry:
+ %mul100 = mul i32 %stride, 9
+ %mul101 = mul i32 %stride, 7
+ %mul102 = mul i32 %stride, 5
+ %mul103 = mul i32 %stride, 3
+ %conv111 = zext i8 %0 to i32
+ %mul112 = mul i32 %conv111, 14
+ %conv117 = zext i8 %0 to i32
+ %mul118 = mul i32 %conv117, 14
+ %conv124 = zext i8 %0 to i32
+ %mul125 = mul i32 %conv124, 14
+ %conv131 = zext i8 %0 to i32
+ %mul132 = mul i32 %conv131, 14
+ %cmp139 = icmp uge i32 %mul112, %mul100
+ %cmp142 = icmp uge i32 %mul112, %mul101
+ %add1441 = or i1 %cmp139, %cmp142
+ %cmp145 = icmp uge i32 %mul112, %mul102
+ %add1472 = or i1 %add1441, %cmp145
+ %cmp148 = icmp uge i32 %mul112, %mul103
+ %add1503 = or i1 %add1472, %cmp148
+ %add150 = zext i1 %add1503 to i64
+ %arrayidx155 = getelementptr [8 x i32], ptr %p1, i64 0, i64 %add150
+ %1 = load i32, ptr %arrayidx155, align 4
+ %cmp165 = icmp uge i32 %mul118, %mul101
+ %cmp171 = icmp uge i32 %mul118, %mul103
+ %add1734 = or i1 %cmp165, %cmp171
+ %add173 = zext i1 %add1734 to i64
+ %arrayidx178 = getelementptr [8 x i32], ptr %p2, i64 0, i64 %add173
+ %2 = load i32, ptr %arrayidx178, align 4
+ %cmp185 = icmp uge i32 %mul125, %mul100
+ %cmp188 = icmp uge i32 %mul125, %mul101
+ %add1905 = or i1 %cmp185, %cmp188
+ %cmp191 = icmp uge i32 %mul125, %mul102
+ %add1936 = or i1 %add1905, %cmp191
+ %add193 = zext i1 %add1936 to i64
+ %arrayidx201 = getelementptr [8 x i32], ptr %p4, i64 0, i64 %add193
+ %3 = load i32, ptr %arrayidx201, align 4
+ %cmp208 = icmp uge i32 %mul132, %mul100
+ %cmp211 = icmp uge i32 %mul132, %mul101
+ %add2137 = or i1 %cmp208, %cmp211
+ %cmp214 = icmp uge i32 %mul132, %mul102
+ %add2168 = or i1 %add2137, %cmp214
+ %cmp217 = icmp uge i32 %mul132, %mul103
+ %add2199 = or i1 %add2168, %cmp217
+ %add219 = zext i1 %add2199 to i64
+ %arrayidx224 = getelementptr [8 x i32], ptr %p3, i64 0, i64 %add219
+ %4 = load i32, ptr %arrayidx224, align 4
+ %conv230 = zext i8 %0 to i32
+ %mul231 = mul i32 %conv230, 14
+ %conv237 = zext i8 %0 to i32
+ %mul238 = mul i32 %conv237, 14
+ %conv244 = zext i8 %0 to i32
+ %mul245 = mul i32 %conv244, 14
+ %conv251 = zext i8 %0 to i32
+ %mul252 = mul i32 %conv251, 14
+ %cmp262 = icmp uge i32 %mul231, %mul101
+ %cmp268 = icmp uge i32 %mul231, %mul103
+ %add27010 = or i1 %cmp262, %cmp268
+ %add270 = zext i1 %add27010 to i64
+ %arrayidx275 = getelementptr [8 x i32], ptr %p1, i64 0, i64 %add270
+ %5 = load i32, ptr %arrayidx275, align 4
+ %or951 = or i32 %5, %1
+ %cmp282 = icmp uge i32 %mul238, %mul100
+ %cmp285 = icmp uge i32 %mul238, %mul101
+ %add28711 = or i1 %cmp282, %cmp285
+ %cmp288 = icmp uge i32 %mul238, %mul102
+ %add29012 = or i1 %add28711, %cmp288
+ %cmp291 = icmp uge i32 %mul238, %mul103
+ %add29313 = or i1 %add29012, %cmp291
+ %add293 = zext i1 %add29313 to i64
+ %arrayidx298 = getelementptr [8 x i32], ptr %p2, i64 0, i64 %add293
+ %6 = load i32, ptr %arrayidx298, align 4
+ %or301952 = or i32 %6, %2
+ %cmp310 = icmp uge i32 %mul245, %mul101
+ %cmp316 = icmp uge i32 %mul245, %mul103
+ %add31814 = or i1 %cmp310, %cmp316
+ %add318 = zext i1 %add31814 to i64
+ %arrayidx323 = getelementptr [8 x i32], ptr %p4, i64 0, i64 %add318
+ %7 = load i32, ptr %arrayidx323, align 4
+ %or326953 = or i32 %7, %3
+ %cmp332 = icmp uge i32 %mul252, %mul100
+ %cmp335 = icmp uge i32 %mul252, %mul101
+ %add33715 = or i1 %cmp332, %cmp335
+ %cmp338 = icmp uge i32 %mul252, %mul102
+ %add34016 = or i1 %add33715, %cmp338
+ %cmp341 = icmp uge i32 %mul252, %mul103
+ %add34317 = or i1 %add34016, %cmp341
+ %add343 = zext i1 %add34317 to i64
+ %arrayidx348 = getelementptr [8 x i32], ptr %p3, i64 0, i64 %add343
+ %8 = load i32, ptr %arrayidx348, align 4
+ %or351954 = or i32 %8, %4
+ %conv484 = zext i8 %0 to i32
+ %mul485 = mul i32 %conv484, 14
+ %conv491 = zext i8 %0 to i32
+ %mul492 = mul i32 %conv491, 14
+ %conv498 = zext i8 %0 to i32
+ %mul499 = mul i32 %conv498, 14
+ %conv505 = zext i8 %0 to i32
+ %mul506 = mul i32 %conv505, 14
+ %cmp519 = icmp uge i32 %mul485, %mul102
+ %cmp522 = icmp uge i32 %mul485, %mul103
+ %add52418 = or i1 %cmp519, %cmp522
+ %add524 = zext i1 %add52418 to i64
+ %arrayidx529 = getelementptr [8 x i32], ptr %p1, i64 0, i64 %add524
+ %9 = load i32, ptr %arrayidx529, align 4
+ %cmp541 = icmp uge i32 %mul492, %mul101
+ %cmp544 = icmp uge i32 %mul492, %mul102
+ %add54619 = or i1 %cmp541, %cmp544
+ %cmp547 = icmp uge i32 %mul492, %mul103
+ %add54920 = or i1 %add54619, %cmp547
+ %add549 = zext i1 %add54920 to i64
+ %arrayidx554 = getelementptr [8 x i32], ptr %p2, i64 0, i64 %add549
+ %10 = load i32, ptr %arrayidx554, align 4
+ %cmp572 = icmp uge i32 %mul499, %mul103
+ %conv573 = zext i1 %cmp572 to i64
+ %arrayidx579 = getelementptr [8 x i32], ptr %p4, i64 0, i64 %conv573
+ %11 = load i32, ptr %arrayidx579, align 4
+ %cmp594 = icmp uge i32 %mul506, %mul102
+ %conv595 = zext i1 %cmp594 to i64
+ %arrayidx604 = getelementptr [8 x i32], ptr %p3, i64 0, i64 %conv595
+ %12 = load i32, ptr %arrayidx604, align 4
+ %or4791159 = or i32 %or301952, %or951
+ %or6071160 = or i32 %or4791159, %or326953
+ %or4541161 = or i32 %or6071160, %or351954
+ %shl58111621163 = or i32 %11, %12
+ %shl55611641165 = or i32 %10, %shl58111621163
+ %shl53111661167 = or i32 %9, %shl55611641165
+ %shl5311166 = trunc i32 %shl53111661167 to i8
+ %conv616 = trunc i32 %or4541161 to i8
+ %arrayidx617 = getelementptr i8, ptr %pDst, i64 4
+ store i8 %conv616, ptr %arrayidx617, align 1
+ store i8 %shl5311166, ptr %pDst, align 1
+ ret void
+}
More information about the llvm-commits mailing list