[llvm] c8c4bd1 - [LV] Strengthen loop-invariance checks in isPredicatedInst (#140744)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Jun 20 06:01:51 PDT 2025
Author: Ramkumar Ramachandra
Date: 2025-06-20T14:01:48+01:00
New Revision: c8c4bd1ebc6e4451dc835a77bacdbe6a0467f219
URL: https://github.com/llvm/llvm-project/commit/c8c4bd1ebc6e4451dc835a77bacdbe6a0467f219
DIFF: https://github.com/llvm/llvm-project/commit/c8c4bd1ebc6e4451dc835a77bacdbe6a0467f219.diff
LOG: [LV] Strengthen loop-invariance checks in isPredicatedInst (#140744)
Check loop-invariance against SCEV as well.
Added:
llvm/test/Transforms/LoopVectorize/predicatedinst-loop-invariant.ll
Modified:
llvm/lib/Analysis/LoopAccessAnalysis.cpp
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
llvm/test/Transforms/LoopVectorize/RISCV/pr88802.ll
Removed:
################################################################################
diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index 00cdb66d8b779..94b9fe9581264 100644
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -2764,8 +2764,8 @@ LoopAccessInfo::recordAnalysis(StringRef RemarkName, const Instruction *I) {
bool LoopAccessInfo::isInvariant(Value *V) const {
auto *SE = PSE->getSE();
- // TODO: Is this really what we want? Even without FP SCEV, we may want some
- // trivially loop-invariant FP values to be considered invariant.
+ if (TheLoop->isLoopInvariant(V))
+ return true;
if (!SE->isSCEVable(V->getType()))
return false;
const SCEV *S = SE->getSCEV(V);
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index d9f53c4146c28..88b2ffba1b79f 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -3104,14 +3104,14 @@ bool LoopVectorizationCostModel::isPredicatedInst(Instruction *I) const {
// is correct. The easiest form of the later is to require that all values
// stored are the same.
return !(Legal->isInvariant(getLoadStorePointerOperand(I)) &&
- TheLoop->isLoopInvariant(cast<StoreInst>(I)->getValueOperand()));
+ Legal->isInvariant(cast<StoreInst>(I)->getValueOperand()));
}
case Instruction::UDiv:
case Instruction::SDiv:
case Instruction::SRem:
case Instruction::URem:
// If the divisor is loop-invariant no predication is needed.
- return !TheLoop->isLoopInvariant(I->getOperand(1));
+ return !Legal->isInvariant(I->getOperand(1));
}
}
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/pr88802.ll b/llvm/test/Transforms/LoopVectorize/RISCV/pr88802.ll
index 51a8b451dffd9..a1201dcfbdf57 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/pr88802.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/pr88802.ll
@@ -17,126 +17,16 @@ define void @test(ptr %p, i64 %a, i8 %b) {
; CHECK-NEXT: [[TMP3:%.*]] = zext <16 x i8> [[BROADCAST_SPLAT]] to <16 x i32>
; CHECK-NEXT: br label [[FOR_COND:%.*]]
; CHECK: vector.body:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE8:%.*]] ]
-; CHECK-NEXT: [[VEC_IND:%.*]] = phi <16 x i32> [ <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE8]] ]
+; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[FOR_COND]] ]
+; CHECK-NEXT: [[VEC_IND:%.*]] = phi <16 x i32> [ <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[FOR_COND]] ]
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 [[INDEX]], i32 9)
; CHECK-NEXT: [[TMP4:%.*]] = icmp sge <16 x i32> [[VEC_IND]], splat (i32 2)
; CHECK-NEXT: [[TMP5:%.*]] = select <16 x i1> [[ACTIVE_LANE_MASK]], <16 x i1> [[TMP4]], <16 x i1> zeroinitializer
; CHECK-NEXT: [[PREDPHI:%.*]] = select <16 x i1> [[TMP5]], <16 x i32> [[TMP2]], <16 x i32> [[TMP3]]
; CHECK-NEXT: [[TMP6:%.*]] = shl <16 x i32> [[PREDPHI]], splat (i32 8)
; CHECK-NEXT: [[TMP8:%.*]] = trunc <16 x i32> [[TMP6]] to <16 x i8>
-; CHECK-NEXT: [[TMP9:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 0
-; CHECK-NEXT: br i1 [[TMP9]], label [[PRED_STORE_IF:%.*]], label [[VECTOR_BODY:%.*]]
-; CHECK: pred.store.if:
-; CHECK-NEXT: [[TMP19:%.*]] = extractelement <16 x i8> [[TMP8]], i32 0
-; CHECK-NEXT: store i8 [[TMP19]], ptr [[P]], align 1
-; CHECK-NEXT: br label [[VECTOR_BODY]]
-; CHECK: pred.store.continue:
-; CHECK-NEXT: [[CMP_N:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 1
-; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH:%.*]]
-; CHECK: pred.store.if3:
-; CHECK-NEXT: [[TMP12:%.*]] = extractelement <16 x i8> [[TMP8]], i32 1
-; CHECK-NEXT: store i8 [[TMP12]], ptr [[P]], align 1
-; CHECK-NEXT: br label [[SCALAR_PH]]
-; CHECK: pred.store.continue4:
-; CHECK-NEXT: [[TMP13:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 2
-; CHECK-NEXT: br i1 [[TMP13]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6:%.*]]
-; CHECK: pred.store.if5:
-; CHECK-NEXT: [[TMP14:%.*]] = extractelement <16 x i8> [[TMP8]], i32 2
-; CHECK-NEXT: store i8 [[TMP14]], ptr [[P]], align 1
-; CHECK-NEXT: br label [[PRED_STORE_CONTINUE6]]
-; CHECK: pred.store.continue6:
-; CHECK-NEXT: [[TMP15:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 3
-; CHECK-NEXT: br i1 [[TMP15]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE9:%.*]]
-; CHECK: pred.store.if7:
-; CHECK-NEXT: [[TMP16:%.*]] = extractelement <16 x i8> [[TMP8]], i32 3
-; CHECK-NEXT: store i8 [[TMP16]], ptr [[P]], align 1
-; CHECK-NEXT: br label [[PRED_STORE_CONTINUE9]]
-; CHECK: pred.store.continue8:
-; CHECK-NEXT: [[TMP17:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 4
-; CHECK-NEXT: br i1 [[TMP17]], label [[PRED_STORE_IF9:%.*]], label [[PRED_STORE_CONTINUE10:%.*]]
-; CHECK: pred.store.if9:
-; CHECK-NEXT: [[TMP18:%.*]] = extractelement <16 x i8> [[TMP8]], i32 4
-; CHECK-NEXT: store i8 [[TMP18]], ptr [[P]], align 1
-; CHECK-NEXT: br label [[PRED_STORE_CONTINUE10]]
-; CHECK: pred.store.continue10:
-; CHECK-NEXT: [[TMP41:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 5
-; CHECK-NEXT: br i1 [[TMP41]], label [[PRED_STORE_IF11:%.*]], label [[PRED_STORE_CONTINUE12:%.*]]
-; CHECK: pred.store.if11:
-; CHECK-NEXT: [[TMP20:%.*]] = extractelement <16 x i8> [[TMP8]], i32 5
-; CHECK-NEXT: store i8 [[TMP20]], ptr [[P]], align 1
-; CHECK-NEXT: br label [[PRED_STORE_CONTINUE12]]
-; CHECK: pred.store.continue12:
-; CHECK-NEXT: [[TMP21:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 6
-; CHECK-NEXT: br i1 [[TMP21]], label [[PRED_STORE_IF13:%.*]], label [[PRED_STORE_CONTINUE14:%.*]]
-; CHECK: pred.store.if13:
-; CHECK-NEXT: [[TMP22:%.*]] = extractelement <16 x i8> [[TMP8]], i32 6
-; CHECK-NEXT: store i8 [[TMP22]], ptr [[P]], align 1
-; CHECK-NEXT: br label [[PRED_STORE_CONTINUE14]]
-; CHECK: pred.store.continue14:
-; CHECK-NEXT: [[TMP23:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 7
-; CHECK-NEXT: br i1 [[TMP23]], label [[PRED_STORE_IF15:%.*]], label [[PRED_STORE_CONTINUE16:%.*]]
-; CHECK: pred.store.if15:
-; CHECK-NEXT: [[TMP24:%.*]] = extractelement <16 x i8> [[TMP8]], i32 7
-; CHECK-NEXT: store i8 [[TMP24]], ptr [[P]], align 1
-; CHECK-NEXT: br label [[PRED_STORE_CONTINUE16]]
-; CHECK: pred.store.continue16:
-; CHECK-NEXT: [[TMP25:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 8
-; CHECK-NEXT: br i1 [[TMP25]], label [[PRED_STORE_IF17:%.*]], label [[PRED_STORE_CONTINUE18:%.*]]
-; CHECK: pred.store.if17:
-; CHECK-NEXT: [[TMP26:%.*]] = extractelement <16 x i8> [[TMP8]], i32 8
-; CHECK-NEXT: store i8 [[TMP26]], ptr [[P]], align 1
-; CHECK-NEXT: br label [[PRED_STORE_CONTINUE18]]
-; CHECK: pred.store.continue18:
-; CHECK-NEXT: [[TMP27:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 9
-; CHECK-NEXT: br i1 [[TMP27]], label [[PRED_STORE_IF19:%.*]], label [[PRED_STORE_CONTINUE20:%.*]]
-; CHECK: pred.store.if19:
-; CHECK-NEXT: [[TMP28:%.*]] = extractelement <16 x i8> [[TMP8]], i32 9
-; CHECK-NEXT: store i8 [[TMP28]], ptr [[P]], align 1
-; CHECK-NEXT: br label [[PRED_STORE_CONTINUE20]]
-; CHECK: pred.store.continue20:
-; CHECK-NEXT: [[TMP29:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 10
-; CHECK-NEXT: br i1 [[TMP29]], label [[PRED_STORE_IF21:%.*]], label [[PRED_STORE_CONTINUE22:%.*]]
-; CHECK: pred.store.if21:
-; CHECK-NEXT: [[TMP30:%.*]] = extractelement <16 x i8> [[TMP8]], i32 10
-; CHECK-NEXT: store i8 [[TMP30]], ptr [[P]], align 1
-; CHECK-NEXT: br label [[PRED_STORE_CONTINUE22]]
-; CHECK: pred.store.continue22:
-; CHECK-NEXT: [[TMP31:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 11
-; CHECK-NEXT: br i1 [[TMP31]], label [[PRED_STORE_IF23:%.*]], label [[PRED_STORE_CONTINUE24:%.*]]
-; CHECK: pred.store.if23:
-; CHECK-NEXT: [[TMP32:%.*]] = extractelement <16 x i8> [[TMP8]], i32 11
-; CHECK-NEXT: store i8 [[TMP32]], ptr [[P]], align 1
-; CHECK-NEXT: br label [[PRED_STORE_CONTINUE24]]
-; CHECK: pred.store.continue24:
-; CHECK-NEXT: [[TMP33:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 12
-; CHECK-NEXT: br i1 [[TMP33]], label [[PRED_STORE_IF25:%.*]], label [[PRED_STORE_CONTINUE26:%.*]]
-; CHECK: pred.store.if25:
-; CHECK-NEXT: [[TMP34:%.*]] = extractelement <16 x i8> [[TMP8]], i32 12
-; CHECK-NEXT: store i8 [[TMP34]], ptr [[P]], align 1
-; CHECK-NEXT: br label [[PRED_STORE_CONTINUE26]]
-; CHECK: pred.store.continue26:
-; CHECK-NEXT: [[TMP35:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 13
-; CHECK-NEXT: br i1 [[TMP35]], label [[PRED_STORE_IF27:%.*]], label [[PRED_STORE_CONTINUE28:%.*]]
-; CHECK: pred.store.if27:
-; CHECK-NEXT: [[TMP36:%.*]] = extractelement <16 x i8> [[TMP8]], i32 13
-; CHECK-NEXT: store i8 [[TMP36]], ptr [[P]], align 1
-; CHECK-NEXT: br label [[PRED_STORE_CONTINUE28]]
-; CHECK: pred.store.continue28:
-; CHECK-NEXT: [[TMP37:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 14
-; CHECK-NEXT: br i1 [[TMP37]], label [[PRED_STORE_IF29:%.*]], label [[PRED_STORE_CONTINUE30:%.*]]
-; CHECK: pred.store.if29:
-; CHECK-NEXT: [[TMP38:%.*]] = extractelement <16 x i8> [[TMP8]], i32 14
-; CHECK-NEXT: store i8 [[TMP38]], ptr [[P]], align 1
-; CHECK-NEXT: br label [[PRED_STORE_CONTINUE30]]
-; CHECK: pred.store.continue30:
-; CHECK-NEXT: [[TMP39:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 15
-; CHECK-NEXT: br i1 [[TMP39]], label [[PRED_STORE_IF31:%.*]], label [[PRED_STORE_CONTINUE8]]
-; CHECK: pred.store.if31:
; CHECK-NEXT: [[TMP40:%.*]] = extractelement <16 x i8> [[TMP8]], i32 15
; CHECK-NEXT: store i8 [[TMP40]], ptr [[P]], align 1
-; CHECK-NEXT: br label [[PRED_STORE_CONTINUE8]]
-; CHECK: pred.store.continue32:
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 16
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <16 x i32> [[VEC_IND]], splat (i32 16)
; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[FOR_COND]], !llvm.loop [[LOOP0:![0-9]+]]
diff --git a/llvm/test/Transforms/LoopVectorize/predicatedinst-loop-invariant.ll b/llvm/test/Transforms/LoopVectorize/predicatedinst-loop-invariant.ll
new file mode 100644
index 0000000000000..0a975108edeed
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/predicatedinst-loop-invariant.ll
@@ -0,0 +1,263 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 5
+; RUN: opt -passes=loop-vectorize -force-vector-width=4 -S %s | FileCheck %s
+
+define void @loop_invariant_store(ptr %p, i64 %a, i8 %b) {
+; CHECK-LABEL: define void @loop_invariant_store(
+; CHECK-SAME: ptr [[P:%.*]], i64 [[A:%.*]], i8 [[B:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i8> poison, i8 [[B]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLATINSERT]], <4 x i8> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i64> poison, i64 [[A]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT1]], <4 x i64> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP0:%.*]] = shl <4 x i64> [[BROADCAST_SPLAT2]], splat (i64 48)
+; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i64> [[TMP0]], splat (i64 52)
+; CHECK-NEXT: [[TMP2:%.*]] = trunc <4 x i64> [[TMP1]] to <4 x i32>
+; CHECK-NEXT: [[TMP3:%.*]] = zext <4 x i8> [[BROADCAST_SPLAT]] to <4 x i32>
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP4:%.*]] = icmp ule <4 x i32> [[VEC_IND]], splat (i32 8)
+; CHECK-NEXT: [[TMP5:%.*]] = icmp sge <4 x i32> [[VEC_IND]], splat (i32 2)
+; CHECK-NEXT: [[TMP6:%.*]] = select <4 x i1> [[TMP4]], <4 x i1> [[TMP5]], <4 x i1> zeroinitializer
+; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP6]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]]
+; CHECK-NEXT: [[TMP7:%.*]] = shl <4 x i32> [[PREDPHI]], splat (i32 8)
+; CHECK-NEXT: [[TMP8:%.*]] = trunc <4 x i32> [[TMP7]] to <4 x i8>
+; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i8> [[TMP8]], i32 3
+; CHECK-NEXT: store i8 [[TMP9]], ptr [[P]], align 1
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
+; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i32 [[INDEX_NEXT]], 12
+; CHECK-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: br label %[[EXIT:.*]]
+; CHECK: [[SCALAR_PH]]:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
+; CHECK: [[LOOP_HEADER]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[ADD:%.*]], %[[LOOP_LATCH:.*]] ]
+; CHECK-NEXT: [[ADD]] = add i32 [[IV]], 1
+; CHECK-NEXT: [[CMP_SLT:%.*]] = icmp slt i32 [[IV]], 2
+; CHECK-NEXT: [[SHL:%.*]] = shl i64 [[A]], 48
+; CHECK-NEXT: [[ASHR:%.*]] = ashr i64 [[SHL]], 52
+; CHECK-NEXT: [[TRUNC_I32:%.*]] = trunc i64 [[ASHR]] to i32
+; CHECK-NEXT: br i1 [[CMP_SLT]], label %[[COND_FALSE:.*]], label %[[LOOP_LATCH]]
+; CHECK: [[COND_FALSE]]:
+; CHECK-NEXT: [[ZEXT:%.*]] = zext i8 [[B]] to i32
+; CHECK-NEXT: br label %[[LOOP_LATCH]]
+; CHECK: [[LOOP_LATCH]]:
+; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[TRUNC_I32]], %[[LOOP_HEADER]] ], [ [[ZEXT]], %[[COND_FALSE]] ]
+; CHECK-NEXT: [[SHL_I32:%.*]] = shl i32 [[COND]], 8
+; CHECK-NEXT: [[TRUNC:%.*]] = trunc i32 [[SHL_I32]] to i8
+; CHECK-NEXT: store i8 [[TRUNC]], ptr [[P]], align 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[IV]], 8
+; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP_HEADER]], label %[[EXIT]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %loop.header
+
+loop.header: ; preds = %loop.latch, %entry
+ %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ]
+ %iv.next = add i32 %iv, 1
+ %cmp.slt = icmp slt i32 %iv, 2
+ %shl = shl i64 %a, 48
+ %ashr = ashr i64 %shl, 52
+ %trunc.i32 = trunc i64 %ashr to i32
+ br i1 %cmp.slt, label %cond.false, label %loop.latch
+
+cond.false: ; preds = %loop.header
+ %zext = zext i8 %b to i32
+ br label %loop.latch
+
+loop.latch: ; preds = %cond.false, %loop.header
+ %cond = phi i32 [ %trunc.i32, %loop.header ], [ %zext, %cond.false ]
+ %shl.i32 = shl i32 %cond, 8
+ %trunc = trunc i32 %shl.i32 to i8
+ store i8 %trunc, ptr %p, align 1
+ %exitcond = icmp slt i32 %iv, 8
+ br i1 %exitcond, label %loop.header, label %exit
+
+exit: ; preds = %loop.latch
+ ret void
+}
+
+define void @loop_invariant_srem(ptr %p, i64 %a, i8 %b) {
+; CHECK-LABEL: define void @loop_invariant_srem(
+; CHECK-SAME: ptr [[P:%.*]], i64 [[A:%.*]], i8 [[B:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i8> poison, i8 [[B]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLATINSERT]], <4 x i8> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i64> poison, i64 [[A]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT1]], <4 x i64> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP0:%.*]] = shl <4 x i64> [[BROADCAST_SPLAT2]], splat (i64 48)
+; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i64> [[TMP0]], splat (i64 52)
+; CHECK-NEXT: [[TMP2:%.*]] = trunc <4 x i64> [[TMP1]] to <4 x i32>
+; CHECK-NEXT: [[TMP3:%.*]] = zext <4 x i8> [[BROADCAST_SPLAT]] to <4 x i32>
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE10:.*]] ]
+; CHECK-NEXT: [[VEC_IND1:%.*]] = phi <4 x i8> [ <i8 0, i8 1, i8 2, i8 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE10]] ]
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <4 x i32> poison, i32 [[INDEX]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT3]], <4 x i32> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[VEC_IND:%.*]] = add <4 x i32> [[BROADCAST_SPLAT4]], <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: [[TMP4:%.*]] = icmp ule <4 x i32> [[VEC_IND]], splat (i32 8)
+; CHECK-NEXT: [[TMP5:%.*]] = icmp sge <4 x i8> [[VEC_IND1]], splat (i8 2)
+; CHECK-NEXT: [[TMP6:%.*]] = select <4 x i1> [[TMP4]], <4 x i1> [[TMP5]], <4 x i1> zeroinitializer
+; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP6]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]]
+; CHECK-NEXT: [[TMP7:%.*]] = shl <4 x i32> [[PREDPHI]], splat (i32 8)
+; CHECK-NEXT: [[TMP8:%.*]] = trunc <4 x i32> [[TMP7]] to <4 x i8>
+; CHECK-NEXT: [[TMP11:%.*]] = srem <4 x i8> [[VEC_IND1]], [[TMP8]]
+; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP4]], i32 0
+; CHECK-NEXT: br i1 [[TMP10]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
+; CHECK: [[PRED_STORE_IF]]:
+; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i8> [[TMP11]], i32 0
+; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[P]], i8 [[TMP13]]
+; CHECK-NEXT: store i32 4, ptr [[TMP12]], align 4
+; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
+; CHECK: [[PRED_STORE_CONTINUE]]:
+; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i1> [[TMP4]], i32 1
+; CHECK-NEXT: br i1 [[TMP14]], label %[[PRED_STORE_IF5:.*]], label %[[PRED_STORE_CONTINUE6:.*]]
+; CHECK: [[PRED_STORE_IF5]]:
+; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x i8> [[TMP11]], i32 1
+; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i32, ptr [[P]], i8 [[TMP16]]
+; CHECK-NEXT: store i32 4, ptr [[TMP15]], align 4
+; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE6]]
+; CHECK: [[PRED_STORE_CONTINUE6]]:
+; CHECK-NEXT: [[TMP18:%.*]] = extractelement <4 x i1> [[TMP4]], i32 2
+; CHECK-NEXT: br i1 [[TMP18]], label %[[PRED_STORE_IF7:.*]], label %[[PRED_STORE_CONTINUE8:.*]]
+; CHECK: [[PRED_STORE_IF7]]:
+; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i8> [[TMP11]], i32 2
+; CHECK-NEXT: [[TMP19:%.*]] = getelementptr i32, ptr [[P]], i8 [[TMP20]]
+; CHECK-NEXT: store i32 4, ptr [[TMP19]], align 4
+; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE8]]
+; CHECK: [[PRED_STORE_CONTINUE8]]:
+; CHECK-NEXT: [[TMP22:%.*]] = extractelement <4 x i1> [[TMP4]], i32 3
+; CHECK-NEXT: br i1 [[TMP22]], label %[[PRED_STORE_IF9:.*]], label %[[PRED_STORE_CONTINUE10]]
+; CHECK: [[PRED_STORE_IF9]]:
+; CHECK-NEXT: [[TMP21:%.*]] = extractelement <4 x i8> [[TMP11]], i32 3
+; CHECK-NEXT: [[TMP23:%.*]] = getelementptr i32, ptr [[P]], i8 [[TMP21]]
+; CHECK-NEXT: store i32 4, ptr [[TMP23]], align 4
+; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE10]]
+; CHECK: [[PRED_STORE_CONTINUE10]]:
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i8> [[VEC_IND1]], splat (i8 4)
+; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i32 [[INDEX_NEXT]], 12
+; CHECK-NEXT: br i1 [[TMP26]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: br label %[[EXIT:.*]]
+; CHECK: [[SCALAR_PH]]:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
+; CHECK: [[LOOP_HEADER]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
+; CHECK-NEXT: [[IV_NEXT]] = add i8 [[IV]], 1
+; CHECK-NEXT: [[CMP_SLT:%.*]] = icmp slt i8 [[IV]], 2
+; CHECK-NEXT: [[SHL:%.*]] = shl i64 [[A]], 48
+; CHECK-NEXT: [[ASHR:%.*]] = ashr i64 [[SHL]], 52
+; CHECK-NEXT: [[TRUNC_I32:%.*]] = trunc i64 [[ASHR]] to i32
+; CHECK-NEXT: br i1 [[CMP_SLT]], label %[[COND_FALSE:.*]], label %[[LOOP_LATCH]]
+; CHECK: [[COND_FALSE]]:
+; CHECK-NEXT: [[ZEXT:%.*]] = zext i8 [[B]] to i32
+; CHECK-NEXT: br label %[[LOOP_LATCH]]
+; CHECK: [[LOOP_LATCH]]:
+; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[TRUNC_I32]], %[[LOOP_HEADER]] ], [ [[ZEXT]], %[[COND_FALSE]] ]
+; CHECK-NEXT: [[SHL_I32:%.*]] = shl i32 [[COND]], 8
+; CHECK-NEXT: [[TRUNC:%.*]] = trunc i32 [[SHL_I32]] to i8
+; CHECK-NEXT: [[REM:%.*]] = srem i8 [[IV]], [[TRUNC]]
+; CHECK-NEXT: [[GEP_P_REM:%.*]] = getelementptr i32, ptr [[P]], i8 [[REM]]
+; CHECK-NEXT: store i32 4, ptr [[GEP_P_REM]], align 4
+; CHECK-NEXT: [[EC:%.*]] = icmp eq i8 [[IV]], 8
+; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP5:![0-9]+]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %loop.header
+
+loop.header: ; preds = %loop.latch, %entry
+ %iv = phi i8 [ 0, %entry ], [ %iv.next, %loop.latch ]
+ %iv.next = add i8 %iv, 1
+ %cmp.slt = icmp slt i8 %iv, 2
+ %shl = shl i64 %a, 48
+ %ashr = ashr i64 %shl, 52
+ %trunc.i32 = trunc i64 %ashr to i32
+ br i1 %cmp.slt, label %cond.false, label %loop.latch
+
+cond.false: ; preds = %loop.header
+ %zext = zext i8 %b to i32
+ br label %loop.latch
+
+loop.latch: ; preds = %cond.false, %loop.header
+ %cond = phi i32 [ %trunc.i32, %loop.header ], [ %zext, %cond.false ]
+ %shl.i32 = shl i32 %cond, 8
+ %trunc = trunc i32 %shl.i32 to i8
+ %rem = srem i8 %iv, %trunc
+ %gep.p.rem = getelementptr i32, ptr %p, i8 %rem
+ store i32 4, ptr %gep.p.rem
+ %ec = icmp eq i8 %iv, 8
+ br i1 %ec, label %exit, label %loop.header
+
+exit: ; preds = %loop.latch
+ ret void
+}
+
+define void @loop_invariant_float_store(ptr %p, i32 %a) {
+; CHECK-LABEL: define void @loop_invariant_float_store(
+; CHECK-SAME: ptr [[P:%.*]], i32 [[A:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[TMP10:%.*]] = sitofp i32 [[A]] to float
+; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: store float [[TMP10]], ptr [[P]], align 4
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
+; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i32 [[INDEX_NEXT]], 12
+; CHECK-NEXT: br i1 [[TMP17]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: br label %[[EXIT:.*]]
+; CHECK: [[SCALAR_PH]]:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
+; CHECK: [[LOOP_HEADER]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
+; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
+; CHECK-NEXT: [[CMP_SLT:%.*]] = icmp slt i32 [[IV]], 2
+; CHECK-NEXT: br i1 [[CMP_SLT]], label %[[COND_FALSE:.*]], label %[[LOOP_LATCH]]
+; CHECK: [[COND_FALSE]]:
+; CHECK-NEXT: br label %[[LOOP_LATCH]]
+; CHECK: [[LOOP_LATCH]]:
+; CHECK-NEXT: store float [[TMP10]], ptr [[P]], align 4
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp slt i32 [[IV]], 8
+; CHECK-NEXT: br i1 [[EXITCOND]], label %[[LOOP_HEADER]], label %[[EXIT]], !llvm.loop [[LOOP7:![0-9]+]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
+;
+entry:
+ %a.conv = sitofp i32 %a to float
+ br label %loop.header
+
+loop.header: ; preds = %loop.latch, %entry
+ %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ]
+ %iv.next = add i32 %iv, 1
+ %cmp.slt = icmp slt i32 %iv, 2
+ br i1 %cmp.slt, label %cond.false, label %loop.latch
+
+cond.false: ; preds = %loop.header
+ br label %loop.latch
+
+loop.latch: ; preds = %cond.false, %loop.header
+ store float %a.conv, ptr %p
+ %exitcond = icmp slt i32 %iv, 8
+ br i1 %exitcond, label %loop.header, label %exit
+
+exit: ; preds = %loop.latch
+ ret void
+}
More information about the llvm-commits
mailing list