[llvm] cbc50ba - [AggressiveInstCombine] Handle the nested GEP/BitCast scenario in Load Merge.
via llvm-commits
llvm-commits at lists.llvm.org
Wed May 24 02:36:43 PDT 2023
Author: bipmis
Date: 2023-05-24T10:36:11+01:00
New Revision: cbc50ba12e61a3cc311908bfdf532d37f917ccf4
URL: https://github.com/llvm/llvm-project/commit/cbc50ba12e61a3cc311908bfdf532d37f917ccf4
DIFF: https://github.com/llvm/llvm-project/commit/cbc50ba12e61a3cc311908bfdf532d37f917ccf4.diff
LOG: [AggressiveInstCombine] Handle the nested GEP/BitCast scenario in Load Merge.
Load merging currently has an issue when the load pointer is a nested/chained GEP or BitCast: the original pointer instruction may not dominate the insertion point chosen for the merged load. The patch instead generates a new GEP for the wider load at the insertion point to avoid these dominance problems.
Differential Revision: https://reviews.llvm.org/D150864
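In short, the previous code tried to hoist the load's pointer instruction above the insert point, which breaks down when that pointer is itself built on top of another GEP/BitCast. The new code re-materializes the address at the insert point instead. Below is a minimal C++ sketch of that logic; the helper name getDominatingLoadPtr is illustrative only and not part of the patch, whose actual change lives inside foldConsecutiveLoads as shown in the diff further down.

#include "llvm/ADT/APInt.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"

using namespace llvm;

// Illustrative helper: compute a pointer for the merged wide load that is
// guaranteed to dominate InsertPt, mirroring the logic the patch adds.
static Value *getDominatingLoadPtr(LoadInst *LI1, Instruction *InsertPt,
                                   const DataLayout &DL,
                                   const DominatorTree &DT,
                                   IRBuilder<> &Builder) {
  Value *Ptr = LI1->getPointerOperand();
  Builder.SetInsertPoint(InsertPt);
  if (DT.dominates(Ptr, InsertPt))
    return Ptr; // Existing pointer is already usable at the insert point.

  // Otherwise peel nested GEP/BitCast pointers down to the underlying base
  // pointer, accumulating their constant offsets...
  APInt Offset(DL.getIndexTypeSizeInBits(Ptr->getType()), 0);
  Ptr = Ptr->stripAndAccumulateConstantOffsets(DL, Offset,
                                               /*AllowNonInbounds=*/true);
  // ...and re-materialize the offset as a single byte-wise GEP at the
  // insert point, so the new pointer trivially dominates the wider load.
  return Builder.CreateGEP(Builder.getInt8Ty(), Ptr,
                           Builder.getInt32(Offset.getZExtValue()));
}

The pointer returned this way is then bitcast to the wider load's pointer type, exactly as in the diff below.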
Added:
Modified:
llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
llvm/test/Transforms/AggressiveInstCombine/AArch64/or-load.ll
llvm/test/Transforms/AggressiveInstCombine/X86/or-load.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
index f7b114b53a843..b2925b1a9f2f3 100644
--- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
+++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
@@ -766,7 +766,8 @@ static bool foldLoadsRecursive(Value *V, LoadOps &LOps, const DataLayout &DL,
// pattern which suggests that the loads can be combined. The one and only use
// of the loads is to form a wider load.
static bool foldConsecutiveLoads(Instruction &I, const DataLayout &DL,
- TargetTransformInfo &TTI, AliasAnalysis &AA) {
+ TargetTransformInfo &TTI, AliasAnalysis &AA,
+ const DominatorTree &DT) {
// Only consider load chains of scalar values.
if (isa<VectorType>(I.getType()))
return false;
@@ -791,15 +792,17 @@ static bool foldConsecutiveLoads(Instruction &I, const DataLayout &DL,
if (!Allowed || !Fast)
return false;
- // Make sure the Load pointer of type GEP/non-GEP is above insert point
- Instruction *Inst = dyn_cast<Instruction>(LI1->getPointerOperand());
- if (Inst && Inst->getParent() == LI1->getParent() &&
- !Inst->comesBefore(LOps.RootInsert))
- Inst->moveBefore(LOps.RootInsert);
-
- // New load can be generated
+ // Get the Index and Ptr for the new GEP.
Value *Load1Ptr = LI1->getPointerOperand();
Builder.SetInsertPoint(LOps.RootInsert);
+ if (!DT.dominates(Load1Ptr, LOps.RootInsert)) {
+ APInt Offset1(DL.getIndexTypeSizeInBits(Load1Ptr->getType()), 0);
+ Load1Ptr = Load1Ptr->stripAndAccumulateConstantOffsets(
+ DL, Offset1, /* AllowNonInbounds */ true);
+ Load1Ptr = Builder.CreateGEP(Builder.getInt8Ty(), Load1Ptr,
+ Builder.getInt32(Offset1.getZExtValue()));
+ }
+ // Generate wider load.
Value *NewPtr = Builder.CreateBitCast(Load1Ptr, WiderType->getPointerTo(AS));
NewLoad = Builder.CreateAlignedLoad(WiderType, NewPtr, LI1->getAlign(),
LI1->isVolatile(), "");
@@ -936,7 +939,7 @@ static bool foldUnusualPatterns(Function &F, DominatorTree &DT,
MadeChange |= tryToRecognizePopCount(I);
MadeChange |= tryToFPToSat(I, TTI);
MadeChange |= tryToRecognizeTableBasedCttz(I);
- MadeChange |= foldConsecutiveLoads(I, DL, TTI, AA);
+ MadeChange |= foldConsecutiveLoads(I, DL, TTI, AA, DT);
MadeChange |= foldPatternedLoads(I, DL);
// NOTE: This function introduces erasing of the instruction `I`, so it
// needs to be called at the end of this sequence, otherwise we may make
diff --git a/llvm/test/Transforms/AggressiveInstCombine/AArch64/or-load.ll b/llvm/test/Transforms/AggressiveInstCombine/AArch64/or-load.ll
index 8087137c01ee0..fe0a1bcd3870d 100644
--- a/llvm/test/Transforms/AggressiveInstCombine/AArch64/or-load.ll
+++ b/llvm/test/Transforms/AggressiveInstCombine/AArch64/or-load.ll
@@ -1869,8 +1869,8 @@ define i32 @loadCombine_4consecutive_badinsert2(ptr %p) {
define i32 @loadCombine_4consecutive_badinsert3(ptr %p) {
; LE-LABEL: @loadCombine_4consecutive_badinsert3(
-; LE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
-; LE-NEXT: [[L1:%.*]] = load i32, ptr [[P1]], align 1
+; LE-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
+; LE-NEXT: [[L1:%.*]] = load i32, ptr [[TMP1]], align 1
; LE-NEXT: ret i32 [[L1]]
;
; BE-LABEL: @loadCombine_4consecutive_badinsert3(
@@ -2085,3 +2085,82 @@ define i32 @loadCombine_4consecutive_badinsert6(ptr %p) {
%o3 = or i32 %o2, %e1
ret i32 %o3
}
+
+define void @nested_gep(ptr %p, ptr %dest) {
+; LE-LABEL: @nested_gep(
+; LE-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 68
+; LE-NEXT: [[LD2:%.*]] = load i64, ptr [[TMP1]], align 4
+; LE-NEXT: [[TRUNC:%.*]] = trunc i64 [[LD2]] to i32
+; LE-NEXT: store i32 [[TRUNC]], ptr [[DEST:%.*]], align 4
+; LE-NEXT: ret void
+;
+; BE-LABEL: @nested_gep(
+; BE-NEXT: [[GEP1:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 72
+; BE-NEXT: [[LD1:%.*]] = load i32, ptr [[GEP1]], align 4
+; BE-NEXT: [[LD1_ZEXT:%.*]] = zext i32 [[LD1]] to i64
+; BE-NEXT: [[LD1_SHL:%.*]] = shl nuw i64 [[LD1_ZEXT]], 32
+; BE-NEXT: [[GEP2:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 64
+; BE-NEXT: [[FINAL_PTR:%.*]] = getelementptr inbounds i8, ptr [[GEP2]], i64 4
+; BE-NEXT: [[LD2:%.*]] = load i32, ptr [[FINAL_PTR]], align 4
+; BE-NEXT: [[LD2_ZEXT:%.*]] = zext i32 [[LD2]] to i64
+; BE-NEXT: [[OR:%.*]] = or i64 [[LD1_SHL]], [[LD2_ZEXT]]
+; BE-NEXT: [[ADD:%.*]] = add i64 [[OR]], 0
+; BE-NEXT: [[TRUNC:%.*]] = trunc i64 [[ADD]] to i32
+; BE-NEXT: store i32 [[TRUNC]], ptr [[DEST:%.*]], align 4
+; BE-NEXT: ret void
+;
+ %gep1 = getelementptr inbounds i8, ptr %p, i64 72
+ %ld1 = load i32, ptr %gep1, align 4
+ %ld1_zext = zext i32 %ld1 to i64
+ %ld1_shl = shl nuw i64 %ld1_zext, 32
+ %gep2 = getelementptr inbounds i8, ptr %p, i64 64
+ ; Don't move final_ptr before gep2
+ %final_ptr = getelementptr inbounds i8, ptr %gep2, i64 4
+ %ld2 = load i32, ptr %final_ptr, align 4
+ %ld2_zext = zext i32 %ld2 to i64
+ %or = or i64 %ld1_shl, %ld2_zext
+ %add = add i64 %or, 0
+ %trunc = trunc i64 %add to i32
+ store i32 %trunc, ptr %dest, align 4
+ ret void
+}
+
+
+define void @bitcast_gep(ptr %p, ptr %dest) {
+; LE-LABEL: @bitcast_gep(
+; LE-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 68
+; LE-NEXT: [[LD2:%.*]] = load i64, ptr [[TMP1]], align 4
+; LE-NEXT: [[TRUNC:%.*]] = trunc i64 [[LD2]] to i32
+; LE-NEXT: store i32 [[TRUNC]], ptr [[DEST:%.*]], align 4
+; LE-NEXT: ret void
+;
+; BE-LABEL: @bitcast_gep(
+; BE-NEXT: [[GEP1:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 72
+; BE-NEXT: [[LD1:%.*]] = load i32, ptr [[GEP1]], align 4
+; BE-NEXT: [[LD1_ZEXT:%.*]] = zext i32 [[LD1]] to i64
+; BE-NEXT: [[LD1_SHL:%.*]] = shl nuw i64 [[LD1_ZEXT]], 32
+; BE-NEXT: [[GEP2:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 68
+; BE-NEXT: [[FINAL_PTR:%.*]] = bitcast ptr [[GEP2]] to ptr
+; BE-NEXT: [[LD2:%.*]] = load i32, ptr [[FINAL_PTR]], align 4
+; BE-NEXT: [[LD2_ZEXT:%.*]] = zext i32 [[LD2]] to i64
+; BE-NEXT: [[OR:%.*]] = or i64 [[LD1_SHL]], [[LD2_ZEXT]]
+; BE-NEXT: [[ADD:%.*]] = add i64 [[OR]], 0
+; BE-NEXT: [[TRUNC:%.*]] = trunc i64 [[ADD]] to i32
+; BE-NEXT: store i32 [[TRUNC]], ptr [[DEST:%.*]], align 4
+; BE-NEXT: ret void
+;
+ %gep1 = getelementptr inbounds i8, ptr %p, i64 72
+ %ld1 = load i32, ptr %gep1, align 4
+ %ld1_zext = zext i32 %ld1 to i64
+ %ld1_shl = shl nuw i64 %ld1_zext, 32
+ %gep2 = getelementptr inbounds i8, ptr %p, i64 68
+ ; Don't move final_ptr before gep2
+ %final_ptr = bitcast ptr %gep2 to ptr
+ %ld2 = load i32, ptr %final_ptr, align 4
+ %ld2_zext = zext i32 %ld2 to i64
+ %or = or i64 %ld1_shl, %ld2_zext
+ %add = add i64 %or, 0
+ %trunc = trunc i64 %add to i32
+ store i32 %trunc, ptr %dest, align 4
+ ret void
+}
diff --git a/llvm/test/Transforms/AggressiveInstCombine/X86/or-load.ll b/llvm/test/Transforms/AggressiveInstCombine/X86/or-load.ll
index 842b1f781eac7..88f92baa63ef0 100644
--- a/llvm/test/Transforms/AggressiveInstCombine/X86/or-load.ll
+++ b/llvm/test/Transforms/AggressiveInstCombine/X86/or-load.ll
@@ -2005,8 +2005,8 @@ define i32 @loadCombine_4consecutive_badinsert2(ptr %p) {
define i32 @loadCombine_4consecutive_badinsert3(ptr %p) {
; LE-LABEL: @loadCombine_4consecutive_badinsert3(
-; LE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
-; LE-NEXT: [[L1:%.*]] = load i32, ptr [[P1]], align 1
+; LE-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
+; LE-NEXT: [[L1:%.*]] = load i32, ptr [[TMP1]], align 1
; LE-NEXT: ret i32 [[L1]]
;
; BE-LABEL: @loadCombine_4consecutive_badinsert3(
@@ -2303,3 +2303,82 @@ define i64 @loadCombine_nonConstShift2(ptr %arg, i8 %b) {
%o7 = or i64 %s1, %s0
ret i64 %o7
}
+
+define void @nested_gep(ptr %p, ptr %dest) {
+; LE-LABEL: @nested_gep(
+; LE-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 68
+; LE-NEXT: [[LD2:%.*]] = load i64, ptr [[TMP1]], align 4
+; LE-NEXT: [[TRUNC:%.*]] = trunc i64 [[LD2]] to i32
+; LE-NEXT: store i32 [[TRUNC]], ptr [[DEST:%.*]], align 4
+; LE-NEXT: ret void
+;
+; BE-LABEL: @nested_gep(
+; BE-NEXT: [[GEP1:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 72
+; BE-NEXT: [[LD1:%.*]] = load i32, ptr [[GEP1]], align 4
+; BE-NEXT: [[LD1_ZEXT:%.*]] = zext i32 [[LD1]] to i64
+; BE-NEXT: [[LD1_SHL:%.*]] = shl nuw i64 [[LD1_ZEXT]], 32
+; BE-NEXT: [[GEP2:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 64
+; BE-NEXT: [[FINAL_PTR:%.*]] = getelementptr inbounds i8, ptr [[GEP2]], i64 4
+; BE-NEXT: [[LD2:%.*]] = load i32, ptr [[FINAL_PTR]], align 4
+; BE-NEXT: [[LD2_ZEXT:%.*]] = zext i32 [[LD2]] to i64
+; BE-NEXT: [[OR:%.*]] = or i64 [[LD1_SHL]], [[LD2_ZEXT]]
+; BE-NEXT: [[ADD:%.*]] = add i64 [[OR]], 0
+; BE-NEXT: [[TRUNC:%.*]] = trunc i64 [[ADD]] to i32
+; BE-NEXT: store i32 [[TRUNC]], ptr [[DEST:%.*]], align 4
+; BE-NEXT: ret void
+;
+ %gep1 = getelementptr inbounds i8, ptr %p, i64 72
+ %ld1 = load i32, ptr %gep1, align 4
+ %ld1_zext = zext i32 %ld1 to i64
+ %ld1_shl = shl nuw i64 %ld1_zext, 32
+ %gep2 = getelementptr inbounds i8, ptr %p, i64 64
+ ; Don't move final_ptr before gep2
+ %final_ptr = getelementptr inbounds i8, ptr %gep2, i64 4
+ %ld2 = load i32, ptr %final_ptr, align 4
+ %ld2_zext = zext i32 %ld2 to i64
+ %or = or i64 %ld1_shl, %ld2_zext
+ %add = add i64 %or, 0
+ %trunc = trunc i64 %add to i32
+ store i32 %trunc, ptr %dest, align 4
+ ret void
+}
+
+
+define void @bitcast_gep(ptr %p, ptr %dest) {
+; LE-LABEL: @bitcast_gep(
+; LE-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 68
+; LE-NEXT: [[LD2:%.*]] = load i64, ptr [[TMP1]], align 4
+; LE-NEXT: [[TRUNC:%.*]] = trunc i64 [[LD2]] to i32
+; LE-NEXT: store i32 [[TRUNC]], ptr [[DEST:%.*]], align 4
+; LE-NEXT: ret void
+;
+; BE-LABEL: @bitcast_gep(
+; BE-NEXT: [[GEP1:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 72
+; BE-NEXT: [[LD1:%.*]] = load i32, ptr [[GEP1]], align 4
+; BE-NEXT: [[LD1_ZEXT:%.*]] = zext i32 [[LD1]] to i64
+; BE-NEXT: [[LD1_SHL:%.*]] = shl nuw i64 [[LD1_ZEXT]], 32
+; BE-NEXT: [[GEP2:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 68
+; BE-NEXT: [[FINAL_PTR:%.*]] = bitcast ptr [[GEP2]] to ptr
+; BE-NEXT: [[LD2:%.*]] = load i32, ptr [[FINAL_PTR]], align 4
+; BE-NEXT: [[LD2_ZEXT:%.*]] = zext i32 [[LD2]] to i64
+; BE-NEXT: [[OR:%.*]] = or i64 [[LD1_SHL]], [[LD2_ZEXT]]
+; BE-NEXT: [[ADD:%.*]] = add i64 [[OR]], 0
+; BE-NEXT: [[TRUNC:%.*]] = trunc i64 [[ADD]] to i32
+; BE-NEXT: store i32 [[TRUNC]], ptr [[DEST:%.*]], align 4
+; BE-NEXT: ret void
+;
+ %gep1 = getelementptr inbounds i8, ptr %p, i64 72
+ %ld1 = load i32, ptr %gep1, align 4
+ %ld1_zext = zext i32 %ld1 to i64
+ %ld1_shl = shl nuw i64 %ld1_zext, 32
+ %gep2 = getelementptr inbounds i8, ptr %p, i64 68
+ ; Don't move final_ptr before gep2
+ %final_ptr = bitcast ptr %gep2 to ptr
+ %ld2 = load i32, ptr %final_ptr, align 4
+ %ld2_zext = zext i32 %ld2 to i64
+ %or = or i64 %ld1_shl, %ld2_zext
+ %add = add i64 %or, 0
+ %trunc = trunc i64 %add to i32
+ store i32 %trunc, ptr %dest, align 4
+ ret void
+}