[llvm] 434b0ba - [AggressiveInstCombine] folding load for constant global patterned arrays and structs by alignment
via llvm-commits
llvm-commits at lists.llvm.org
Thu Mar 23 07:31:57 PDT 2023
Author: khei4
Date: 2023-03-23T23:31:22+09:00
New Revision: 434b0badb5d53138490a075dd945df7480649154
URL: https://github.com/llvm/llvm-project/commit/434b0badb5d53138490a075dd945df7480649154
DIFF: https://github.com/llvm/llvm-project/commit/434b0badb5d53138490a075dd945df7480649154.diff
LOG: [AggressiveInstCombine] folding load for constant global patterned arrays and structs by alignment
Differential Revision: https://reviews.llvm.org/D144445
Reviewed By: nikic
fix: wrong arrow
Added:
Modified:
llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
llvm/test/Transforms/AggressiveInstCombine/patterned-load.ll
Removed:
################################################################################
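Before the patch itself, here is a minimal sketch of the kind of IR the new fold targets: a load from a constant global whose initializer repeats with a period dividing the load alignment, so every offset the load can legally use yields the same value. The global @pat and the function name are illustrative, not copied from the patch's test file:

@pat = internal constant [8 x i8] c"\01\00\01\00\01\00\01\00", align 4

define i8 @load_pat(i64 %idx) {
  ; The `align 2` on the load restricts the possible offsets into @pat to
  ; multiples of 2, and every such in-bounds offset holds the byte 1, so
  ; aggressive-instcombine can now fold this to `ret i8 1`.
  %p = getelementptr inbounds i8, ptr @pat, i64 %idx
  %v = load i8, ptr %p, align 2
  ret i8 %v
}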
diff --git a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
index 473b41241b8a6..cf652836bef25 100644
--- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
+++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
@@ -18,6 +18,7 @@
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
+#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
@@ -305,7 +306,7 @@ static bool tryToRecognizePopCount(Instruction &I) {
Value *MulOp0;
// Matching "(i * 0x01010101...) >> 24".
if ((match(Op0, m_Mul(m_Value(MulOp0), m_SpecificInt(Mask01)))) &&
- match(Op1, m_SpecificInt(MaskShift))) {
+ match(Op1, m_SpecificInt(MaskShift))) {
Value *ShiftOp0;
// Matching "((i + (i >> 4)) & 0x0F0F0F0F...)".
if (match(MulOp0, m_And(m_c_Add(m_LShr(m_Value(ShiftOp0), m_SpecificInt(4)),
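(The hunk above appears to be a whitespace-only reflow inside tryToRecognizePopCount, but for context, its comments refer to the classic SWAR popcount idiom. A sketch of that idiom for i32, illustrative only; whether the recognizer rewrites a given input to @llvm.ctpop.i32 depends on the full set of matchers in the function.)

define i32 @popcount_idiom(i32 %x) {
  %half = lshr i32 %x, 1
  %odd  = and i32 %half, 1431655765      ; 0x55555555
  %v1   = sub i32 %x, %odd               ; bit pairs now hold their popcount
  %q    = lshr i32 %v1, 2
  %lo2  = and i32 %v1, 858993459         ; 0x33333333
  %hi2  = and i32 %q, 858993459
  %v2   = add i32 %lo2, %hi2             ; nibbles hold their popcount
  %s4   = lshr i32 %v2, 4
  %sum  = add i32 %v2, %s4
  %v3   = and i32 %sum, 252645135        ; "((i + (i >> 4)) & 0x0F0F0F0F...)"
  %mul  = mul i32 %v3, 16843009          ; "(i * 0x01010101...)"
  %res  = lshr i32 %mul, 24              ; top byte accumulates the byte sums
  ret i32 %res
}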
@@ -401,8 +402,8 @@ static bool tryToFPToSat(Instruction &I, TargetTransformInfo &TTI) {
/// Try to replace a mathlib call to sqrt with the LLVM intrinsic. This avoids
/// pessimistic codegen that has to account for setting errno and can enable
/// vectorization.
-static bool
-foldSqrt(Instruction &I, TargetTransformInfo &TTI, TargetLibraryInfo &TLI) {
+static bool foldSqrt(Instruction &I, TargetTransformInfo &TTI,
+ TargetLibraryInfo &TLI) {
// Match a call to sqrt mathlib function.
auto *Call = dyn_cast<CallInst>(&I);
if (!Call)
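(This hunk only reflows foldSqrt's signature. For context on the comment above: the rewrite it describes turns a libm sqrt call into the @llvm.sqrt intrinsic when foldSqrt's checks allow it. A minimal sketch, assuming those checks, such as target support and the call's no-NaN style guarantees, are satisfied; the function name is illustrative.)

declare double @sqrt(double)

define double @sqrt_to_intrinsic(double %x) {
  ; A libm call may set errno; when it is safe to ignore that (e.g. the call
  ; carries nnan and the target has a fast sqrt), foldSqrt replaces it with
  ; the errno-free @llvm.sqrt.f64 intrinsic.
  %r = call nnan double @sqrt(double %x)
  ret double %r
}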
@@ -824,6 +825,58 @@ static bool foldConsecutiveLoads(Instruction &I, const DataLayout &DL,
return true;
}
+/// If C is a constant patterned array and all valid loaded results for the
+/// given alignment equal the same constant, fold the load to that constant.
+static bool foldPatternedLoads(Instruction &I, const DataLayout &DL) {
+ auto *LI = dyn_cast<LoadInst>(&I);
+ if (!LI || LI->isVolatile())
+ return false;
+
+ // We can only fold the load if it is from a constant global with a
+ // definitive initializer. Skip the expensive logic if this is not the case.
+ auto *PtrOp = LI->getPointerOperand();
+ auto *GV = dyn_cast<GlobalVariable>(getUnderlyingObject(PtrOp));
+ if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer())
+ return false;
+
+ Type *LoadTy = LI->getType();
+ Constant *C = GV->getInitializer();
+
+ // Bail for large initializers in excess of 4K to avoid too many scans.
+ uint64_t GVSize = DL.getTypeAllocSize(C->getType());
+ if (!GVSize || 4096 < GVSize)
+ return false;
+
+ // Check whether the pointer arrives back at the global variable. If PtrOp is
+ // neither a GlobalVariable nor a GEP, it might not arrive back at the
+ // GlobalVariable.
+ // TODO: Implement GEP handling.
+ unsigned BW = DL.getIndexTypeSizeInBits(PtrOp->getType());
+ // TODO: Determine stride based on GEPs.
+ APInt Stride(BW, 1);
+ APInt ConstOffset(BW, 0);
+
+ // Any possible offset is a multiple of the GEP stride, and any valid offset
+ // is a multiple of the load alignment, so checking only multiples of the
+ // larger of the two is sufficient to establish that all loaded results are
+ // equal.
+ if (auto LA = LI->getAlign();
+ LA <= GV->getAlign().valueOrOne() && Stride.getZExtValue() < LA.value())
+ Stride = APInt(BW, LA.value());
+
+ Constant *Ca = ConstantFoldLoadFromConst(C, LoadTy, ConstOffset, DL);
+ if (!Ca)
+ return false;
+
+ unsigned E = GVSize - DL.getTypeStoreSize(LoadTy);
+ for (; ConstOffset.getZExtValue() <= E; ConstOffset += Stride)
+ if (Ca != ConstantFoldLoadFromConst(C, LoadTy, ConstOffset, DL))
+ return false;
+
+ I.replaceAllUsesWith(Ca);
+
+ return true;
+}
+
/// This is the entry point for folds that could be implemented in regular
/// InstCombine, but they are separated because they are not expected to
/// occur frequently and/or have more than a constant-length pattern match.
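Returning to foldPatternedLoads above: the stride/alignment comment justifies scanning only offsets 0, Stride, 2*Stride, ... and bailing on the first mismatch. A hypothetical negative counterpart to the earlier sketch (illustrative only) shows where that scan fails:

@pat = internal constant [8 x i8] c"\01\00\01\00\01\00\01\00", align 4

define i8 @load_pat_align1(i64 %idx) {
  ; With align 1 the stride stays 1, so the scan compares offsets 0, 1, 2, ...;
  ; offset 0 reads 1 but offset 1 reads 0, so the load is left untouched.
  %p = getelementptr inbounds i8, ptr @pat, i64 %idx
  %v = load i8, ptr %p, align 1
  ret i8 %v
}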
@@ -850,6 +903,7 @@ static bool foldUnusualPatterns(Function &F, DominatorTree &DT,
MadeChange |= tryToFPToSat(I, TTI);
MadeChange |= tryToRecognizeTableBasedCttz(I);
MadeChange |= foldConsecutiveLoads(I, DL, TTI, AA);
+ MadeChange |= foldPatternedLoads(I, DL);
// NOTE: This function introduces erasing of the instruction `I`, so it
// needs to be called at the end of this sequence, otherwise we may make
// bugs.
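In the test changes below, the LE and BE check prefixes correspond to running the file under little- and big-endian data layouts. With the repeating bytes 01 00, a 4-byte-aligned i32 load reads 0x00010001 = 65537 little-endian and 0x01000100 = 16777472 big-endian. A hypothetical standalone version of that case (not the test file's exact contents):

; Little-endian layout; under a big-endian layout ("E") the same load would
; fold to 16777472 (0x01000100) instead of 65537 (0x00010001).
target datalayout = "e"

@pat = internal constant [8 x i8] c"\01\00\01\00\01\00\01\00", align 4

define i32 @load_pat_i32(i64 %idx) {
  ; Both in-bounds 4-byte-aligned offsets (0 and 4) read the bytes 01 00 01 00.
  %p = getelementptr inbounds i32, ptr @pat, i64 %idx
  %v = load i32, ptr %p, align 4
  ret i32 %v
}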
diff --git a/llvm/test/Transforms/AggressiveInstCombine/patterned-load.ll b/llvm/test/Transforms/AggressiveInstCombine/patterned-load.ll
index 5410a21e3211d..7acc6109744ca 100644
--- a/llvm/test/Transforms/AggressiveInstCombine/patterned-load.ll
+++ b/llvm/test/Transforms/AggressiveInstCombine/patterned-load.ll
@@ -12,12 +12,9 @@
@constpackedstruct = internal constant <{[8 x i8]}> <{[8 x i8] c"\01\00\01\00\01\00\01\00"}>, align 4
@conststruct = internal constant {i16, [8 x i8]} {i16 1, [8 x i8] c"\01\00\01\00\01\00\01\00"}, align 4
-; TODO: this will be ret i8 1
define i8 @inbounds_gep_load_i8_align2(i64 %idx){
; CHECK-LABEL: @inbounds_gep_load_i8_align2(
-; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr @constarray1, i64 [[IDX:%.*]]
-; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[TMP1]], align 2
-; CHECK-NEXT: ret i8 [[TMP2]]
+; CHECK-NEXT: ret i8 1
;
%1 = getelementptr inbounds i8, ptr @constarray1, i64 %idx
%2 = load i8, ptr %1, align 2
@@ -53,10 +50,7 @@ declare ptr @llvm.ptrmask.p0.i64(ptr , i64)
; can't be folded because ptrmask can change ptr, while preserving provenance
define i8 @inbounds_gep_load_i8_align2_ptrmasked(i64 %idx, i64 %mask){
; CHECK-LABEL: @inbounds_gep_load_i8_align2_ptrmasked(
-; CHECK-NEXT: [[TMP1:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr @constarray1, i64 [[MASK:%.*]])
-; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 [[IDX:%.*]]
-; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr [[TMP2]], align 2
-; CHECK-NEXT: ret i8 [[TMP3]]
+; CHECK-NEXT: ret i8 1
;
%1 = call ptr @llvm.ptrmask.p0.i64(ptr @constarray1, i64 %mask)
%2 = getelementptr inbounds i8, ptr %1, i64 %idx
@@ -102,13 +96,12 @@ define i32 @inbounds_gep_i32_load_i32_const_offset(i64 %idx){
ret i32 %3
}
-; TODO: this coould be folded into 65537(LE), 16777472(BE)
define i32 @gep_load_i32_align2_const_offset(i64 %idx){
-; CHECK-LABEL: @gep_load_i32_align2_const_offset(
-; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i16, ptr @constarray1, i64 -2
-; CHECK-NEXT: [[TMP2:%.*]] = getelementptr [3 x i16], ptr [[TMP1]], i64 [[IDX:%.*]]
-; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 2
-; CHECK-NEXT: ret i32 [[TMP3]]
+; LE-LABEL: @gep_load_i32_align2_const_offset(
+; LE-NEXT: ret i32 65537
+;
+; BE-LABEL: @gep_load_i32_align2_const_offset(
+; BE-NEXT: ret i32 16777472
;
%1 = getelementptr i16, ptr @constarray1, i64 -2
%2 = getelementptr [3 x i16], ptr %1, i64 %idx
@@ -146,12 +139,12 @@ define i32 @inbounds_gep_i32_load_i32_const_ptr_array(i64 %idx){
ret i32 %3
}
-; TODO: this coould be folded into 65537(LE), 16777472(BE)
define i32 @inbounds_gep_i32_load_i32_align4_packedstruct(i64 %idx){
-; CHECK-LABEL: @inbounds_gep_i32_load_i32_align4_packedstruct(
-; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr @constpackedstruct, i64 [[IDX:%.*]]
-; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4
-; CHECK-NEXT: ret i32 [[TMP2]]
+; LE-LABEL: @inbounds_gep_i32_load_i32_align4_packedstruct(
+; LE-NEXT: ret i32 65537
+;
+; BE-LABEL: @inbounds_gep_i32_load_i32_align4_packedstruct(
+; BE-NEXT: ret i32 16777472
;
%1 = getelementptr inbounds i32, ptr @constpackedstruct, i64 %idx
%2 = load i32, ptr %1, align 4
@@ -172,11 +165,14 @@ define i32 @inbounds_gep_i8_load_i32_align1_packedstruct(i64 %idx){
; TODO: this could be folded into 65537(LE), 16777472(BE)
define i32 @inbounds_gep_i32_load_i32_align4_struct_with_const_offset(i64 %idx){
-; CHECK-LABEL: @inbounds_gep_i32_load_i32_align4_struct_with_const_offset(
-; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, ptr @conststruct, i64 1
-; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDX:%.*]]
-; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
-; CHECK-NEXT: ret i32 [[TMP3]]
+; LE-LABEL: @inbounds_gep_i32_load_i32_align4_struct_with_const_offset(
+; LE-NEXT: ret i32 65537
+;
+; BE-LABEL: @inbounds_gep_i32_load_i32_align4_struct_with_const_offset(
+; BE-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, ptr @conststruct, i64 1
+; BE-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDX:%.*]]
+; BE-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
+; BE-NEXT: ret i32 [[TMP3]]
;
%1 = getelementptr inbounds i16, ptr @conststruct, i64 1
%2 = getelementptr inbounds i32, ptr %1, i64 %idx
@@ -184,6 +180,3 @@ define i32 @inbounds_gep_i32_load_i32_align4_struct_with_const_offset(i64 %idx){
ret i32 %3
}
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; BE: {{.*}}
-; LE: {{.*}}