[llvm] [InstCombine] Canonicalize `(gep <not i8> p, (div exact X, C))` (PR #96898)
Yingwei Zheng via llvm-commits
llvm-commits at lists.llvm.org
Fri Jun 28 00:59:24 PDT 2024
================
@@ -2939,18 +2939,57 @@ Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
});
return Changed ? &GEP : nullptr;
}
- } else {
+ } else if (auto *ExactIns =
+ dyn_cast<PossiblyExactOperator>(GEP.getOperand(1))) {
// Canonicalize (gep T* X, V / sizeof(T)) to (gep i8* X, V)
Value *V;
- if ((has_single_bit(TyAllocSize) &&
- match(GEP.getOperand(1),
- m_Exact(m_Shr(m_Value(V),
- m_SpecificInt(countr_zero(TyAllocSize)))))) ||
- match(GEP.getOperand(1),
- m_Exact(m_IDiv(m_Value(V), m_SpecificInt(TyAllocSize))))) {
- return GetElementPtrInst::Create(Builder.getInt8Ty(),
- GEP.getPointerOperand(), V,
- GEP.getNoWrapFlags());
+ if (ExactIns->isExact()) {
+ if ((has_single_bit(TyAllocSize) &&
+ match(GEP.getOperand(1),
+ m_Shr(m_Value(V),
+ m_SpecificInt(countr_zero(TyAllocSize))))) ||
+ match(GEP.getOperand(1),
+ m_IDiv(m_Value(V), m_SpecificInt(TyAllocSize)))) {
+ return GetElementPtrInst::Create(Builder.getInt8Ty(),
+ GEP.getPointerOperand(), V,
+ GEP.getNoWrapFlags());
+ }
+ }
+ if (ExactIns->isExact() && ExactIns->hasOneUse()) {
+ // Try to canonicalize non-i8 element type to i8 if the index is an
+ // exact instruction. If the index is an exact instruction (div/shr)
+ // with a constant RHS, we can fold the non-i8 element scale into the
+ // div/shr (similar to the mul case, just inverted).
+ const APInt *C;
+ std::optional<APInt> NewC;
+ if (has_single_bit(TyAllocSize) &&
+ match(ExactIns, m_Shr(m_Value(V), m_APInt(C))) &&
+ C->uge(countr_zero(TyAllocSize)))
+ NewC = *C - countr_zero(TyAllocSize);
+ else if (match(ExactIns, m_UDiv(m_Value(V), m_APInt(C)))) {
+ APInt Quot;
+ uint64_t Rem;
+ APInt::udivrem(*C, TyAllocSize, Quot, Rem);
+ if (!Quot.isAllOnes() && Rem == 0)
----------------
dtcxzyw wrote:
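I don't see this check in the proof: the proof assumes the remainder is zero (`Rem == 0`), but nothing in it corresponds to `!Quot.isAllOnes()`.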
```
----------------------------------------
define ptr @src_udiv(ptr %p, i8 %x, i8 %C) {
#0:
%rem = urem i8 %C, 4
%rem_zero = icmp eq i8 %rem, 0
assume i1 %rem_zero
%mul = udiv exact i8 %x, %C
%gep = gep ptr %p, 4 x i8 %mul
ret ptr %gep
}
=>
define ptr @tgt_udiv(ptr %p, i8 %x, i8 %C) {
#0:
%rem = urem i8 %C, 4
%rem_zero = icmp eq i8 %rem, 0
assume i1 %rem_zero
%cdiv = udiv i8 %C, 4
%mul = udiv exact i8 %x, %cdiv
%gep = gep ptr %p, 1 x i8 %mul
ret ptr %gep
}
Transformation seems to be correct!
```
https://github.com/llvm/llvm-project/pull/96898
More information about the llvm-commits
mailing list