[llvm] [InstCombine] Canonicalize `(gep <not i8> p, (div exact X, C))` (PR #96898)

Fri Jun 28 00:59:24 PDT 2024

================
@@ -2939,18 +2939,57 @@ Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
           });
           return Changed ? &GEP : nullptr;
         }
-      } else {
+      } else if (auto *ExactIns =
+                     dyn_cast<PossiblyExactOperator>(GEP.getOperand(1))) {
         // Canonicalize (gep T* X, V / sizeof(T)) to (gep i8* X, V)
         Value *V;
-        if ((has_single_bit(TyAllocSize) &&
-             match(GEP.getOperand(1),
-                   m_Exact(m_Shr(m_Value(V),
-                                 m_SpecificInt(countr_zero(TyAllocSize)))))) ||
-            match(GEP.getOperand(1),
-                  m_Exact(m_IDiv(m_Value(V), m_SpecificInt(TyAllocSize))))) {
-          return GetElementPtrInst::Create(Builder.getInt8Ty(),
-                                           GEP.getPointerOperand(), V,
-                                           GEP.getNoWrapFlags());
+        if (ExactIns->isExact()) {
+          if ((has_single_bit(TyAllocSize) &&
+               match(GEP.getOperand(1),
+                     m_Shr(m_Value(V),
+                           m_SpecificInt(countr_zero(TyAllocSize))))) ||
+              match(GEP.getOperand(1),
+                    m_IDiv(m_Value(V), m_SpecificInt(TyAllocSize)))) {
+            return GetElementPtrInst::Create(Builder.getInt8Ty(),
+                                             GEP.getPointerOperand(), V,
+                                             GEP.getNoWrapFlags());
+          }
+        }
+        if (ExactIns->isExact() && ExactIns->hasOneUse()) {
+          // Try to canonicalize non-i8 element type to i8 if the index is an
+          // exact instruction. If the index is an exact instruction (div/shr)
+          // with a constant RHS, we can fold the non-i8 element scale into the
+          // div/shr (similiar to the mul case, just inverted).
+          const APInt *C;
+          std::optional<APInt> NewC;
+          if (has_single_bit(TyAllocSize) &&
+              match(ExactIns, m_Shr(m_Value(V), m_APInt(C))) &&
+              C->uge(countr_zero(TyAllocSize)))
+            NewC = *C - countr_zero(TyAllocSize);
+          else if (match(ExactIns, m_UDiv(m_Value(V), m_APInt(C)))) {
+            APInt Quot;
+            uint64_t Rem;
+            APInt::udivrem(*C, TyAllocSize, Quot, Rem);
+            if (!Quot.isAllOnes() && Rem == 0)
----------------
dtcxzyw wrote:

I don't see this check in the proof.

```
----------------------------------------
define ptr @src_udiv(ptr %p, i8 %x, i8 %C) {
#0:
  %rem = urem i8 %C, 4
  %rem_zero = icmp eq i8 %rem, 0
  assume i1 %rem_zero
  %mul = udiv exact i8 %x, %C
  %gep = gep ptr %p, 4 x i8 %mul
  ret ptr %gep
}
=>
define ptr @tgt_udiv(ptr %p, i8 %x, i8 %C) {
#0:
  %rem = urem i8 %C, 4
  %rem_zero = icmp eq i8 %rem, 0
  assume i1 %rem_zero
  %cdiv = udiv i8 %C, 4
  %mul = udiv exact i8 %x, %cdiv
  %gep = gep ptr %p, 1 x i8 %mul
  ret ptr %gep
}
Transformation seems to be correct!
```


https://github.com/llvm/llvm-project/pull/96898