[llvm] [X86] matchAddressRecursively - move ZERO_EXTEND patterns into matchIndexRecursively (PR #85081)

via llvm-commits llvm-commits at lists.llvm.org
Thu Mar 14 17:54:26 PDT 2024


llvmbot wrote:



@llvm/pr-subscribers-backend-x86

Author: RicoAfoat (RicoAfoat)

<details>
<summary>Changes</summary>

Fixes https://github.com/llvm/llvm-project/issues/82431 - see the issue for more information.

1. Move all ZERO_EXTEND patterns into `matchIndexRecursively`.
2. Match `ZERO_EXTEND` patterns recursively so that the resulting index cannot be reduced any further later, which fixes the bug.
3. Change the return type of `matchIndexRecursively` to `std::optional<SDValue>`, because there is otherwise no direct indicator of whether a pattern was matched. Alternatives like `N != matchIndexRecursively(N, AM, Depth + 1)` do not help, because during matching `SDNodes` may be deallocated and reallocated at the same memory address, so the return value and `N` may compare "equal" on the surface.

(That is to say, the returned node may be built at the same address where `N`'s node was just deallocated.)
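To make the motivation for item 3 concrete, here is a minimal, self-contained C++ sketch (hypothetical `Node`/`Value` stand-ins, not LLVM's `SDNode`/`SDValue`) contrasting the pointer-identity check with an explicit `std::optional` result:

```cpp
#include <optional>

struct Node { unsigned Opcode = 0; };

struct Value {
  Node *N = nullptr;
  bool operator!=(const Value &Other) const { return N != Other.N; }
};

// Identity-based check: the caller infers "matched" from Result != Input.
// If the matcher frees the old node and the replacement happens to be
// allocated at the same address, Result != Input is false even though a
// rewrite actually happened.
Value matchByIdentity(Value Input) { return Input; }

// Explicit-success variant: std::nullopt means "no match" regardless of any
// pointer coincidences; value_or(Input) recovers the old fallback behaviour.
std::optional<Value> matchExplicit(Value Input) {
  if (!Input.N || Input.N->Opcode == 0)
    return std::nullopt; // nothing matched
  return Input;          // matched (possibly rewritten) index value
}

int main() {
  Node N;
  N.Opcode = 1;
  Value V;
  V.N = &N;
  Value Index = matchExplicit(V).value_or(V); // mirrors the patch's value_or(N)
  (void)Index;
  (void)matchByIdentity(V);
  return 0;
}
```

Under this scheme the caller's `if (auto Index = matchIndexRecursively(...))` test is unambiguous, which is the pattern the diff below adopts.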

Additional problems to be fixed:

1. The `adapter` lambda expression is not particularly elegant.
2. Consider avoiding direct modification of `AM.IndexReg` inside `matchIndexRecursively`.

---
Full diff: https://github.com/llvm/llvm-project/pull/85081.diff


2 Files Affected:

- (modified) llvm/lib/Target/X86/X86ISelDAGToDAG.cpp (+152-145) 
- (modified) llvm/test/CodeGen/X86/inline-asm-memop.ll (+53-9) 


``````````diff
diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
index 76c6c1645239ab..06a322fd243212 100644
--- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -212,8 +212,8 @@ namespace {
     bool matchAddress(SDValue N, X86ISelAddressMode &AM);
     bool matchVectorAddress(SDValue N, X86ISelAddressMode &AM);
     bool matchAdd(SDValue &N, X86ISelAddressMode &AM, unsigned Depth);
-    SDValue matchIndexRecursively(SDValue N, X86ISelAddressMode &AM,
-                                  unsigned Depth);
+    std::optional<SDValue>
+    matchIndexRecursively(SDValue N, X86ISelAddressMode &AM, unsigned Depth);
     bool matchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
                                  unsigned Depth);
     bool matchVectorAddressRecursively(SDValue N, X86ISelAddressMode &AM,
@@ -2278,18 +2278,24 @@ static bool foldMaskedShiftToBEXTR(SelectionDAG &DAG, SDValue N,
   return false;
 }
 
-// Attempt to peek further into a scaled index register, collecting additional
-// extensions / offsets / etc. Returns /p N if we can't peek any further.
-SDValue X86DAGToDAGISel::matchIndexRecursively(SDValue N,
-                                               X86ISelAddressMode &AM,
-                                               unsigned Depth) {
+// Change the return type of matchIndexRecursively to std::optional<SDValue> to
+// make it easier for caller to tell whether the match is successful. Attempt to
+// peek further into a scaled index register, collecting additional extensions /
+// offsets / etc. Returns /p N if we can't peek any further.
+std::optional<SDValue>
+X86DAGToDAGISel::matchIndexRecursively(SDValue N, X86ISelAddressMode &AM,
+                                       unsigned Depth) {
   assert(AM.IndexReg.getNode() == nullptr && "IndexReg already matched");
   assert((AM.Scale == 1 || AM.Scale == 2 || AM.Scale == 4 || AM.Scale == 8) &&
          "Illegal index scale");
 
+  auto adapter = [this, &AM, &Depth](SDValue N) -> SDValue {
+    return matchIndexRecursively(N, AM, Depth + 1).value_or(N);
+  };
+
   // Limit recursion.
   if (Depth >= SelectionDAG::MaxRecursionDepth)
-    return N;
+    return std::nullopt;
 
   EVT VT = N.getValueType();
   unsigned Opc = N.getOpcode();
@@ -2299,14 +2305,14 @@ SDValue X86DAGToDAGISel::matchIndexRecursively(SDValue N,
     auto *AddVal = cast<ConstantSDNode>(N.getOperand(1));
     uint64_t Offset = (uint64_t)AddVal->getSExtValue() * AM.Scale;
     if (!foldOffsetIntoAddress(Offset, AM))
-      return matchIndexRecursively(N.getOperand(0), AM, Depth + 1);
+      return adapter(N.getOperand(0));
   }
 
   // index: add(x,x) -> index: x, scale * 2
   if (Opc == ISD::ADD && N.getOperand(0) == N.getOperand(1)) {
     if (AM.Scale <= 4) {
       AM.Scale *= 2;
-      return matchIndexRecursively(N.getOperand(0), AM, Depth + 1);
+      return adapter(N.getOperand(0));
     }
   }
 
@@ -2316,7 +2322,7 @@ SDValue X86DAGToDAGISel::matchIndexRecursively(SDValue N,
     uint64_t ScaleAmt = 1ULL << ShiftAmt;
     if ((AM.Scale * ScaleAmt) <= 8) {
       AM.Scale *= ScaleAmt;
-      return matchIndexRecursively(N.getOperand(0), AM, Depth + 1);
+      return adapter(N.getOperand(0));
     }
   }
 
@@ -2346,54 +2352,137 @@ SDValue X86DAGToDAGISel::matchIndexRecursively(SDValue N,
     }
   }
 
-  // index: zext(add_nuw(x,c)) -> index: zext(x), disp + zext(c)
-  // index: zext(addlike(x,c)) -> index: zext(x), disp + zext(c)
-  // TODO: call matchIndexRecursively(AddSrc) if we won't corrupt sext?
-  if (Opc == ISD::ZERO_EXTEND && !VT.isVector() && N.hasOneUse()) {
+  if (Opc == ISD::ZERO_EXTEND) {
+    // index: zext(add_nuw(x,c)) -> index: zext(x), disp + zext(c)
+    // index: zext(addlike(x,c)) -> index: zext(x), disp + zext(c)
     SDValue Src = N.getOperand(0);
-    unsigned SrcOpc = Src.getOpcode();
-    if (((SrcOpc == ISD::ADD && Src->getFlags().hasNoUnsignedWrap()) ||
-         CurDAG->isADDLike(Src)) &&
-        Src.hasOneUse()) {
-      if (CurDAG->isBaseWithConstantOffset(Src)) {
-        SDValue AddSrc = Src.getOperand(0);
-        uint64_t Offset = Src.getConstantOperandVal(1);
-        if (!foldOffsetIntoAddress(Offset * AM.Scale, AM)) {
-          SDLoc DL(N);
-          SDValue Res;
-          // If we're also scaling, see if we can use that as well.
-          if (AddSrc.getOpcode() == ISD::SHL &&
-              isa<ConstantSDNode>(AddSrc.getOperand(1))) {
-            SDValue ShVal = AddSrc.getOperand(0);
-            uint64_t ShAmt = AddSrc.getConstantOperandVal(1);
-            APInt HiBits =
-                APInt::getHighBitsSet(AddSrc.getScalarValueSizeInBits(), ShAmt);
-            uint64_t ScaleAmt = 1ULL << ShAmt;
-            if ((AM.Scale * ScaleAmt) <= 8 &&
-                (AddSrc->getFlags().hasNoUnsignedWrap() ||
-                 CurDAG->MaskedValueIsZero(ShVal, HiBits))) {
-              AM.Scale *= ScaleAmt;
-              SDValue ExtShVal = CurDAG->getNode(Opc, DL, VT, ShVal);
-              SDValue ExtShift = CurDAG->getNode(ISD::SHL, DL, VT, ExtShVal,
-                                                 AddSrc.getOperand(1));
-              insertDAGNode(*CurDAG, N, ExtShVal);
-              insertDAGNode(*CurDAG, N, ExtShift);
-              AddSrc = ExtShift;
-              Res = ExtShVal;
+    if (!VT.isVector() && N.hasOneUse()) {
+      unsigned SrcOpc = Src.getOpcode();
+      if (((SrcOpc == ISD::ADD && Src->getFlags().hasNoUnsignedWrap()) ||
+           CurDAG->isADDLike(Src)) &&
+          Src.hasOneUse()) {
+        if (CurDAG->isBaseWithConstantOffset(Src)) {
+          SDValue AddSrc = Src.getOperand(0);
+          uint64_t Offset = Src.getConstantOperandVal(1);
+          if (!foldOffsetIntoAddress(Offset * AM.Scale, AM)) {
+            SDLoc DL(N);
+            SDValue Res;
+            // If we're also scaling, see if we can use that as well.
+            if (AddSrc.getOpcode() == ISD::SHL &&
+                isa<ConstantSDNode>(AddSrc.getOperand(1))) {
+              SDValue ShVal = AddSrc.getOperand(0);
+              uint64_t ShAmt = AddSrc.getConstantOperandVal(1);
+              APInt HiBits = APInt::getHighBitsSet(
+                  AddSrc.getScalarValueSizeInBits(), ShAmt);
+              uint64_t ScaleAmt = 1ULL << ShAmt;
+              if ((AM.Scale * ScaleAmt) <= 8 &&
+                  (AddSrc->getFlags().hasNoUnsignedWrap() ||
+                   CurDAG->MaskedValueIsZero(ShVal, HiBits))) {
+                AM.Scale *= ScaleAmt;
+                SDValue ExtShVal = CurDAG->getNode(Opc, DL, VT, ShVal);
+                SDValue ExtShift = CurDAG->getNode(ISD::SHL, DL, VT, ExtShVal,
+                                                   AddSrc.getOperand(1));
+                insertDAGNode(*CurDAG, N, ExtShVal);
+                insertDAGNode(*CurDAG, N, ExtShift);
+                AddSrc = ExtShift;
+                Res = adapter(ExtShVal);
+              }
             }
+            SDValue ExtSrc = CurDAG->getNode(Opc, DL, VT, AddSrc);
+            SDValue ExtVal = CurDAG->getConstant(Offset, DL, VT);
+            SDValue ExtAdd = CurDAG->getNode(SrcOpc, DL, VT, ExtSrc, ExtVal);
+            insertDAGNode(*CurDAG, N, ExtSrc);
+            insertDAGNode(*CurDAG, N, ExtVal);
+            insertDAGNode(*CurDAG, N, ExtAdd);
+            CurDAG->ReplaceAllUsesWith(N, ExtAdd);
+            CurDAG->RemoveDeadNode(N.getNode());
+            // AM.IndexReg can be further picked
+            SDValue Zext = adapter(ExtSrc);
+            return Res ? Res : Zext;
           }
-          SDValue ExtSrc = CurDAG->getNode(Opc, DL, VT, AddSrc);
-          SDValue ExtVal = CurDAG->getConstant(Offset, DL, VT);
-          SDValue ExtAdd = CurDAG->getNode(SrcOpc, DL, VT, ExtSrc, ExtVal);
-          insertDAGNode(*CurDAG, N, ExtSrc);
-          insertDAGNode(*CurDAG, N, ExtVal);
-          insertDAGNode(*CurDAG, N, ExtAdd);
-          CurDAG->ReplaceAllUsesWith(N, ExtAdd);
-          CurDAG->RemoveDeadNode(N.getNode());
-          return Res ? Res : ExtSrc;
         }
       }
     }
+
+    // Peek through mask: zext(and(shl(x,c1),c2))
+    APInt Mask = APInt::getAllOnes(Src.getScalarValueSizeInBits());
+    if (Src.getOpcode() == ISD::AND && Src.hasOneUse())
+      if (auto *MaskC = dyn_cast<ConstantSDNode>(Src.getOperand(1))) {
+        Mask = MaskC->getAPIntValue();
+        Src = Src.getOperand(0);
+      }
+
+    if (Src.getOpcode() == ISD::SHL && Src.hasOneUse()) {
+      // Give up if the shift is not a valid scale factor [1,2,3].
+      SDValue ShlSrc = Src.getOperand(0);
+      SDValue ShlAmt = Src.getOperand(1);
+      auto *ShAmtC = dyn_cast<ConstantSDNode>(ShlAmt);
+      if (!ShAmtC)
+        return std::nullopt;
+      unsigned ShAmtV = ShAmtC->getZExtValue();
+      if (ShAmtV > 3 || (1 << ShAmtV) * AM.Scale > 8)
+        return std::nullopt;
+
+      // The narrow shift must only shift out zero bits (it must be 'nuw').
+      // That makes it safe to widen to the destination type.
+      APInt HighZeros =
+          APInt::getHighBitsSet(ShlSrc.getValueSizeInBits(), ShAmtV);
+      if (!Src->getFlags().hasNoUnsignedWrap() &&
+          !CurDAG->MaskedValueIsZero(ShlSrc, HighZeros & Mask))
+        return std::nullopt;
+
+      // zext (shl nuw i8 %x, C1) to i32
+      // --> shl (zext i8 %x to i32), (zext C1)
+      // zext (and (shl nuw i8 %x, C1), C2) to i32
+      // --> shl (zext i8 (and %x, C2 >> C1) to i32), (zext C1)
+      MVT SrcVT = ShlSrc.getSimpleValueType();
+      MVT VT = N.getSimpleValueType();
+      SDLoc DL(N);
+
+      SDValue Res = ShlSrc;
+      if (!Mask.isAllOnes()) {
+        Res = CurDAG->getConstant(Mask.lshr(ShAmtV), DL, SrcVT);
+        insertDAGNode(*CurDAG, N, Res);
+        Res = CurDAG->getNode(ISD::AND, DL, SrcVT, ShlSrc, Res);
+        insertDAGNode(*CurDAG, N, Res);
+      }
+      SDValue Zext = CurDAG->getNode(ISD::ZERO_EXTEND, DL, VT, Res);
+      insertDAGNode(*CurDAG, N, Zext);
+      SDValue NewShl = CurDAG->getNode(ISD::SHL, DL, VT, Zext, ShlAmt);
+      insertDAGNode(*CurDAG, N, NewShl);
+      CurDAG->ReplaceAllUsesWith(N, NewShl);
+      CurDAG->RemoveDeadNode(N.getNode());
+
+      // Convert the shift to scale factor.
+      AM.Scale *= 1 << ShAmtV;
+      // If matchIndexRecursively is not called here,
+      // Zext may be replaced by other nodes but later used to call a builder
+      // method
+      return adapter(Zext);
+    }
+
+    // TODO: Do not modify AM.IndexReg inside directly.
+    if (AM.Scale != 1)
+      return std::nullopt;
+
+    if (Src.getOpcode() == ISD::SRL && !Mask.isAllOnes()) {
+      // Try to fold the mask and shift into an extract and scale.
+      if (!foldMaskAndShiftToExtract(*CurDAG, N, Mask.getZExtValue(), Src,
+                                     Src.getOperand(0), AM))
+        return AM.IndexReg;
+
+      // Try to fold the mask and shift directly into the scale.
+      if (!foldMaskAndShiftToScale(*CurDAG, N, Mask.getZExtValue(), Src,
+                                   Src.getOperand(0), AM))
+        return AM.IndexReg;
+
+      // Try to fold the mask and shift into BEXTR and scale.
+      if (!foldMaskedShiftToBEXTR(*CurDAG, N, Mask.getZExtValue(), Src,
+                                  Src.getOperand(0), AM, *Subtarget))
+        return AM.IndexReg;
+    }
+
+    return std::nullopt;
   }
 
   // TODO: Handle extensions, shifted masks etc.
@@ -2479,7 +2568,8 @@ bool X86DAGToDAGISel::matchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
       if (Val == 1 || Val == 2 || Val == 3) {
         SDValue ShVal = N.getOperand(0);
         AM.Scale = 1 << Val;
-        AM.IndexReg = matchIndexRecursively(ShVal, AM, Depth + 1);
+        AM.IndexReg =
+            matchIndexRecursively(ShVal, AM, Depth + 1).value_or(ShVal);
         return false;
       }
     }
@@ -2670,97 +2760,13 @@ bool X86DAGToDAGISel::matchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
     break;
   }
   case ISD::ZERO_EXTEND: {
-    // Try to widen a zexted shift left to the same size as its use, so we can
-    // match the shift as a scale factor.
-    if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1)
+    if (AM.IndexReg.getNode() != nullptr)
       break;
-
-    SDValue Src = N.getOperand(0);
-
-    // See if we can match a zext(addlike(x,c)).
-    // TODO: Move more ZERO_EXTEND patterns into matchIndexRecursively.
-    if (Src.getOpcode() == ISD::ADD || Src.getOpcode() == ISD::OR)
-      if (SDValue Index = matchIndexRecursively(N, AM, Depth + 1))
-        if (Index != N) {
-          AM.IndexReg = Index;
-          return false;
-        }
-
-    // Peek through mask: zext(and(shl(x,c1),c2))
-    APInt Mask = APInt::getAllOnes(Src.getScalarValueSizeInBits());
-    if (Src.getOpcode() == ISD::AND && Src.hasOneUse())
-      if (auto *MaskC = dyn_cast<ConstantSDNode>(Src.getOperand(1))) {
-        Mask = MaskC->getAPIntValue();
-        Src = Src.getOperand(0);
-      }
-
-    if (Src.getOpcode() == ISD::SHL && Src.hasOneUse()) {
-      // Give up if the shift is not a valid scale factor [1,2,3].
-      SDValue ShlSrc = Src.getOperand(0);
-      SDValue ShlAmt = Src.getOperand(1);
-      auto *ShAmtC = dyn_cast<ConstantSDNode>(ShlAmt);
-      if (!ShAmtC)
-        break;
-      unsigned ShAmtV = ShAmtC->getZExtValue();
-      if (ShAmtV > 3)
-        break;
-
-      // The narrow shift must only shift out zero bits (it must be 'nuw').
-      // That makes it safe to widen to the destination type.
-      APInt HighZeros =
-          APInt::getHighBitsSet(ShlSrc.getValueSizeInBits(), ShAmtV);
-      if (!Src->getFlags().hasNoUnsignedWrap() &&
-          !CurDAG->MaskedValueIsZero(ShlSrc, HighZeros & Mask))
-        break;
-
-      // zext (shl nuw i8 %x, C1) to i32
-      // --> shl (zext i8 %x to i32), (zext C1)
-      // zext (and (shl nuw i8 %x, C1), C2) to i32
-      // --> shl (zext i8 (and %x, C2 >> C1) to i32), (zext C1)
-      MVT SrcVT = ShlSrc.getSimpleValueType();
-      MVT VT = N.getSimpleValueType();
-      SDLoc DL(N);
-
-      SDValue Res = ShlSrc;
-      if (!Mask.isAllOnes()) {
-        Res = CurDAG->getConstant(Mask.lshr(ShAmtV), DL, SrcVT);
-        insertDAGNode(*CurDAG, N, Res);
-        Res = CurDAG->getNode(ISD::AND, DL, SrcVT, ShlSrc, Res);
-        insertDAGNode(*CurDAG, N, Res);
-      }
-      SDValue Zext = CurDAG->getNode(ISD::ZERO_EXTEND, DL, VT, Res);
-      insertDAGNode(*CurDAG, N, Zext);
-      SDValue NewShl = CurDAG->getNode(ISD::SHL, DL, VT, Zext, ShlAmt);
-      insertDAGNode(*CurDAG, N, NewShl);
-      CurDAG->ReplaceAllUsesWith(N, NewShl);
-      CurDAG->RemoveDeadNode(N.getNode());
-
-      // Convert the shift to scale factor.
-      AM.Scale = 1 << ShAmtV;
-      // If matchIndexRecursively is not called here,
-      // Zext may be replaced by other nodes but later used to call a builder
-      // method
-      AM.IndexReg = matchIndexRecursively(Zext, AM, Depth + 1);
+    // All relevant operations are moved into matchIndexRecursively.
+    if (auto Index = matchIndexRecursively(N, AM, Depth + 1)) {
+      AM.IndexReg = Index.value();
       return false;
     }
-
-    if (Src.getOpcode() == ISD::SRL && !Mask.isAllOnes()) {
-      // Try to fold the mask and shift into an extract and scale.
-      if (!foldMaskAndShiftToExtract(*CurDAG, N, Mask.getZExtValue(), Src,
-                                     Src.getOperand(0), AM))
-        return false;
-
-      // Try to fold the mask and shift directly into the scale.
-      if (!foldMaskAndShiftToScale(*CurDAG, N, Mask.getZExtValue(), Src,
-                                   Src.getOperand(0), AM))
-        return false;
-
-      // Try to fold the mask and shift into BEXTR and scale.
-      if (!foldMaskedShiftToBEXTR(*CurDAG, N, Mask.getZExtValue(), Src,
-                                  Src.getOperand(0), AM, *Subtarget))
-        return false;
-    }
-
     break;
   }
   }
@@ -2859,9 +2865,10 @@ bool X86DAGToDAGISel::selectVectorAddr(MemSDNode *Parent, SDValue BasePtr,
 
   // Attempt to match index patterns, as long as we're not relying on implicit
   // sign-extension, which is performed BEFORE scale.
-  if (IndexOp.getScalarValueSizeInBits() == BasePtr.getScalarValueSizeInBits())
-    AM.IndexReg = matchIndexRecursively(IndexOp, AM, 0);
-  else
+  if (IndexOp.getScalarValueSizeInBits() ==
+      BasePtr.getScalarValueSizeInBits()) {
+    AM.IndexReg = matchIndexRecursively(IndexOp, AM, 0).value_or(IndexOp);
+  } else
     AM.IndexReg = IndexOp;
 
   unsigned AddrSpace = Parent->getPointerInfo().getAddrSpace();
diff --git a/llvm/test/CodeGen/X86/inline-asm-memop.ll b/llvm/test/CodeGen/X86/inline-asm-memop.ll
index 83442498076102..38b81b11a8dfc2 100644
--- a/llvm/test/CodeGen/X86/inline-asm-memop.ll
+++ b/llvm/test/CodeGen/X86/inline-asm-memop.ll
@@ -1,20 +1,46 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
-; RUN: llc -mtriple=x86_64-unknown-linux-gnu -O0 < %s | FileCheck %s
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
 
 ; A bug in X86DAGToDAGISel::matchAddressRecursively create a zext SDValue which
 ; is quickly replaced by other SDValue but already pushed into vector for later
 ; calling for SelectionDAGISel::Select_INLINEASM getNode builder, see issue
 ; 82431 for more infomation.
 
-define void @PR82431(i8 %call, ptr %b) {
-; CHECK-LABEL: PR82431:
+define i64 @PR82431_0(i8 %call, ptr %b) {
+; CHECK-LABEL: PR82431_0:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    movb %dil, %al
-; CHECK-NEXT:    addb $1, %al
-; CHECK-NEXT:    movzbl %al, %eax
-; CHECK-NEXT:    # kill: def $rax killed $eax
-; CHECK-NEXT:    shlq $3, %rax
-; CHECK-NEXT:    addq %rax, %rsi
+; CHECK-NEXT:    movzbl %dil, %eax
+; CHECK-NEXT:    movq 8(%rsi,%rax,8), %rax
+; CHECK-NEXT:    retq
+entry:
+  %narrow = add nuw i8 %call, 1
+  %idxprom = zext i8 %narrow to i64
+  %arrayidx = getelementptr [1 x i64], ptr %b, i64 0, i64 %idxprom
+  %ret_val = load i64, ptr %arrayidx
+  ret i64 %ret_val
+}
+
+define i32 @PR82431_1(i32 %0, ptr %f) {
+; CHECK-LABEL: PR82431_1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    # kill: def $edi killed $edi def $rdi
+; CHECK-NEXT:    andl $4, %edi
+; CHECK-NEXT:    movl 4(%rsi,%rdi,2), %eax
+; CHECK-NEXT:    retq
+entry:
+  %shr = lshr i32 %0, 1
+  %and = and i32 %shr, 2
+  %add = or i32 %and, 1
+  %idxprom = zext i32 %add to i64
+  %arrayidx = getelementptr [0 x i32], ptr %f, i64 0, i64 %idxprom
+  %ret_val = load i32, ptr %arrayidx
+  ret i32 %ret_val
+}
+
+define void @PR82431_2(i8 %call, ptr %b) {
+; CHECK-LABEL: PR82431_2:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movzbl %dil, %eax
 ; CHECK-NEXT:    #APP
 ; CHECK-NEXT:    #NO_APP
 ; CHECK-NEXT:    retq
@@ -25,3 +51,21 @@ entry:
   tail call void asm "", "=*m,*m,~{dirflag},~{fpsr},~{flags}"(ptr elementtype(i64) %arrayidx, ptr elementtype(i64) %arrayidx)
   ret void
 }
+
+define void @PR82431_3(i32 %0, ptr %f) {
+; CHECK-LABEL: PR82431_3:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    # kill: def $edi killed $edi def $rdi
+; CHECK-NEXT:    andl $4, %edi
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    retq
+entry:
+  %shr = lshr i32 %0, 1
+  %and = and i32 %shr, 2
+  %add = or i32 %and, 1
+  %idxprom = zext i32 %add to i64
+  %arrayidx = getelementptr [0 x i32], ptr %f, i64 0, i64 %idxprom
+  tail call void asm "", "=*m,*m,~{dirflag},~{fpsr},~{flags}"(ptr elementtype(i32) %arrayidx, ptr elementtype(i32) %arrayidx)
+  ret void
+}

``````````

</details>


https://github.com/llvm/llvm-project/pull/85081


More information about the llvm-commits mailing list