[llvm] [InstCombine] Fold vector.reduce.op(vector.reverse(X)) -> vector.reduce.op(X) (PR #91743)

via llvm-commits llvm-commits at lists.llvm.org
Fri May 10 06:50:30 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-llvm-transforms

Author: David Sherwood (david-arm)

<details>
<summary>Changes</summary>

For all of the following reductions:

vector.reduce.or
vector.reduce.and
vector.reduce.xor
vector.reduce.add
vector.reduce.mul
vector.reduce.umin
vector.reduce.umax
vector.reduce.smin
vector.reduce.smax
vector.reduce.fmin
vector.reduce.fmax

if the input operand is the result of a vector.reverse, then we can perform the reduction on the vector.reverse's input instead, since the result is identical. If reassociation is permitted, we can also apply the same fold to the following:

vector.reduce.fadd
vector.reduce.fmul

---
Full diff: https://github.com/llvm/llvm-project/pull/91743.diff


3 Files Affected:

- (modified) llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp (+66-1) 
- (modified) llvm/test/Transforms/InstCombine/vector-logical-reductions.ll (+72) 
- (modified) llvm/test/Transforms/InstCombine/vector-reductions.ll (+162) 


``````````diff
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index d7433ad3599f9..fb2d62e23493a 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -3222,6 +3222,14 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
     // %res = cmp eq iReduxWidth %val, 11111
     Value *Arg = II->getArgOperand(0);
     Value *Vect;
+    // When doing a logical reduction of a reversed operand the result is
+    // identical to reducing the unreversed operand.
+    if (match(Arg, m_VecReverse(m_Value(Vect)))) {
+      Value *Res = IID == Intrinsic::vector_reduce_or
+                       ? Builder.CreateOrReduce(Vect)
+                       : Builder.CreateAndReduce(Vect);
+      return replaceInstUsesWith(CI, Res);
+    }
     if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) {
       if (auto *FTy = dyn_cast<FixedVectorType>(Vect->getType()))
         if (FTy->getElementType() == Builder.getInt1Ty()) {
@@ -3253,6 +3261,12 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
       // Trunc(ctpop(bitcast <n x i1> to in)).
       Value *Arg = II->getArgOperand(0);
       Value *Vect;
+      // When doing an integer add reduction of a reversed operand the result
+      // is identical to reducing the unreversed operand.
+      if (match(Arg, m_VecReverse(m_Value(Vect)))) {
+        Value *Res = Builder.CreateAddReduce(Vect);
+        return replaceInstUsesWith(CI, Res);
+      }
       if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) {
         if (auto *FTy = dyn_cast<FixedVectorType>(Vect->getType()))
           if (FTy->getElementType() == Builder.getInt1Ty()) {
@@ -3281,6 +3295,12 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
       //   ?ext(vector_reduce_add(<n x i1>))
       Value *Arg = II->getArgOperand(0);
       Value *Vect;
+      // When doing a xor reduction of a reversed operand the result is
+      // identical to reducing the unreversed operand.
+      if (match(Arg, m_VecReverse(m_Value(Vect)))) {
+        Value *Res = Builder.CreateXorReduce(Vect);
+        return replaceInstUsesWith(CI, Res);
+      }
       if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) {
         if (auto *FTy = dyn_cast<FixedVectorType>(Vect->getType()))
           if (FTy->getElementType() == Builder.getInt1Ty()) {
@@ -3304,6 +3324,12 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
       //   zext(vector_reduce_and(<n x i1>))
       Value *Arg = II->getArgOperand(0);
       Value *Vect;
+      // When doing a mul reduction of a reversed operand the result is
+      // identical to reducing the unreversed operand.
+      if (match(Arg, m_VecReverse(m_Value(Vect)))) {
+        Value *Res = Builder.CreateMulReduce(Vect);
+        return replaceInstUsesWith(CI, Res);
+      }
       if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) {
         if (auto *FTy = dyn_cast<FixedVectorType>(Vect->getType()))
           if (FTy->getElementType() == Builder.getInt1Ty()) {
@@ -3328,6 +3354,14 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
       //   ?ext(vector_reduce_{and,or}(<n x i1>))
       Value *Arg = II->getArgOperand(0);
       Value *Vect;
+      // When doing a min/max reduction of a reversed operand the result is
+      // identical to reducing the unreversed operand.
+      if (match(Arg, m_VecReverse(m_Value(Vect)))) {
+        Value *Res = IID == Intrinsic::vector_reduce_umin
+                         ? Builder.CreateIntMinReduce(Vect, false)
+                         : Builder.CreateIntMaxReduce(Vect, false);
+        return replaceInstUsesWith(CI, Res);
+      }
       if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) {
         if (auto *FTy = dyn_cast<FixedVectorType>(Vect->getType()))
           if (FTy->getElementType() == Builder.getInt1Ty()) {
@@ -3363,6 +3397,14 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
       //   zext(vector_reduce_{and,or}(<n x i1>))
       Value *Arg = II->getArgOperand(0);
       Value *Vect;
+      // When doing a min/max reduction of a reversed operand the result is
+      // identical to reducing the unreversed operand.
+      if (match(Arg, m_VecReverse(m_Value(Vect)))) {
+        Value *Res = IID == Intrinsic::vector_reduce_smin
+                         ? Builder.CreateIntMinReduce(Vect, true)
+                         : Builder.CreateIntMaxReduce(Vect, true);
+        return replaceInstUsesWith(CI, Res);
+      }
       if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) {
         if (auto *FTy = dyn_cast<FixedVectorType>(Vect->getType()))
           if (FTy->getElementType() == Builder.getInt1Ty()) {
@@ -3394,8 +3436,31 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
                                 : 0;
     Value *Arg = II->getArgOperand(ArgIdx);
     Value *V;
+
+    if (!CanBeReassociated)
+      break;
+
+    if (match(Arg, m_VecReverse(m_Value(V)))) {
+      Value *Res;
+      switch (IID) {
+      case Intrinsic::vector_reduce_fadd:
+        Res = Builder.CreateFAddReduce(II->getArgOperand(0), V);
+        break;
+      case Intrinsic::vector_reduce_fmul:
+        Res = Builder.CreateFMulReduce(II->getArgOperand(0), V);
+        break;
+      case Intrinsic::vector_reduce_fmin:
+        Res = Builder.CreateFPMinReduce(V);
+        break;
+      case Intrinsic::vector_reduce_fmax:
+        Res = Builder.CreateFPMaxReduce(V);
+        break;
+      }
+      return replaceInstUsesWith(CI, Res);
+    }
+
     ArrayRef<int> Mask;
-    if (!isa<FixedVectorType>(Arg->getType()) || !CanBeReassociated ||
+    if (!isa<FixedVectorType>(Arg->getType()) ||
         !match(Arg, m_Shuffle(m_Value(V), m_Undef(), m_Mask(Mask))) ||
         !cast<ShuffleVectorInst>(Arg)->isSingleSource())
       break;
diff --git a/llvm/test/Transforms/InstCombine/vector-logical-reductions.ll b/llvm/test/Transforms/InstCombine/vector-logical-reductions.ll
index 9bb307ebf71e8..da4a0ca754680 100644
--- a/llvm/test/Transforms/InstCombine/vector-logical-reductions.ll
+++ b/llvm/test/Transforms/InstCombine/vector-logical-reductions.ll
@@ -21,5 +21,77 @@ define i1 @reduction_logical_and(<4 x i1> %x) {
   ret i1 %r
 }
 
+define i1 @reduction_logical_or_reverse_nxv2i1(<vscale x 2 x i1> %p) {
+; CHECK-LABEL: @reduction_logical_or_reverse_nxv2i1(
+; CHECK-NEXT:    [[RED:%.*]] = call i1 @llvm.vector.reduce.or.nxv2i1(<vscale x 2 x i1> [[P:%.*]])
+; CHECK-NEXT:    ret i1 [[RED]]
+;
+  %rev = call <vscale x 2 x i1> @llvm.vector.reverse.nxv2i1(<vscale x 2 x i1> %p)
+  %red = call i1 @llvm.vector.reduce.or.nxv2i1(<vscale x 2 x i1> %rev)
+  ret i1 %red
+}
+
+define i1 @reduction_logical_or_reverse_v2i1(<2 x i1> %p) {
+; CHECK-LABEL: @reduction_logical_or_reverse_v2i1(
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i1> [[P:%.*]] to i2
+; CHECK-NEXT:    [[RED:%.*]] = icmp ne i2 [[TMP1]], 0
+; CHECK-NEXT:    ret i1 [[RED]]
+;
+  %rev = call <2 x i1> @llvm.vector.reverse.v2i1(<2 x i1> %p)
+  %red = call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> %rev)
+  ret i1 %red
+}
+
+define i1 @reduction_logical_and_reverse_nxv2i1(<vscale x 2 x i1> %p) {
+; CHECK-LABEL: @reduction_logical_and_reverse_nxv2i1(
+; CHECK-NEXT:    [[RED:%.*]] = call i1 @llvm.vector.reduce.and.nxv2i1(<vscale x 2 x i1> [[P:%.*]])
+; CHECK-NEXT:    ret i1 [[RED]]
+;
+  %rev = call <vscale x 2 x i1> @llvm.vector.reverse.nxv2i1(<vscale x 2 x i1> %p)
+  %red = call i1 @llvm.vector.reduce.and.nxv2i1(<vscale x 2 x i1> %rev)
+  ret i1 %red
+}
+
+define i1 @reduction_logical_and_reverse_v2i1(<2 x i1> %p) {
+; CHECK-LABEL: @reduction_logical_and_reverse_v2i1(
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i1> [[P:%.*]] to i2
+; CHECK-NEXT:    [[RED:%.*]] = icmp eq i2 [[TMP1]], -1
+; CHECK-NEXT:    ret i1 [[RED]]
+;
+  %rev = call <2 x i1> @llvm.vector.reverse.v2i1(<2 x i1> %p)
+  %red = call i1 @llvm.vector.reduce.and.v2i1(<2 x i1> %rev)
+  ret i1 %red
+}
+
+define i1 @reduction_logical_xor_reverse_nxv2i1(<vscale x 2 x i1> %p) {
+; CHECK-LABEL: @reduction_logical_xor_reverse_nxv2i1(
+; CHECK-NEXT:    [[RED:%.*]] = call i1 @llvm.vector.reduce.xor.nxv2i1(<vscale x 2 x i1> [[P:%.*]])
+; CHECK-NEXT:    ret i1 [[RED]]
+;
+  %rev = call <vscale x 2 x i1> @llvm.vector.reverse.nxv2i1(<vscale x 2 x i1> %p)
+  %red = call i1 @llvm.vector.reduce.xor.nxv2i1(<vscale x 2 x i1> %rev)
+  ret i1 %red
+}
+
+define i1 @reduction_logical_xor_reverse_v2i1(<2 x i1> %p) {
+; CHECK-LABEL: @reduction_logical_xor_reverse_v2i1(
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i1> [[P:%.*]] to i2
+; CHECK-NEXT:    [[TMP2:%.*]] = call range(i2 0, -1) i2 @llvm.ctpop.i2(i2 [[TMP1]])
+; CHECK-NEXT:    [[RED:%.*]] = trunc i2 [[TMP2]] to i1
+; CHECK-NEXT:    ret i1 [[RED]]
+;
+  %rev = call <2 x i1> @llvm.vector.reverse.v2i1(<2 x i1> %p)
+  %red = call i1 @llvm.vector.reduce.xor.v2i1(<2 x i1> %rev)
+  ret i1 %red
+}
+
 declare i1 @llvm.vector.reduce.or.v4i1(<4 x i1>)
+declare i1 @llvm.vector.reduce.or.nxv2i1(<vscale x 2 x i1>)
+declare i1 @llvm.vector.reduce.or.v2i1(<2 x i1>)
 declare i1 @llvm.vector.reduce.and.v4i1(<4 x i1>)
+declare i1 @llvm.vector.reduce.and.nxv2i1(<vscale x 2 x i1>)
+declare i1 @llvm.vector.reduce.and.v2i1(<2 x i1>)
+declare i1 @llvm.vector.reduce.xor.nxv2i1(<vscale x 2 x i1>)
+declare i1 @llvm.vector.reduce.xor.v2i1(<2 x i1>)
+declare <vscale x 2 x i1> @llvm.vector.reverse.nxv2i1(<vscale x 2 x i1>)
+declare <2 x i1> @llvm.vector.reverse.v2i1(<2 x i1>)
diff --git a/llvm/test/Transforms/InstCombine/vector-reductions.ll b/llvm/test/Transforms/InstCombine/vector-reductions.ll
index 2614ffd386952..3e2a23a5ef64e 100644
--- a/llvm/test/Transforms/InstCombine/vector-reductions.ll
+++ b/llvm/test/Transforms/InstCombine/vector-reductions.ll
@@ -3,12 +3,29 @@
 
 declare float @llvm.vector.reduce.fadd.f32.v4f32(float, <4 x float>)
 declare float @llvm.vector.reduce.fadd.f32.v8f32(float, <8 x float>)
+declare float @llvm.vector.reduce.fmul.f32.nxv4f32(float, <vscale x 4 x float>)
+declare float @llvm.vector.reduce.fmin.f32.v4f32(float, <4 x float>)
+declare float @llvm.vector.reduce.fmax.f32.nxv4f32(float, <vscale x 4 x float>)
 declare void @use_f32(float)
 
 declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>)
+declare i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32>)
 declare i32 @llvm.vector.reduce.add.v8i32(<8 x i32>)
 declare void @use_i32(i32)
 
+declare i32 @llvm.vector.reduce.mul.v4i32(<4 x i32>)
+declare i32 @llvm.vector.reduce.mul.nxv4i32(<vscale x 4 x i32>)
+
+declare i32 @llvm.vector.reduce.smin.v4i32(<4 x i32>)
+declare i32 @llvm.vector.reduce.smax.nxv4i32(<vscale x 4 x i32>)
+declare i32 @llvm.vector.reduce.umin.v4i32(<4 x i32>)
+declare i32 @llvm.vector.reduce.umax.nxv4i32(<vscale x 4 x i32>)
+
+declare <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32>)
+declare <vscale x 4 x float> @llvm.vector.reverse.nxv4f32(<vscale x 4 x float>)
+declare <4 x i32> @llvm.vector.reverse.v4i32(<4 x i32>)
+declare <4 x float> @llvm.vector.reverse.v4f32(<4 x float>)
+
 define float @diff_of_sums_v4f32(float %a0, <4 x float> %v0, float %a1, <4 x float> %v1) {
 ; CHECK-LABEL: @diff_of_sums_v4f32(
 ; CHECK-NEXT:    [[TMP1:%.*]] = fsub reassoc nsz <4 x float> [[V0:%.*]], [[V1:%.*]]
@@ -22,6 +39,71 @@ define float @diff_of_sums_v4f32(float %a0, <4 x float> %v0, float %a1, <4 x flo
   ret float %r
 }
 
+define float @reassoc_sum_of_reverse_v4f32(<4 x float> %v0) {
+; CHECK-LABEL: @reassoc_sum_of_reverse_v4f32(
+; CHECK-NEXT:    [[RED:%.*]] = call float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[V0:%.*]])
+; CHECK-NEXT:    ret float [[RED]]
+;
+  %rev = call <4 x float> @llvm.vector.reverse.v4f32(<4 x float> %v0)
+  %red = call reassoc float @llvm.vector.reduce.fadd.v4f32(float zeroinitializer, <4 x float> %rev)
+  ret float %red
+}
+
+define float @reassoc_mul_reduction_of_reverse_nxv4f32(<vscale x 4 x float> %v0) {
+; CHECK-LABEL: @reassoc_mul_reduction_of_reverse_nxv4f32(
+; CHECK-NEXT:    [[RED:%.*]] = call float @llvm.vector.reduce.fmul.nxv4f32(float 1.000000e+00, <vscale x 4 x float> [[V0:%.*]])
+; CHECK-NEXT:    ret float [[RED]]
+;
+  %rev = call <vscale x 4 x float> @llvm.vector.reverse.nxv4f32(<vscale x 4 x float> %v0)
+  %red = call reassoc float @llvm.vector.reduce.fmul.nxv4f32(float 1.0, <vscale x 4 x float> %rev)
+  ret float %red
+}
+
+define float @fmax_of_reverse_v4f32(<4 x float> %v0) {
+; CHECK-LABEL: @fmax_of_reverse_v4f32(
+; CHECK-NEXT:    [[RED:%.*]] = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> [[V0:%.*]])
+; CHECK-NEXT:    ret float [[RED]]
+;
+  %rev = call <4 x float> @llvm.vector.reverse.v4f32(<4 x float> %v0)
+  %red = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> %rev)
+  ret float %red
+}
+
+define float @fmin_of_reverse_nxv4f32(<vscale x 4 x float> %v0) {
+; CHECK-LABEL: @fmin_of_reverse_nxv4f32(
+; CHECK-NEXT:    [[RED:%.*]] = call float @llvm.vector.reduce.fmin.nxv4f32(<vscale x 4 x float> [[V0:%.*]])
+; CHECK-NEXT:    ret float [[RED]]
+;
+  %rev = call <vscale x 4 x float> @llvm.vector.reverse.nxv4f32(<vscale x 4 x float> %v0)
+  %red = call float @llvm.vector.reduce.fmin.nxv4f32(<vscale x 4 x float> %rev)
+  ret float %red
+}
+
+; negative test - fadd cannot be folded with reverse due to lack of reassoc
+define float @sum_of_reverse_v4f32(<4 x float> %v0) {
+; CHECK-LABEL: @sum_of_reverse_v4f32(
+; CHECK-NEXT:    [[REV:%.*]] = call <4 x float> @llvm.vector.reverse.v4f32(<4 x float> [[V0:%.*]])
+; CHECK-NEXT:    [[RED:%.*]] = call float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[REV]])
+; CHECK-NEXT:    ret float [[RED]]
+;
+  %rev = call <4 x float> @llvm.vector.reverse.v4f32(<4 x float> %v0)
+  %red = call float @llvm.vector.reduce.fadd.v4f32(float zeroinitializer, <4 x float> %rev)
+  ret float %red
+}
+
+; negative test - fmul cannot be folded with reverse due to lack of reassoc
+define float @mul_reduction_of_reverse_nxv4f32(<vscale x 4 x float> %v0) {
+; CHECK-LABEL: @mul_reduction_of_reverse_nxv4f32(
+; CHECK-NEXT:    [[REV:%.*]] = call <vscale x 4 x float> @llvm.vector.reverse.nxv4f32(<vscale x 4 x float> [[V0:%.*]])
+; CHECK-NEXT:    [[RED:%.*]] = call float @llvm.vector.reduce.fmul.nxv4f32(float 0.000000e+00, <vscale x 4 x float> [[REV]])
+; CHECK-NEXT:    ret float [[RED]]
+;
+  %rev = call <vscale x 4 x float> @llvm.vector.reverse.nxv4f32(<vscale x 4 x float> %v0)
+  %red = call float @llvm.vector.reduce.fmul.nxv4f32(float zeroinitializer, <vscale x 4 x float> %rev)
+  ret float %red
+}
+
+
 ; negative test - fsub must allow reassociation
 
 define float @diff_of_sums_v4f32_fmf(float %a0, <4 x float> %v0, float %a1, <4 x float> %v1) {
@@ -98,6 +180,86 @@ define i32 @diff_of_sums_v4i32(<4 x i32> %v0, <4 x i32> %v1) {
   ret i32 %r
 }
 
+define i32 @sum_of_reverse_v4i32(<4 x i32> %v0) {
+; CHECK-LABEL: @sum_of_reverse_v4i32(
+; CHECK-NEXT:    [[RED:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[V0:%.*]])
+; CHECK-NEXT:    ret i32 [[RED]]
+;
+  %rev = call <4 x i32> @llvm.vector.reverse.v4i32(<4 x i32> %v0)
+  %red = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %rev)
+  ret i32 %red
+}
+
+define i32 @sum_of_reverse_nxv4i32(<vscale x 4 x i32> %v0) {
+; CHECK-LABEL: @sum_of_reverse_nxv4i32(
+; CHECK-NEXT:    [[RED:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[V0:%.*]])
+; CHECK-NEXT:    ret i32 [[RED]]
+;
+  %rev = call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> %v0)
+  %red = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> %rev)
+  ret i32 %red
+}
+
+define i32 @mul_reduce_of_reverse_v4i32(<4 x i32> %v0) {
+; CHECK-LABEL: @mul_reduce_of_reverse_v4i32(
+; CHECK-NEXT:    [[RED:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[V0:%.*]])
+; CHECK-NEXT:    ret i32 [[RED]]
+;
+  %rev = call <4 x i32> @llvm.vector.reverse.v4i32(<4 x i32> %v0)
+  %red = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> %rev)
+  ret i32 %red
+}
+
+define i32 @mul_reduce_of_reverse_nxv4i32(<vscale x 4 x i32> %v0) {
+; CHECK-LABEL: @mul_reduce_of_reverse_nxv4i32(
+; CHECK-NEXT:    [[RED:%.*]] = call i32 @llvm.vector.reduce.mul.nxv4i32(<vscale x 4 x i32> [[V0:%.*]])
+; CHECK-NEXT:    ret i32 [[RED]]
+;
+  %rev = call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> %v0)
+  %red = call i32 @llvm.vector.reduce.mul.nxv4i32(<vscale x 4 x i32> %rev)
+  ret i32 %red
+}
+
+define i32 @smin_reduce_of_reverse_v4i32(<4 x i32> %v0) {
+; CHECK-LABEL: @smin_reduce_of_reverse_v4i32(
+; CHECK-NEXT:    [[RED:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[V0:%.*]])
+; CHECK-NEXT:    ret i32 [[RED]]
+;
+  %rev = call <4 x i32> @llvm.vector.reverse.v4i32(<4 x i32> %v0)
+  %red = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> %rev)
+  ret i32 %red
+}
+
+define i32 @smax_reduce_of_reverse_nxv4i32(<vscale x 4 x i32> %v0) {
+; CHECK-LABEL: @smax_reduce_of_reverse_nxv4i32(
+; CHECK-NEXT:    [[RED:%.*]] = call i32 @llvm.vector.reduce.smax.nxv4i32(<vscale x 4 x i32> [[V0:%.*]])
+; CHECK-NEXT:    ret i32 [[RED]]
+;
+  %rev = call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> %v0)
+  %red = call i32 @llvm.vector.reduce.smax.nxv4i32(<vscale x 4 x i32> %rev)
+  ret i32 %red
+}
+
+define i32 @umin_reduce_of_reverse_v4i32(<4 x i32> %v0) {
+; CHECK-LABEL: @umin_reduce_of_reverse_v4i32(
+; CHECK-NEXT:    [[RED:%.*]] = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> [[V0:%.*]])
+; CHECK-NEXT:    ret i32 [[RED]]
+;
+  %rev = call <4 x i32> @llvm.vector.reverse.v4i32(<4 x i32> %v0)
+  %red = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> %rev)
+  ret i32 %red
+}
+
+define i32 @umax_reduce_of_reverse_nxv4i32(<vscale x 4 x i32> %v0) {
+; CHECK-LABEL: @umax_reduce_of_reverse_nxv4i32(
+; CHECK-NEXT:    [[RED:%.*]] = call i32 @llvm.vector.reduce.umax.nxv4i32(<vscale x 4 x i32> [[V0:%.*]])
+; CHECK-NEXT:    ret i32 [[RED]]
+;
+  %rev = call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> %v0)
+  %red = call i32 @llvm.vector.reduce.umax.nxv4i32(<vscale x 4 x i32> %rev)
+  ret i32 %red
+}
+
 ; negative test - extra uses could create extra instructions
 
 define i32 @diff_of_sums_v4i32_extra_use1(<4 x i32> %v0, <4 x i32> %v1) {

``````````

</details>


https://github.com/llvm/llvm-project/pull/91743


More information about the llvm-commits mailing list