[llvm] [DAG] Fix Failure to reassociate SMAX/SMIN/UMAX/UMIN (PR #82175)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Mar 7 07:05:33 PST 2024
https://github.com/SahilPatidar updated https://github.com/llvm/llvm-project/pull/82175
From ced226bd8068204ee171962beb4e22c5e9bf0e3d Mon Sep 17 00:00:00 2001
From: Sahil Patidar <patidarsahil2001 at gmail.com>
Date: Sun, 18 Feb 2024 22:17:26 +0530
Subject: [PATCH 1/2] [DAG] Fix Failure to reassociate SMAX/SMIN/UMAX/UMIN
---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 4 +
.../RISCV/rvv/fixed-vectors-trunc-vp.ll | 121 +++++++++---------
2 files changed, 66 insertions(+), 59 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 932944d4b6f3fd..5cfc0dc57d7ca3 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -5561,6 +5561,10 @@ SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
return FoldedVOp;
+ // reassociate minmax
+ if (SDValue RMINMAX = reassociateOps(Opcode, DL, N0, N1, N->getFlags()))
+ return RMINMAX;
+
// If sign bits are zero, flip between UMIN/UMAX and SMIN/SMAX.
// Only do this if the current op isn't legal and the flipped is.
if (!TLI.isOperationLegal(Opcode, VT) &&
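For context, the added call lets the generic reassociation helper run on the integer min/max opcodes, so nested min/max operations against the same constant can be folded into one. A minimal IR sketch of the pattern this covers, modelled on the combine-umin.ll test updated below (the function name here is illustrative only):

declare <16 x i8> @llvm.umin.v16i8(<16 x i8>, <16 x i8>)

define <16 x i8> @umin_umin_reassoc(<16 x i8> %a) {
  ; Two umin ops against the same constant vector; with reassociation
  ; enabled for ISD::UMIN, the second op folds away and a single
  ; pminub remains in the X86 output.
  %1 = call <16 x i8> @llvm.umin.v16i8(<16 x i8> %a, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>)
  %2 = call <16 x i8> @llvm.umin.v16i8(<16 x i8> %1, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>)
  ret <16 x i8> %2
}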
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-vp.ll
index e7b74737239154..4f16ce28bbb7e8 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-vp.ll
@@ -310,23 +310,24 @@ define <128 x i32> @vtrunc_v128i32_v128i64(<128 x i64> %a, <128 x i1> %m, i32 ze
; CHECK-NEXT: add a5, sp, a5
; CHECK-NEXT: addi a5, a5, 16
; CHECK-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill
+; CHECK-NEXT: mv a6, a7
; CHECK-NEXT: bltu a7, a3, .LBB16_4
; CHECK-NEXT: # %bb.3:
-; CHECK-NEXT: li a7, 64
+; CHECK-NEXT: li a6, 64
; CHECK-NEXT: .LBB16_4:
; CHECK-NEXT: addi a5, a1, 384
; CHECK-NEXT: li a3, 32
; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT: vle64.v v8, (a1)
-; CHECK-NEXT: csrr a6, vlenb
-; CHECK-NEXT: slli a6, a6, 3
-; CHECK-NEXT: add a6, sp, a6
-; CHECK-NEXT: addi a6, a6, 16
-; CHECK-NEXT: vs8r.v v8, (a6) # Unknown-size Folded Spill
-; CHECK-NEXT: addi a6, a7, -32
-; CHECK-NEXT: sltu t0, a7, a6
-; CHECK-NEXT: addi t0, t0, -1
-; CHECK-NEXT: and a6, t0, a6
+; CHECK-NEXT: csrr t0, vlenb
+; CHECK-NEXT: slli t0, t0, 3
+; CHECK-NEXT: add t0, sp, t0
+; CHECK-NEXT: addi t0, t0, 16
+; CHECK-NEXT: vs8r.v v8, (t0) # Unknown-size Folded Spill
+; CHECK-NEXT: addi t0, a6, -32
+; CHECK-NEXT: sltu a6, a6, t0
+; CHECK-NEXT: addi a6, a6, -1
+; CHECK-NEXT: and a6, a6, t0
; CHECK-NEXT: addi t0, a6, -16
; CHECK-NEXT: sltu t1, a6, t0
; CHECK-NEXT: addi t1, t1, -1
@@ -364,14 +365,15 @@ define <128 x i32> @vtrunc_v128i32_v128i64(<128 x i64> %a, <128 x i1> %m, i32 ze
; CHECK-NEXT: add a5, sp, a5
; CHECK-NEXT: addi a5, a5, 16
; CHECK-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill
+; CHECK-NEXT: mv a5, a4
; CHECK-NEXT: bltu a4, a3, .LBB16_8
; CHECK-NEXT: # %bb.7:
-; CHECK-NEXT: li a4, 32
+; CHECK-NEXT: li a5, 32
; CHECK-NEXT: .LBB16_8:
; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT: vle64.v v24, (a1)
-; CHECK-NEXT: addi a1, a4, -16
-; CHECK-NEXT: sltu a5, a4, a1
+; CHECK-NEXT: addi a1, a5, -16
+; CHECK-NEXT: sltu a5, a5, a1
; CHECK-NEXT: addi a5, a5, -1
; CHECK-NEXT: and a1, a5, a1
; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma
@@ -387,62 +389,63 @@ define <128 x i32> @vtrunc_v128i32_v128i64(<128 x i64> %a, <128 x i1> %m, i32 ze
; CHECK-NEXT: vmv1r.v v0, v5
; CHECK-NEXT: vnsrl.wi v8, v24, 0, v0.t
; CHECK-NEXT: vmv.v.v v0, v8
+; CHECK-NEXT: mv a1, a7
; CHECK-NEXT: bltu a7, a3, .LBB16_12
; CHECK-NEXT: # %bb.11:
-; CHECK-NEXT: li a7, 32
+; CHECK-NEXT: li a1, 32
; CHECK-NEXT: .LBB16_12:
; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
-; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: li a4, 24
-; CHECK-NEXT: mul a1, a1, a4
-; CHECK-NEXT: add a1, sp, a1
-; CHECK-NEXT: addi a1, a1, 16
-; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a4, vlenb
+; CHECK-NEXT: li a5, 24
+; CHECK-NEXT: mul a4, a4, a5
+; CHECK-NEXT: add a4, sp, a4
+; CHECK-NEXT: addi a4, a4, 16
+; CHECK-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload
; CHECK-NEXT: vmv4r.v v24, v8
-; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: li a4, 56
-; CHECK-NEXT: mul a1, a1, a4
-; CHECK-NEXT: add a1, sp, a1
-; CHECK-NEXT: addi a1, a1, 16
-; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a4, vlenb
+; CHECK-NEXT: li a5, 56
+; CHECK-NEXT: mul a4, a4, a5
+; CHECK-NEXT: add a4, sp, a4
+; CHECK-NEXT: addi a4, a4, 16
+; CHECK-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload
; CHECK-NEXT: vslideup.vi v8, v24, 16
-; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: li a4, 56
-; CHECK-NEXT: mul a1, a1, a4
-; CHECK-NEXT: add a1, sp, a1
-; CHECK-NEXT: addi a1, a1, 16
-; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
-; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a1, a1, 4
-; CHECK-NEXT: add a1, sp, a1
-; CHECK-NEXT: addi a1, a1, 16
-; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a4, vlenb
+; CHECK-NEXT: li a5, 56
+; CHECK-NEXT: mul a4, a4, a5
+; CHECK-NEXT: add a4, sp, a4
+; CHECK-NEXT: addi a4, a4, 16
+; CHECK-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a4, vlenb
+; CHECK-NEXT: slli a4, a4, 4
+; CHECK-NEXT: add a4, sp, a4
+; CHECK-NEXT: addi a4, a4, 16
+; CHECK-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload
; CHECK-NEXT: vmv4r.v v24, v8
-; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: li a4, 48
-; CHECK-NEXT: mul a1, a1, a4
-; CHECK-NEXT: add a1, sp, a1
-; CHECK-NEXT: addi a1, a1, 16
-; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a4, vlenb
+; CHECK-NEXT: li a5, 48
+; CHECK-NEXT: mul a4, a4, a5
+; CHECK-NEXT: add a4, sp, a4
+; CHECK-NEXT: addi a4, a4, 16
+; CHECK-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload
; CHECK-NEXT: vslideup.vi v8, v24, 16
-; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: li a4, 48
-; CHECK-NEXT: mul a1, a1, a4
-; CHECK-NEXT: add a1, sp, a1
-; CHECK-NEXT: addi a1, a1, 16
-; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a4, vlenb
+; CHECK-NEXT: li a5, 48
+; CHECK-NEXT: mul a4, a4, a5
+; CHECK-NEXT: add a4, sp, a4
+; CHECK-NEXT: addi a4, a4, 16
+; CHECK-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill
; CHECK-NEXT: vmv4r.v v8, v0
; CHECK-NEXT: vslideup.vi v8, v16, 16
-; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: li a4, 24
-; CHECK-NEXT: mul a1, a1, a4
-; CHECK-NEXT: add a1, sp, a1
-; CHECK-NEXT: addi a1, a1, 16
-; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
-; CHECK-NEXT: addi a1, a7, -16
-; CHECK-NEXT: sltu a4, a7, a1
-; CHECK-NEXT: addi a4, a4, -1
-; CHECK-NEXT: and a1, a4, a1
+; CHECK-NEXT: csrr a4, vlenb
+; CHECK-NEXT: li a5, 24
+; CHECK-NEXT: mul a4, a4, a5
+; CHECK-NEXT: add a4, sp, a4
+; CHECK-NEXT: addi a4, a4, 16
+; CHECK-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a4, a1, -16
+; CHECK-NEXT: sltu a1, a1, a4
+; CHECK-NEXT: addi a1, a1, -1
+; CHECK-NEXT: and a1, a1, a4
; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; CHECK-NEXT: vmv1r.v v0, v6
; CHECK-NEXT: csrr a1, vlenb
From cb5f14f76daad1f9998fb69d44dd0bd28af06b40 Mon Sep 17 00:00:00 2001
From: Sahil Patidar <patidarsahil2001 at gmail.com>
Date: Tue, 20 Feb 2024 11:14:55 +0530
Subject: [PATCH 2/2] update CodeGen tests
---
.../RISCV/rvv/fixed-vectors-strided-vpload.ll | 84 +++++++++----------
.../CodeGen/RISCV/rvv/fixed-vectors-vpload.ll | 30 +++----
llvm/test/CodeGen/X86/combine-smin.ll | 6 --
llvm/test/CodeGen/X86/combine-umax.ll | 8 +-
llvm/test/CodeGen/X86/combine-umin.ll | 8 +-
5 files changed, 61 insertions(+), 75 deletions(-)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll
index 2ae031798f5bd6..2ae058128eaa00 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll
@@ -549,36 +549,36 @@ define <33 x double> @strided_load_v33f64(ptr %ptr, i64 %stride, <33 x i1> %mask
; CHECK-RV32-NEXT: # %bb.1:
; CHECK-RV32-NEXT: li a3, 32
; CHECK-RV32-NEXT: .LBB42_2:
-; CHECK-RV32-NEXT: mul a5, a3, a2
-; CHECK-RV32-NEXT: addi a6, a4, -32
-; CHECK-RV32-NEXT: sltu a4, a4, a6
-; CHECK-RV32-NEXT: addi a4, a4, -1
-; CHECK-RV32-NEXT: and a6, a4, a6
-; CHECK-RV32-NEXT: li a4, 16
-; CHECK-RV32-NEXT: add a5, a1, a5
-; CHECK-RV32-NEXT: bltu a6, a4, .LBB42_4
+; CHECK-RV32-NEXT: mul a6, a3, a2
+; CHECK-RV32-NEXT: addi a5, a4, -32
+; CHECK-RV32-NEXT: sltu a7, a4, a5
+; CHECK-RV32-NEXT: addi a7, a7, -1
+; CHECK-RV32-NEXT: and a7, a7, a5
+; CHECK-RV32-NEXT: li a5, 16
+; CHECK-RV32-NEXT: add a6, a1, a6
+; CHECK-RV32-NEXT: bltu a7, a5, .LBB42_4
; CHECK-RV32-NEXT: # %bb.3:
-; CHECK-RV32-NEXT: li a6, 16
+; CHECK-RV32-NEXT: li a7, 16
; CHECK-RV32-NEXT: .LBB42_4:
; CHECK-RV32-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; CHECK-RV32-NEXT: vslidedown.vi v0, v8, 4
-; CHECK-RV32-NEXT: vsetvli zero, a6, e64, m8, ta, ma
-; CHECK-RV32-NEXT: vlse64.v v16, (a5), a2, v0.t
-; CHECK-RV32-NEXT: addi a5, a3, -16
-; CHECK-RV32-NEXT: sltu a6, a3, a5
-; CHECK-RV32-NEXT: addi a6, a6, -1
-; CHECK-RV32-NEXT: and a5, a6, a5
-; CHECK-RV32-NEXT: bltu a3, a4, .LBB42_6
+; CHECK-RV32-NEXT: vsetvli zero, a7, e64, m8, ta, ma
+; CHECK-RV32-NEXT: vlse64.v v16, (a6), a2, v0.t
+; CHECK-RV32-NEXT: addi a6, a3, -16
+; CHECK-RV32-NEXT: sltu a3, a3, a6
+; CHECK-RV32-NEXT: addi a3, a3, -1
+; CHECK-RV32-NEXT: and a3, a3, a6
+; CHECK-RV32-NEXT: bltu a4, a5, .LBB42_6
; CHECK-RV32-NEXT: # %bb.5:
-; CHECK-RV32-NEXT: li a3, 16
+; CHECK-RV32-NEXT: li a4, 16
; CHECK-RV32-NEXT: .LBB42_6:
-; CHECK-RV32-NEXT: mul a4, a3, a2
-; CHECK-RV32-NEXT: add a4, a1, a4
+; CHECK-RV32-NEXT: mul a5, a4, a2
+; CHECK-RV32-NEXT: add a5, a1, a5
; CHECK-RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; CHECK-RV32-NEXT: vslidedown.vi v0, v8, 2
-; CHECK-RV32-NEXT: vsetvli zero, a5, e64, m8, ta, ma
-; CHECK-RV32-NEXT: vlse64.v v24, (a4), a2, v0.t
; CHECK-RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
+; CHECK-RV32-NEXT: vlse64.v v24, (a5), a2, v0.t
+; CHECK-RV32-NEXT: vsetvli zero, a4, e64, m8, ta, ma
; CHECK-RV32-NEXT: vmv1r.v v0, v8
; CHECK-RV32-NEXT: vlse64.v v8, (a1), a2, v0.t
; CHECK-RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
@@ -599,36 +599,36 @@ define <33 x double> @strided_load_v33f64(ptr %ptr, i64 %stride, <33 x i1> %mask
; CHECK-RV64-NEXT: # %bb.1:
; CHECK-RV64-NEXT: li a4, 32
; CHECK-RV64-NEXT: .LBB42_2:
-; CHECK-RV64-NEXT: mul a5, a4, a2
-; CHECK-RV64-NEXT: addi a6, a3, -32
-; CHECK-RV64-NEXT: sltu a3, a3, a6
-; CHECK-RV64-NEXT: addi a3, a3, -1
-; CHECK-RV64-NEXT: and a6, a3, a6
-; CHECK-RV64-NEXT: li a3, 16
-; CHECK-RV64-NEXT: add a5, a1, a5
-; CHECK-RV64-NEXT: bltu a6, a3, .LBB42_4
+; CHECK-RV64-NEXT: mul a6, a4, a2
+; CHECK-RV64-NEXT: addi a5, a3, -32
+; CHECK-RV64-NEXT: sltu a7, a3, a5
+; CHECK-RV64-NEXT: addi a7, a7, -1
+; CHECK-RV64-NEXT: and a7, a7, a5
+; CHECK-RV64-NEXT: li a5, 16
+; CHECK-RV64-NEXT: add a6, a1, a6
+; CHECK-RV64-NEXT: bltu a7, a5, .LBB42_4
; CHECK-RV64-NEXT: # %bb.3:
-; CHECK-RV64-NEXT: li a6, 16
+; CHECK-RV64-NEXT: li a7, 16
; CHECK-RV64-NEXT: .LBB42_4:
; CHECK-RV64-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; CHECK-RV64-NEXT: vslidedown.vi v0, v8, 4
-; CHECK-RV64-NEXT: vsetvli zero, a6, e64, m8, ta, ma
-; CHECK-RV64-NEXT: vlse64.v v16, (a5), a2, v0.t
-; CHECK-RV64-NEXT: addi a5, a4, -16
-; CHECK-RV64-NEXT: sltu a6, a4, a5
-; CHECK-RV64-NEXT: addi a6, a6, -1
-; CHECK-RV64-NEXT: and a5, a6, a5
-; CHECK-RV64-NEXT: bltu a4, a3, .LBB42_6
+; CHECK-RV64-NEXT: vsetvli zero, a7, e64, m8, ta, ma
+; CHECK-RV64-NEXT: vlse64.v v16, (a6), a2, v0.t
+; CHECK-RV64-NEXT: addi a6, a4, -16
+; CHECK-RV64-NEXT: sltu a4, a4, a6
+; CHECK-RV64-NEXT: addi a4, a4, -1
+; CHECK-RV64-NEXT: and a4, a4, a6
+; CHECK-RV64-NEXT: bltu a3, a5, .LBB42_6
; CHECK-RV64-NEXT: # %bb.5:
-; CHECK-RV64-NEXT: li a4, 16
+; CHECK-RV64-NEXT: li a3, 16
; CHECK-RV64-NEXT: .LBB42_6:
-; CHECK-RV64-NEXT: mul a3, a4, a2
-; CHECK-RV64-NEXT: add a3, a1, a3
+; CHECK-RV64-NEXT: mul a5, a3, a2
+; CHECK-RV64-NEXT: add a5, a1, a5
; CHECK-RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; CHECK-RV64-NEXT: vslidedown.vi v0, v8, 2
-; CHECK-RV64-NEXT: vsetvli zero, a5, e64, m8, ta, ma
-; CHECK-RV64-NEXT: vlse64.v v24, (a3), a2, v0.t
; CHECK-RV64-NEXT: vsetvli zero, a4, e64, m8, ta, ma
+; CHECK-RV64-NEXT: vlse64.v v24, (a5), a2, v0.t
+; CHECK-RV64-NEXT: vsetvli zero, a3, e64, m8, ta, ma
; CHECK-RV64-NEXT: vmv1r.v v0, v8
; CHECK-RV64-NEXT: vlse64.v v8, (a1), a2, v0.t
; CHECK-RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpload.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpload.ll
index bb213c9276a3a9..618b875be56651 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpload.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpload.ll
@@ -418,20 +418,20 @@ define <33 x double> @vpload_v33f64(ptr %ptr, <33 x i1> %m, i32 zeroext %evl) {
; CHECK-NEXT: li a3, 32
; CHECK-NEXT: .LBB32_2:
; CHECK-NEXT: addi a4, a3, -16
-; CHECK-NEXT: sltu a5, a3, a4
-; CHECK-NEXT: addi a5, a5, -1
-; CHECK-NEXT: and a4, a5, a4
-; CHECK-NEXT: addi a5, a1, 128
+; CHECK-NEXT: sltu a3, a3, a4
+; CHECK-NEXT: addi a3, a3, -1
+; CHECK-NEXT: and a3, a3, a4
+; CHECK-NEXT: addi a4, a1, 128
; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; CHECK-NEXT: vslidedown.vi v0, v8, 2
-; CHECK-NEXT: vsetvli zero, a4, e64, m8, ta, ma
-; CHECK-NEXT: vle64.v v16, (a5), v0.t
-; CHECK-NEXT: addi a4, a2, -32
-; CHECK-NEXT: sltu a2, a2, a4
-; CHECK-NEXT: addi a2, a2, -1
-; CHECK-NEXT: and a4, a2, a4
-; CHECK-NEXT: li a2, 16
-; CHECK-NEXT: bltu a4, a2, .LBB32_4
+; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, ma
+; CHECK-NEXT: vle64.v v16, (a4), v0.t
+; CHECK-NEXT: addi a3, a2, -32
+; CHECK-NEXT: sltu a4, a2, a3
+; CHECK-NEXT: addi a4, a4, -1
+; CHECK-NEXT: and a4, a4, a3
+; CHECK-NEXT: li a3, 16
+; CHECK-NEXT: bltu a4, a3, .LBB32_4
; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: li a4, 16
; CHECK-NEXT: .LBB32_4:
@@ -440,11 +440,11 @@ define <33 x double> @vpload_v33f64(ptr %ptr, <33 x i1> %m, i32 zeroext %evl) {
; CHECK-NEXT: vslidedown.vi v0, v8, 4
; CHECK-NEXT: vsetvli zero, a4, e64, m8, ta, ma
; CHECK-NEXT: vle64.v v24, (a5), v0.t
-; CHECK-NEXT: bltu a3, a2, .LBB32_6
+; CHECK-NEXT: bltu a2, a3, .LBB32_6
; CHECK-NEXT: # %bb.5:
-; CHECK-NEXT: li a3, 16
+; CHECK-NEXT: li a2, 16
; CHECK-NEXT: .LBB32_6:
-; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, ma
+; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vle64.v v8, (a1), v0.t
; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
diff --git a/llvm/test/CodeGen/X86/combine-smin.ll b/llvm/test/CodeGen/X86/combine-smin.ll
index 87ae495f945e0a..b58934256a2092 100644
--- a/llvm/test/CodeGen/X86/combine-smin.ll
+++ b/llvm/test/CodeGen/X86/combine-smin.ll
@@ -70,9 +70,6 @@ define <16 x i8> @test_v16i8_reassociation(<16 x i8> %a) {
; SSE2-LABEL: test_v16i8_reassociation:
; SSE2: # %bb.0:
; SSE2-NEXT: pxor %xmm1, %xmm1
-; SSE2-NEXT: pxor %xmm2, %xmm2
-; SSE2-NEXT: pcmpgtb %xmm0, %xmm2
-; SSE2-NEXT: pand %xmm2, %xmm0
; SSE2-NEXT: pcmpgtb %xmm0, %xmm1
; SSE2-NEXT: pand %xmm1, %xmm0
; SSE2-NEXT: retq
@@ -81,21 +78,18 @@ define <16 x i8> @test_v16i8_reassociation(<16 x i8> %a) {
; SSE41: # %bb.0:
; SSE41-NEXT: pxor %xmm1, %xmm1
; SSE41-NEXT: pminsb %xmm1, %xmm0
-; SSE41-NEXT: pminsb %xmm1, %xmm0
; SSE41-NEXT: retq
;
; SSE42-LABEL: test_v16i8_reassociation:
; SSE42: # %bb.0:
; SSE42-NEXT: pxor %xmm1, %xmm1
; SSE42-NEXT: pminsb %xmm1, %xmm0
-; SSE42-NEXT: pminsb %xmm1, %xmm0
; SSE42-NEXT: retq
;
; AVX-LABEL: test_v16i8_reassociation:
; AVX: # %bb.0:
; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX-NEXT: vpminsb %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpminsb %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
%1 = call <16 x i8> @llvm.smin.v16i8(<16 x i8> %a, <16 x i8> zeroinitializer)
%2 = call <16 x i8> @llvm.smin.v16i8(<16 x i8> %1, <16 x i8> zeroinitializer)
diff --git a/llvm/test/CodeGen/X86/combine-umax.ll b/llvm/test/CodeGen/X86/combine-umax.ll
index 52bb9ee7fcb9f5..25f8ec891a2472 100644
--- a/llvm/test/CodeGen/X86/combine-umax.ll
+++ b/llvm/test/CodeGen/X86/combine-umax.ll
@@ -45,16 +45,12 @@ define <8 x i16> @test_v8i16_nosignbit(<8 x i16> %a, <8 x i16> %b) {
define <16 x i8> @test_v16i8_reassociation(<16 x i8> %a) {
; SSE-LABEL: test_v16i8_reassociation:
; SSE: # %bb.0:
-; SSE-NEXT: movdqa {{.*#+}} xmm1 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; SSE-NEXT: pmaxub %xmm1, %xmm0
-; SSE-NEXT: pmaxub %xmm1, %xmm0
+; SSE-NEXT: pmaxub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test_v16i8_reassociation:
; AVX: # %bb.0:
-; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; AVX-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpmaxub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX-NEXT: retq
%1 = call <16 x i8> @llvm.umax.v16i8(<16 x i8> %a, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>)
%2 = call <16 x i8> @llvm.umax.v16i8(<16 x i8> %1, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>)
diff --git a/llvm/test/CodeGen/X86/combine-umin.ll b/llvm/test/CodeGen/X86/combine-umin.ll
index 5b3b7f942805d9..76dbcb50bf8c7c 100644
--- a/llvm/test/CodeGen/X86/combine-umin.ll
+++ b/llvm/test/CodeGen/X86/combine-umin.ll
@@ -62,16 +62,12 @@ define <8 x i16> @test_v8i16_nosignbit(<8 x i16> %a, <8 x i16> %b) {
define <16 x i8> @test_v16i8_reassociation(<16 x i8> %a) {
; SSE-LABEL: test_v16i8_reassociation:
; SSE: # %bb.0:
-; SSE-NEXT: movdqa {{.*#+}} xmm1 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; SSE-NEXT: pminub %xmm1, %xmm0
-; SSE-NEXT: pminub %xmm1, %xmm0
+; SSE-NEXT: pminub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test_v16i8_reassociation:
; AVX: # %bb.0:
-; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpminub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX-NEXT: retq
%1 = call <16 x i8> @llvm.umin.v16i8(<16 x i8> %a, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>)
%2 = call <16 x i8> @llvm.umin.v16i8(<16 x i8> %1, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>)