[llvm] [RISCV] Undo unprofitable zext of icmp combine (PR #134306)
Luke Lau via llvm-commits
llvm-commits at lists.llvm.org
Thu Apr 3 16:26:23 PDT 2025
https://github.com/lukel97 updated https://github.com/llvm/llvm-project/pull/134306
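In brief: InstCombinerImpl::transformZExtICmp rewrites a zext of an icmp
into a trunc + lshr + and sequence, which is unprofitable on RVV whenever
the trunc narrows by more than a factor of two. The patches below reverse
that combine. A before/after sketch, taken from the reverse_zexticmp_i64
test in the diffs (the same reversal applies at other element widths):

  ; before (InstCombine output)
  %1 = trunc <vscale x 1 x i64> %x to <vscale x 1 x i8>
  %2 = lshr <vscale x 1 x i8> %1, splat (i8 2)
  %3 = and <vscale x 1 x i8> %2, splat (i8 1)

  ; after (with this patch)
  %1 = and <vscale x 1 x i64> %x, splat (i64 4)
  %2 = icmp ne <vscale x 1 x i64> %1, zeroinitializer
  %3 = zext <vscale x 1 x i1> %2 to <vscale x 1 x i8>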
>From 7b7b7d51c9cafc36af69a663cce3bfa6ebbebdd5 Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Thu, 3 Apr 2025 18:58:12 +0100
Subject: [PATCH 1/3] Precommit tests
---
.../RISCV/rvv/riscv-codegenprepare-asm.ll | 87 +++++++++++++++++++
.../CodeGen/RISCV/rvv/riscv-codegenprepare.ll | 74 ++++++++++++++++
2 files changed, 161 insertions(+)
diff --git a/llvm/test/CodeGen/RISCV/rvv/riscv-codegenprepare-asm.ll b/llvm/test/CodeGen/RISCV/rvv/riscv-codegenprepare-asm.ll
index 4e5f6e0f65489..9fe3c35e82bde 100644
--- a/llvm/test/CodeGen/RISCV/rvv/riscv-codegenprepare-asm.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/riscv-codegenprepare-asm.ll
@@ -498,3 +498,90 @@ vector.body: ; preds = %vector.body, %entry
for.cond.cleanup: ; preds = %vector.body
ret float %red
}
+
+define <vscale x 1 x i8> @reverse_zexticmp_i16(<vscale x 1 x i16> %x) {
+; CHECK-LABEL: reverse_zexticmp_i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-NEXT: vsrl.vi v8, v8, 2
+; CHECK-NEXT: vand.vi v8, v8, 1
+; CHECK-NEXT: ret
+ %1 = trunc <vscale x 1 x i16> %x to <vscale x 1 x i8>
+ %2 = lshr <vscale x 1 x i8> %1, splat (i8 2)
+ %3 = and <vscale x 1 x i8> %2, splat (i8 1)
+ ret <vscale x 1 x i8> %3
+}
+
+define <vscale x 1 x i8> @reverse_zexticmp_i32(<vscale x 1 x i32> %x) {
+; CHECK-LABEL: reverse_zexticmp_i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-NEXT: vsrl.vi v8, v8, 2
+; CHECK-NEXT: vand.vi v8, v8, 1
+; CHECK-NEXT: ret
+ %1 = trunc <vscale x 1 x i32> %x to <vscale x 1 x i8>
+ %2 = lshr <vscale x 1 x i8> %1, splat (i8 2)
+ %3 = and <vscale x 1 x i8> %2, splat (i8 1)
+ ret <vscale x 1 x i8> %3
+}
+
+define <vscale x 1 x i8> @reverse_zexticmp_neg_i32(<vscale x 1 x i32> %x) {
+; CHECK-LABEL: reverse_zexticmp_neg_i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-NEXT: vnot.v v8, v8
+; CHECK-NEXT: vsrl.vi v8, v8, 2
+; CHECK-NEXT: vand.vi v8, v8, 1
+; CHECK-NEXT: ret
+ %1 = trunc <vscale x 1 x i32> %x to <vscale x 1 x i8>
+ %2 = xor <vscale x 1 x i8> %1, splat (i8 -1)
+ %3 = lshr <vscale x 1 x i8> %2, splat (i8 2)
+ %4 = and <vscale x 1 x i8> %3, splat (i8 1)
+ ret <vscale x 1 x i8> %4
+}
+
+define <vscale x 1 x i8> @reverse_zexticmp_i64(<vscale x 1 x i64> %x) {
+; CHECK-LABEL: reverse_zexticmp_i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-NEXT: vsrl.vi v8, v8, 2
+; CHECK-NEXT: vand.vi v8, v8, 1
+; CHECK-NEXT: ret
+ %1 = trunc <vscale x 1 x i64> %x to <vscale x 1 x i8>
+ %2 = lshr <vscale x 1 x i8> %1, splat (i8 2)
+ %3 = and <vscale x 1 x i8> %2, splat (i8 1)
+ ret <vscale x 1 x i8> %3
+}
+
+define <vscale x 1 x i8> @reverse_zexticmp_neg_i64(<vscale x 1 x i64> %x) {
+; CHECK-LABEL: reverse_zexticmp_neg_i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-NEXT: vnot.v v8, v8
+; CHECK-NEXT: vsrl.vi v8, v8, 2
+; CHECK-NEXT: vand.vi v8, v8, 1
+; CHECK-NEXT: ret
+ %1 = trunc <vscale x 1 x i64> %x to <vscale x 1 x i8>
+ %2 = xor <vscale x 1 x i8> %1, splat (i8 -1)
+ %3 = lshr <vscale x 1 x i8> %2, splat (i8 2)
+ %4 = and <vscale x 1 x i8> %3, splat (i8 1)
+ ret <vscale x 1 x i8> %4
+}
+
diff --git a/llvm/test/CodeGen/RISCV/rvv/riscv-codegenprepare.ll b/llvm/test/CodeGen/RISCV/rvv/riscv-codegenprepare.ll
index 8967fb8bf01ac..50168e88321d0 100644
--- a/llvm/test/CodeGen/RISCV/rvv/riscv-codegenprepare.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/riscv-codegenprepare.ll
@@ -528,3 +528,77 @@ vector.body: ; preds = %vector.body, %entry
for.cond.cleanup: ; preds = %vector.body
ret float %red
}
+
+define <vscale x 1 x i8> @reverse_zexticmp_i16(<vscale x 1 x i16> %x) {
+; CHECK-LABEL: define <vscale x 1 x i8> @reverse_zexticmp_i16(
+; CHECK-SAME: <vscale x 1 x i16> [[X:%.*]]) #[[ATTR2]] {
+; CHECK-NEXT: [[TMP1:%.*]] = trunc <vscale x 1 x i16> [[X]] to <vscale x 1 x i8>
+; CHECK-NEXT: [[TMP2:%.*]] = lshr <vscale x 1 x i8> [[TMP1]], splat (i8 2)
+; CHECK-NEXT: [[TMP3:%.*]] = and <vscale x 1 x i8> [[TMP2]], splat (i8 1)
+; CHECK-NEXT: ret <vscale x 1 x i8> [[TMP3]]
+;
+ %1 = trunc <vscale x 1 x i16> %x to <vscale x 1 x i8>
+ %2 = lshr <vscale x 1 x i8> %1, splat (i8 2)
+ %3 = and <vscale x 1 x i8> %2, splat (i8 1)
+ ret <vscale x 1 x i8> %3
+}
+
+define <vscale x 1 x i8> @reverse_zexticmp_i32(<vscale x 1 x i32> %x) {
+; CHECK-LABEL: define <vscale x 1 x i8> @reverse_zexticmp_i32(
+; CHECK-SAME: <vscale x 1 x i32> [[X:%.*]]) #[[ATTR2]] {
+; CHECK-NEXT: [[TMP1:%.*]] = trunc <vscale x 1 x i32> [[X]] to <vscale x 1 x i8>
+; CHECK-NEXT: [[TMP2:%.*]] = lshr <vscale x 1 x i8> [[TMP1]], splat (i8 2)
+; CHECK-NEXT: [[TMP3:%.*]] = and <vscale x 1 x i8> [[TMP2]], splat (i8 1)
+; CHECK-NEXT: ret <vscale x 1 x i8> [[TMP3]]
+;
+ %1 = trunc <vscale x 1 x i32> %x to <vscale x 1 x i8>
+ %2 = lshr <vscale x 1 x i8> %1, splat (i8 2)
+ %3 = and <vscale x 1 x i8> %2, splat (i8 1)
+ ret <vscale x 1 x i8> %3
+}
+
+define <vscale x 1 x i8> @reverse_zexticmp_neg_i32(<vscale x 1 x i32> %x) {
+; CHECK-LABEL: define <vscale x 1 x i8> @reverse_zexticmp_neg_i32(
+; CHECK-SAME: <vscale x 1 x i32> [[X:%.*]]) #[[ATTR2]] {
+; CHECK-NEXT: [[TMP1:%.*]] = trunc <vscale x 1 x i32> [[X]] to <vscale x 1 x i8>
+; CHECK-NEXT: [[TMP2:%.*]] = xor <vscale x 1 x i8> [[TMP1]], splat (i8 -1)
+; CHECK-NEXT: [[TMP3:%.*]] = lshr <vscale x 1 x i8> [[TMP2]], splat (i8 2)
+; CHECK-NEXT: [[TMP4:%.*]] = and <vscale x 1 x i8> [[TMP3]], splat (i8 1)
+; CHECK-NEXT: ret <vscale x 1 x i8> [[TMP4]]
+;
+ %1 = trunc <vscale x 1 x i32> %x to <vscale x 1 x i8>
+ %2 = xor <vscale x 1 x i8> %1, splat (i8 -1)
+ %3 = lshr <vscale x 1 x i8> %2, splat (i8 2)
+ %4 = and <vscale x 1 x i8> %3, splat (i8 1)
+ ret <vscale x 1 x i8> %4
+}
+
+define <vscale x 1 x i8> @reverse_zexticmp_i64(<vscale x 1 x i64> %x) {
+; CHECK-LABEL: define <vscale x 1 x i8> @reverse_zexticmp_i64(
+; CHECK-SAME: <vscale x 1 x i64> [[X:%.*]]) #[[ATTR2]] {
+; CHECK-NEXT: [[TMP1:%.*]] = trunc <vscale x 1 x i64> [[X]] to <vscale x 1 x i8>
+; CHECK-NEXT: [[TMP2:%.*]] = lshr <vscale x 1 x i8> [[TMP1]], splat (i8 2)
+; CHECK-NEXT: [[TMP3:%.*]] = and <vscale x 1 x i8> [[TMP2]], splat (i8 1)
+; CHECK-NEXT: ret <vscale x 1 x i8> [[TMP3]]
+;
+ %1 = trunc <vscale x 1 x i64> %x to <vscale x 1 x i8>
+ %2 = lshr <vscale x 1 x i8> %1, splat (i8 2)
+ %3 = and <vscale x 1 x i8> %2, splat (i8 1)
+ ret <vscale x 1 x i8> %3
+}
+
+define <vscale x 1 x i8> @reverse_zexticmp_neg_i64(<vscale x 1 x i64> %x) {
+; CHECK-LABEL: define <vscale x 1 x i8> @reverse_zexticmp_neg_i64(
+; CHECK-SAME: <vscale x 1 x i64> [[X:%.*]]) #[[ATTR2]] {
+; CHECK-NEXT: [[TMP1:%.*]] = trunc <vscale x 1 x i64> [[X]] to <vscale x 1 x i8>
+; CHECK-NEXT: [[TMP2:%.*]] = xor <vscale x 1 x i8> [[TMP1]], splat (i8 -1)
+; CHECK-NEXT: [[TMP3:%.*]] = lshr <vscale x 1 x i8> [[TMP2]], splat (i8 2)
+; CHECK-NEXT: [[TMP4:%.*]] = and <vscale x 1 x i8> [[TMP3]], splat (i8 1)
+; CHECK-NEXT: ret <vscale x 1 x i8> [[TMP4]]
+;
+ %1 = trunc <vscale x 1 x i64> %x to <vscale x 1 x i8>
+ %2 = xor <vscale x 1 x i8> %1, splat (i8 -1)
+ %3 = lshr <vscale x 1 x i8> %2, splat (i8 2)
+ %4 = and <vscale x 1 x i8> %3, splat (i8 1)
+ ret <vscale x 1 x i8> %4
+}
>From 3ec5d3be4d25d559af7f7423b7bc10436c9d21cc Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Thu, 3 Apr 2025 21:36:54 +0100
Subject: [PATCH 2/3] [RISCV] Undo unprofitable zext of icmp combine
---
llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp | 71 ++++++++++++++++++-
.../RISCV/rvv/riscv-codegenprepare-asm.ll | 46 ++++++------
.../CodeGen/RISCV/rvv/riscv-codegenprepare.ll | 26 ++++---
3 files changed, 100 insertions(+), 43 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp b/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp
index b5cb05f30fb26..e04f3b1d3478e 100644
--- a/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp
+++ b/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp
@@ -25,6 +25,7 @@
#include "llvm/IR/PatternMatch.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
+#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
@@ -62,10 +63,74 @@ class RISCVCodeGenPrepare : public FunctionPass,
} // end anonymous namespace
-// Try to optimize (i64 (and (zext/sext (i32 X), C1))) if C1 has bit 31 set,
-// but bits 63:32 are zero. If we know that bit 31 of X is 0, we can fill
-// the upper 32 bits with ones.
+// InstCombinerImpl::transformZExtICmp will narrow a zext of an icmp into a
+// trunc + shift + and sequence. But RVV has no truncation instruction that
+// narrows by more than a factor of two, so wider truncs take multiple steps.
+//
+// E.g. trunc <vscale x 1 x i64> %x to <vscale x 1 x i8> will generate:
+//
+// vsetvli a0, zero, e32, m2, ta, ma
+// vnsrl.wi v12, v8, 0
+// vsetvli zero, zero, e16, m1, ta, ma
+// vnsrl.wi v8, v12, 0
+// vsetvli zero, zero, e8, mf2, ta, ma
+// vnsrl.wi v8, v8, 0
+//
+// So reverse the combine to generate a vmseq/vmsne again:
+//
+// and (lshr (trunc X), ShAmt), 1
+// -->
+// zext (icmp ne (and X, (1 << ShAmt)), 0)
+//
+// and (lshr (not (trunc X)), ShAmt), 1
+// -->
+// zext (icmp eq (and X, (1 << ShAmt)), 0)
+static bool reverseZExtICmpCombine(BinaryOperator &BO) {
+ using namespace PatternMatch;
+
+ assert(BO.getOpcode() == BinaryOperator::And);
+
+ if (!BO.getType()->isVectorTy())
+ return false;
+ const APInt *ShAmt;
+ Value *Inner;
+ if (!match(&BO,
+ m_And(m_OneUse(m_LShr(m_OneUse(m_Value(Inner)), m_APInt(ShAmt))),
+ m_One())))
+ return false;
+
+ Value *X;
+ bool IsNot;
+ if (match(Inner, m_Not(m_Trunc(m_Value(X)))))
+ IsNot = true;
+ else if (match(Inner, m_Trunc(m_Value(X))))
+ IsNot = false;
+ else
+ return false;
+
+ if (BO.getType()->getScalarSizeInBits() >=
+ X->getType()->getScalarSizeInBits() / 2)
+ return false;
+
+ IRBuilder<> Builder(&BO);
+ Value *Res = Builder.CreateAnd(
+ X, ConstantInt::get(X->getType(), 1 << ShAmt->getZExtValue()));
+ Res = Builder.CreateICmp(IsNot ? CmpInst::Predicate::ICMP_EQ
+ : CmpInst::Predicate::ICMP_NE,
+ Res, ConstantInt::get(X->getType(), 0));
+ Res = Builder.CreateZExt(Res, BO.getType());
+ BO.replaceAllUsesWith(Res);
+ RecursivelyDeleteTriviallyDeadInstructions(&BO);
+ return true;
+}
+
bool RISCVCodeGenPrepare::visitAnd(BinaryOperator &BO) {
+ if (reverseZExtICmpCombine(BO))
+ return true;
+
+ // Try to optimize (i64 (and (zext/sext (i32 X), C1))) if C1 has bit 31 set,
+ // but bits 63:32 are zero. If we know that bit 31 of X is 0, we can fill
+ // the upper 32 bits with ones.
if (!ST->is64Bit())
return false;
diff --git a/llvm/test/CodeGen/RISCV/rvv/riscv-codegenprepare-asm.ll b/llvm/test/CodeGen/RISCV/rvv/riscv-codegenprepare-asm.ll
index 9fe3c35e82bde..b6593eac6d92c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/riscv-codegenprepare-asm.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/riscv-codegenprepare-asm.ll
@@ -516,12 +516,12 @@ define <vscale x 1 x i8> @reverse_zexticmp_i16(<vscale x 1 x i16> %x) {
define <vscale x 1 x i8> @reverse_zexticmp_i32(<vscale x 1 x i32> %x) {
; CHECK-LABEL: reverse_zexticmp_i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vand.vi v8, v8, 4
+; CHECK-NEXT: vmsne.vi v0, v8, 0
; CHECK-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v8, 0
-; CHECK-NEXT: vsrl.vi v8, v8, 2
-; CHECK-NEXT: vand.vi v8, v8, 1
+; CHECK-NEXT: vmv.v.i v8, 0
+; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
; CHECK-NEXT: ret
%1 = trunc <vscale x 1 x i32> %x to <vscale x 1 x i8>
%2 = lshr <vscale x 1 x i8> %1, splat (i8 2)
@@ -532,13 +532,12 @@ define <vscale x 1 x i8> @reverse_zexticmp_i32(<vscale x 1 x i32> %x) {
define <vscale x 1 x i8> @reverse_zexticmp_neg_i32(<vscale x 1 x i32> %x) {
; CHECK-LABEL: reverse_zexticmp_neg_i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vand.vi v8, v8, 4
+; CHECK-NEXT: vmseq.vi v0, v8, 0
; CHECK-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v8, 0
-; CHECK-NEXT: vnot.v v8, v8
-; CHECK-NEXT: vsrl.vi v8, v8, 2
-; CHECK-NEXT: vand.vi v8, v8, 1
+; CHECK-NEXT: vmv.v.i v8, 0
+; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
; CHECK-NEXT: ret
%1 = trunc <vscale x 1 x i32> %x to <vscale x 1 x i8>
%2 = xor <vscale x 1 x i8> %1, splat (i8 -1)
@@ -550,14 +549,12 @@ define <vscale x 1 x i8> @reverse_zexticmp_neg_i32(<vscale x 1 x i32> %x) {
define <vscale x 1 x i8> @reverse_zexticmp_i64(<vscale x 1 x i64> %x) {
; CHECK-LABEL: reverse_zexticmp_i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v8, 0
-; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; CHECK-NEXT: vand.vi v8, v8, 4
+; CHECK-NEXT: vmsne.vi v0, v8, 0
; CHECK-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v8, 0
-; CHECK-NEXT: vsrl.vi v8, v8, 2
-; CHECK-NEXT: vand.vi v8, v8, 1
+; CHECK-NEXT: vmv.v.i v8, 0
+; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
; CHECK-NEXT: ret
%1 = trunc <vscale x 1 x i64> %x to <vscale x 1 x i8>
%2 = lshr <vscale x 1 x i8> %1, splat (i8 2)
@@ -568,15 +565,12 @@ define <vscale x 1 x i8> @reverse_zexticmp_i64(<vscale x 1 x i64> %x) {
define <vscale x 1 x i8> @reverse_zexticmp_neg_i64(<vscale x 1 x i64> %x) {
; CHECK-LABEL: reverse_zexticmp_neg_i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v8, 0
-; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; CHECK-NEXT: vand.vi v8, v8, 4
+; CHECK-NEXT: vmseq.vi v0, v8, 0
; CHECK-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v8, 0
-; CHECK-NEXT: vnot.v v8, v8
-; CHECK-NEXT: vsrl.vi v8, v8, 2
-; CHECK-NEXT: vand.vi v8, v8, 1
+; CHECK-NEXT: vmv.v.i v8, 0
+; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
; CHECK-NEXT: ret
%1 = trunc <vscale x 1 x i64> %x to <vscale x 1 x i8>
%2 = xor <vscale x 1 x i8> %1, splat (i8 -1)
diff --git a/llvm/test/CodeGen/RISCV/rvv/riscv-codegenprepare.ll b/llvm/test/CodeGen/RISCV/rvv/riscv-codegenprepare.ll
index 50168e88321d0..483e797151325 100644
--- a/llvm/test/CodeGen/RISCV/rvv/riscv-codegenprepare.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/riscv-codegenprepare.ll
@@ -546,9 +546,9 @@ define <vscale x 1 x i8> @reverse_zexticmp_i16(<vscale x 1 x i16> %x) {
define <vscale x 1 x i8> @reverse_zexticmp_i32(<vscale x 1 x i32> %x) {
; CHECK-LABEL: define <vscale x 1 x i8> @reverse_zexticmp_i32(
; CHECK-SAME: <vscale x 1 x i32> [[X:%.*]]) #[[ATTR2]] {
-; CHECK-NEXT: [[TMP1:%.*]] = trunc <vscale x 1 x i32> [[X]] to <vscale x 1 x i8>
-; CHECK-NEXT: [[TMP2:%.*]] = lshr <vscale x 1 x i8> [[TMP1]], splat (i8 2)
-; CHECK-NEXT: [[TMP3:%.*]] = and <vscale x 1 x i8> [[TMP2]], splat (i8 1)
+; CHECK-NEXT: [[TMP1:%.*]] = and <vscale x 1 x i32> [[X]], splat (i32 4)
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <vscale x 1 x i32> [[TMP1]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = zext <vscale x 1 x i1> [[TMP2]] to <vscale x 1 x i8>
; CHECK-NEXT: ret <vscale x 1 x i8> [[TMP3]]
;
%1 = trunc <vscale x 1 x i32> %x to <vscale x 1 x i8>
@@ -560,10 +560,9 @@ define <vscale x 1 x i8> @reverse_zexticmp_i32(<vscale x 1 x i32> %x) {
define <vscale x 1 x i8> @reverse_zexticmp_neg_i32(<vscale x 1 x i32> %x) {
; CHECK-LABEL: define <vscale x 1 x i8> @reverse_zexticmp_neg_i32(
; CHECK-SAME: <vscale x 1 x i32> [[X:%.*]]) #[[ATTR2]] {
-; CHECK-NEXT: [[TMP1:%.*]] = trunc <vscale x 1 x i32> [[X]] to <vscale x 1 x i8>
-; CHECK-NEXT: [[TMP2:%.*]] = xor <vscale x 1 x i8> [[TMP1]], splat (i8 -1)
-; CHECK-NEXT: [[TMP3:%.*]] = lshr <vscale x 1 x i8> [[TMP2]], splat (i8 2)
-; CHECK-NEXT: [[TMP4:%.*]] = and <vscale x 1 x i8> [[TMP3]], splat (i8 1)
+; CHECK-NEXT: [[TMP1:%.*]] = and <vscale x 1 x i32> [[X]], splat (i32 4)
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <vscale x 1 x i32> [[TMP1]], zeroinitializer
+; CHECK-NEXT: [[TMP4:%.*]] = zext <vscale x 1 x i1> [[TMP2]] to <vscale x 1 x i8>
; CHECK-NEXT: ret <vscale x 1 x i8> [[TMP4]]
;
%1 = trunc <vscale x 1 x i32> %x to <vscale x 1 x i8>
@@ -576,9 +575,9 @@ define <vscale x 1 x i8> @reverse_zexticmp_neg_i32(<vscale x 1 x i32> %x) {
define <vscale x 1 x i8> @reverse_zexticmp_i64(<vscale x 1 x i64> %x) {
; CHECK-LABEL: define <vscale x 1 x i8> @reverse_zexticmp_i64(
; CHECK-SAME: <vscale x 1 x i64> [[X:%.*]]) #[[ATTR2]] {
-; CHECK-NEXT: [[TMP1:%.*]] = trunc <vscale x 1 x i64> [[X]] to <vscale x 1 x i8>
-; CHECK-NEXT: [[TMP2:%.*]] = lshr <vscale x 1 x i8> [[TMP1]], splat (i8 2)
-; CHECK-NEXT: [[TMP3:%.*]] = and <vscale x 1 x i8> [[TMP2]], splat (i8 1)
+; CHECK-NEXT: [[TMP1:%.*]] = and <vscale x 1 x i64> [[X]], splat (i64 4)
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <vscale x 1 x i64> [[TMP1]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = zext <vscale x 1 x i1> [[TMP2]] to <vscale x 1 x i8>
; CHECK-NEXT: ret <vscale x 1 x i8> [[TMP3]]
;
%1 = trunc <vscale x 1 x i64> %x to <vscale x 1 x i8>
@@ -590,10 +589,9 @@ define <vscale x 1 x i8> @reverse_zexticmp_i64(<vscale x 1 x i64> %x) {
define <vscale x 1 x i8> @reverse_zexticmp_neg_i64(<vscale x 1 x i64> %x) {
; CHECK-LABEL: define <vscale x 1 x i8> @reverse_zexticmp_neg_i64(
; CHECK-SAME: <vscale x 1 x i64> [[X:%.*]]) #[[ATTR2]] {
-; CHECK-NEXT: [[TMP1:%.*]] = trunc <vscale x 1 x i64> [[X]] to <vscale x 1 x i8>
-; CHECK-NEXT: [[TMP2:%.*]] = xor <vscale x 1 x i8> [[TMP1]], splat (i8 -1)
-; CHECK-NEXT: [[TMP3:%.*]] = lshr <vscale x 1 x i8> [[TMP2]], splat (i8 2)
-; CHECK-NEXT: [[TMP4:%.*]] = and <vscale x 1 x i8> [[TMP3]], splat (i8 1)
+; CHECK-NEXT: [[TMP1:%.*]] = and <vscale x 1 x i64> [[X]], splat (i64 4)
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <vscale x 1 x i64> [[TMP1]], zeroinitializer
+; CHECK-NEXT: [[TMP4:%.*]] = zext <vscale x 1 x i1> [[TMP2]] to <vscale x 1 x i8>
; CHECK-NEXT: ret <vscale x 1 x i8> [[TMP4]]
;
%1 = trunc <vscale x 1 x i64> %x to <vscale x 1 x i8>
>From 170056720f91c5855f10f0564d262f901b26abda Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Fri, 4 Apr 2025 00:24:02 +0100
Subject: [PATCH 3/3] Move to DAG combine, check for +zve32x
---
llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp | 71 +--------------
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 65 ++++++++++++++
.../RISCV/rvv/riscv-codegenprepare-asm.ll | 81 -----------------
.../CodeGen/RISCV/rvv/riscv-codegenprepare.ll | 72 ----------------
llvm/test/CodeGen/RISCV/rvv/zext-icmp.ll | 86 +++++++++++++++++++
5 files changed, 154 insertions(+), 221 deletions(-)
create mode 100644 llvm/test/CodeGen/RISCV/rvv/zext-icmp.ll
diff --git a/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp b/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp
index e04f3b1d3478e..b5cb05f30fb26 100644
--- a/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp
+++ b/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp
@@ -25,7 +25,6 @@
#include "llvm/IR/PatternMatch.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
-#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
@@ -63,74 +62,10 @@ class RISCVCodeGenPrepare : public FunctionPass,
} // end anonymous namespace
-// InstCombinerImpl::transformZExtICmp will narrow a zext of an icmp into a
-// trunc + shift + and sequence. But RVV has no truncation instruction that
-// narrows by more than a factor of two, so wider truncs take multiple steps.
-//
-// E.g. trunc <vscale x 1 x i64> %x to <vscale x 1 x i8> will generate:
-//
-// vsetvli a0, zero, e32, m2, ta, ma
-// vnsrl.wi v12, v8, 0
-// vsetvli zero, zero, e16, m1, ta, ma
-// vnsrl.wi v8, v12, 0
-// vsetvli zero, zero, e8, mf2, ta, ma
-// vnsrl.wi v8, v8, 0
-//
-// So reverse the combine to generate a vmseq/vmsne again:
-//
-// and (lshr (trunc X), ShAmt), 1
-// -->
-// zext (icmp ne (and X, (1 << ShAmt)), 0)
-//
-// and (lshr (not (trunc X)), ShAmt), 1
-// -->
-// zext (icmp eq (and X, (1 << ShAmt)), 0)
-static bool reverseZExtICmpCombine(BinaryOperator &BO) {
- using namespace PatternMatch;
-
- assert(BO.getOpcode() == BinaryOperator::And);
-
- if (!BO.getType()->isVectorTy())
- return false;
- const APInt *ShAmt;
- Value *Inner;
- if (!match(&BO,
- m_And(m_OneUse(m_LShr(m_OneUse(m_Value(Inner)), m_APInt(ShAmt))),
- m_One())))
- return false;
-
- Value *X;
- bool IsNot;
- if (match(Inner, m_Not(m_Trunc(m_Value(X)))))
- IsNot = true;
- else if (match(Inner, m_Trunc(m_Value(X))))
- IsNot = false;
- else
- return false;
-
- if (BO.getType()->getScalarSizeInBits() >=
- X->getType()->getScalarSizeInBits() / 2)
- return false;
-
- IRBuilder<> Builder(&BO);
- Value *Res = Builder.CreateAnd(
- X, ConstantInt::get(X->getType(), 1 << ShAmt->getZExtValue()));
- Res = Builder.CreateICmp(IsNot ? CmpInst::Predicate::ICMP_EQ
- : CmpInst::Predicate::ICMP_NE,
- Res, ConstantInt::get(X->getType(), 0));
- Res = Builder.CreateZExt(Res, BO.getType());
- BO.replaceAllUsesWith(Res);
- RecursivelyDeleteTriviallyDeadInstructions(&BO);
- return true;
-}
-
+// Try to optimize (i64 (and (zext/sext (i32 X), C1))) if C1 has bit 31 set,
+// but bits 63:32 are zero. If we know that bit 31 of X is 0, we can fill
+// the upper 32 bits with ones.
bool RISCVCodeGenPrepare::visitAnd(BinaryOperator &BO) {
- if (reverseZExtICmpCombine(BO))
- return true;
-
- // Try to optimize (i64 (and (zext/sext (i32 X), C1))) if C1 has bit 31 set,
- // but bits 63:32 are zero. If we know that bit 31 of X is 0, we can fill
- // the upper 32 bits with ones.
if (!ST->is64Bit())
return false;
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 8c409adedc2df..a60adc4b33563 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -15040,6 +15040,68 @@ static SDValue performTRUNCATECombine(SDNode *N, SelectionDAG &DAG,
return combineTruncSelectToSMaxUSat(N, DAG);
}
+// InstCombinerImpl::transformZExtICmp will narrow a zext of an icmp into a
+// trunc + shift + and sequence. But RVV has no truncation instruction that
+// narrows by more than a factor of two, so wider truncs take multiple steps.
+//
+// E.g. trunc <vscale x 1 x i64> %x to <vscale x 1 x i8> will generate:
+//
+// vsetvli a0, zero, e32, m2, ta, ma
+// vnsrl.wi v12, v8, 0
+// vsetvli zero, zero, e16, m1, ta, ma
+// vnsrl.wi v8, v12, 0
+// vsetvli zero, zero, e8, mf2, ta, ma
+// vnsrl.wi v8, v8, 0
+//
+// So reverse the combine to generate a vmseq/vmsne again:
+//
+// and (lshr (trunc X), ShAmt), 1
+// -->
+// zext (icmp ne (and X, (1 << ShAmt)), 0)
+//
+// and (lshr (not (trunc X)), ShAmt), 1
+// -->
+// zext (icmp eq (and X, (1 << ShAmt)), 0)
+static SDValue reverseZExtICmpCombine(SDNode *N, SelectionDAG &DAG,
+ const RISCVSubtarget &Subtarget) {
+ using namespace SDPatternMatch;
+ SDLoc DL(N);
+
+ if (!Subtarget.hasVInstructions())
+ return SDValue();
+
+ EVT VT = N->getValueType(0);
+ if (!VT.isVector())
+ return SDValue();
+
+ APInt ShAmt;
+ SDValue Inner;
+ if (!sd_match(N, m_And(m_OneUse(m_Srl(m_Value(Inner), m_ConstInt(ShAmt))),
+ m_One())))
+ return SDValue();
+
+ SDValue X;
+ bool IsNot;
+ if (sd_match(Inner, m_Not(m_Trunc(m_Value(X)))))
+ IsNot = true;
+ else if (sd_match(Inner, m_Trunc(m_Value(X))))
+ IsNot = false;
+ else
+ return SDValue();
+
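+ // A single vnsrl.wi can narrow by a factor of two, so the reversal is only
+ // profitable when the narrow element type is less than half the width of
+ // X's element type, i.e. when the trunc would take more than one step.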
+ EVT WideVT = X.getValueType();
+ if (VT.getScalarSizeInBits() >= WideVT.getScalarSizeInBits() / 2)
+ return SDValue();
+
+ SDValue Res =
+ DAG.getNode(ISD::AND, DL, WideVT, X,
+ DAG.getConstant(1 << ShAmt.getZExtValue(), DL, WideVT));
+ Res = DAG.getSetCC(DL, WideVT.changeElementType(MVT::i1), Res,
+ DAG.getConstant(0, DL, WideVT),
+ IsNot ? ISD::SETEQ : ISD::SETNE);
+ return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Res);
+}
+
// Combines two comparison operation and logic operation to one selection
// operation(min, max) and logic operation. Returns new constructed Node if
// conditions for optimization are satisfied.
@@ -15067,6 +15129,9 @@ static SDValue performANDCombine(SDNode *N,
return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, And);
}
+ if (SDValue V = reverseZExtICmpCombine(N, DAG, Subtarget))
+ return V;
+
if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
return V;
if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
diff --git a/llvm/test/CodeGen/RISCV/rvv/riscv-codegenprepare-asm.ll b/llvm/test/CodeGen/RISCV/rvv/riscv-codegenprepare-asm.ll
index b6593eac6d92c..4e5f6e0f65489 100644
--- a/llvm/test/CodeGen/RISCV/rvv/riscv-codegenprepare-asm.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/riscv-codegenprepare-asm.ll
@@ -498,84 +498,3 @@ vector.body: ; preds = %vector.body, %entry
for.cond.cleanup: ; preds = %vector.body
ret float %red
}
-
-define <vscale x 1 x i8> @reverse_zexticmp_i16(<vscale x 1 x i16> %x) {
-; CHECK-LABEL: reverse_zexticmp_i16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v8, 0
-; CHECK-NEXT: vsrl.vi v8, v8, 2
-; CHECK-NEXT: vand.vi v8, v8, 1
-; CHECK-NEXT: ret
- %1 = trunc <vscale x 1 x i16> %x to <vscale x 1 x i8>
- %2 = lshr <vscale x 1 x i8> %1, splat (i8 2)
- %3 = and <vscale x 1 x i8> %2, splat (i8 1)
- ret <vscale x 1 x i8> %3
-}
-
-define <vscale x 1 x i8> @reverse_zexticmp_i32(<vscale x 1 x i32> %x) {
-; CHECK-LABEL: reverse_zexticmp_i32:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
-; CHECK-NEXT: vand.vi v8, v8, 4
-; CHECK-NEXT: vmsne.vi v0, v8, 0
-; CHECK-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
-; CHECK-NEXT: vmv.v.i v8, 0
-; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
-; CHECK-NEXT: ret
- %1 = trunc <vscale x 1 x i32> %x to <vscale x 1 x i8>
- %2 = lshr <vscale x 1 x i8> %1, splat (i8 2)
- %3 = and <vscale x 1 x i8> %2, splat (i8 1)
- ret <vscale x 1 x i8> %3
-}
-
-define <vscale x 1 x i8> @reverse_zexticmp_neg_i32(<vscale x 1 x i32> %x) {
-; CHECK-LABEL: reverse_zexticmp_neg_i32:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
-; CHECK-NEXT: vand.vi v8, v8, 4
-; CHECK-NEXT: vmseq.vi v0, v8, 0
-; CHECK-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
-; CHECK-NEXT: vmv.v.i v8, 0
-; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
-; CHECK-NEXT: ret
- %1 = trunc <vscale x 1 x i32> %x to <vscale x 1 x i8>
- %2 = xor <vscale x 1 x i8> %1, splat (i8 -1)
- %3 = lshr <vscale x 1 x i8> %2, splat (i8 2)
- %4 = and <vscale x 1 x i8> %3, splat (i8 1)
- ret <vscale x 1 x i8> %4
-}
-
-define <vscale x 1 x i8> @reverse_zexticmp_i64(<vscale x 1 x i64> %x) {
-; CHECK-LABEL: reverse_zexticmp_i64:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
-; CHECK-NEXT: vand.vi v8, v8, 4
-; CHECK-NEXT: vmsne.vi v0, v8, 0
-; CHECK-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
-; CHECK-NEXT: vmv.v.i v8, 0
-; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
-; CHECK-NEXT: ret
- %1 = trunc <vscale x 1 x i64> %x to <vscale x 1 x i8>
- %2 = lshr <vscale x 1 x i8> %1, splat (i8 2)
- %3 = and <vscale x 1 x i8> %2, splat (i8 1)
- ret <vscale x 1 x i8> %3
-}
-
-define <vscale x 1 x i8> @reverse_zexticmp_neg_i64(<vscale x 1 x i64> %x) {
-; CHECK-LABEL: reverse_zexticmp_neg_i64:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
-; CHECK-NEXT: vand.vi v8, v8, 4
-; CHECK-NEXT: vmseq.vi v0, v8, 0
-; CHECK-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
-; CHECK-NEXT: vmv.v.i v8, 0
-; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
-; CHECK-NEXT: ret
- %1 = trunc <vscale x 1 x i64> %x to <vscale x 1 x i8>
- %2 = xor <vscale x 1 x i8> %1, splat (i8 -1)
- %3 = lshr <vscale x 1 x i8> %2, splat (i8 2)
- %4 = and <vscale x 1 x i8> %3, splat (i8 1)
- ret <vscale x 1 x i8> %4
-}
-
diff --git a/llvm/test/CodeGen/RISCV/rvv/riscv-codegenprepare.ll b/llvm/test/CodeGen/RISCV/rvv/riscv-codegenprepare.ll
index 483e797151325..8967fb8bf01ac 100644
--- a/llvm/test/CodeGen/RISCV/rvv/riscv-codegenprepare.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/riscv-codegenprepare.ll
@@ -528,75 +528,3 @@ vector.body: ; preds = %vector.body, %entry
for.cond.cleanup: ; preds = %vector.body
ret float %red
}
-
-define <vscale x 1 x i8> @reverse_zexticmp_i16(<vscale x 1 x i16> %x) {
-; CHECK-LABEL: define <vscale x 1 x i8> @reverse_zexticmp_i16(
-; CHECK-SAME: <vscale x 1 x i16> [[X:%.*]]) #[[ATTR2]] {
-; CHECK-NEXT: [[TMP1:%.*]] = trunc <vscale x 1 x i16> [[X]] to <vscale x 1 x i8>
-; CHECK-NEXT: [[TMP2:%.*]] = lshr <vscale x 1 x i8> [[TMP1]], splat (i8 2)
-; CHECK-NEXT: [[TMP3:%.*]] = and <vscale x 1 x i8> [[TMP2]], splat (i8 1)
-; CHECK-NEXT: ret <vscale x 1 x i8> [[TMP3]]
-;
- %1 = trunc <vscale x 1 x i16> %x to <vscale x 1 x i8>
- %2 = lshr <vscale x 1 x i8> %1, splat (i8 2)
- %3 = and <vscale x 1 x i8> %2, splat (i8 1)
- ret <vscale x 1 x i8> %3
-}
-
-define <vscale x 1 x i8> @reverse_zexticmp_i32(<vscale x 1 x i32> %x) {
-; CHECK-LABEL: define <vscale x 1 x i8> @reverse_zexticmp_i32(
-; CHECK-SAME: <vscale x 1 x i32> [[X:%.*]]) #[[ATTR2]] {
-; CHECK-NEXT: [[TMP1:%.*]] = and <vscale x 1 x i32> [[X]], splat (i32 4)
-; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <vscale x 1 x i32> [[TMP1]], zeroinitializer
-; CHECK-NEXT: [[TMP3:%.*]] = zext <vscale x 1 x i1> [[TMP2]] to <vscale x 1 x i8>
-; CHECK-NEXT: ret <vscale x 1 x i8> [[TMP3]]
-;
- %1 = trunc <vscale x 1 x i32> %x to <vscale x 1 x i8>
- %2 = lshr <vscale x 1 x i8> %1, splat (i8 2)
- %3 = and <vscale x 1 x i8> %2, splat (i8 1)
- ret <vscale x 1 x i8> %3
-}
-
-define <vscale x 1 x i8> @reverse_zexticmp_neg_i32(<vscale x 1 x i32> %x) {
-; CHECK-LABEL: define <vscale x 1 x i8> @reverse_zexticmp_neg_i32(
-; CHECK-SAME: <vscale x 1 x i32> [[X:%.*]]) #[[ATTR2]] {
-; CHECK-NEXT: [[TMP1:%.*]] = and <vscale x 1 x i32> [[X]], splat (i32 4)
-; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <vscale x 1 x i32> [[TMP1]], zeroinitializer
-; CHECK-NEXT: [[TMP4:%.*]] = zext <vscale x 1 x i1> [[TMP2]] to <vscale x 1 x i8>
-; CHECK-NEXT: ret <vscale x 1 x i8> [[TMP4]]
-;
- %1 = trunc <vscale x 1 x i32> %x to <vscale x 1 x i8>
- %2 = xor <vscale x 1 x i8> %1, splat (i8 -1)
- %3 = lshr <vscale x 1 x i8> %2, splat (i8 2)
- %4 = and <vscale x 1 x i8> %3, splat (i8 1)
- ret <vscale x 1 x i8> %4
-}
-
-define <vscale x 1 x i8> @reverse_zexticmp_i64(<vscale x 1 x i64> %x) {
-; CHECK-LABEL: define <vscale x 1 x i8> @reverse_zexticmp_i64(
-; CHECK-SAME: <vscale x 1 x i64> [[X:%.*]]) #[[ATTR2]] {
-; CHECK-NEXT: [[TMP1:%.*]] = and <vscale x 1 x i64> [[X]], splat (i64 4)
-; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <vscale x 1 x i64> [[TMP1]], zeroinitializer
-; CHECK-NEXT: [[TMP3:%.*]] = zext <vscale x 1 x i1> [[TMP2]] to <vscale x 1 x i8>
-; CHECK-NEXT: ret <vscale x 1 x i8> [[TMP3]]
-;
- %1 = trunc <vscale x 1 x i64> %x to <vscale x 1 x i8>
- %2 = lshr <vscale x 1 x i8> %1, splat (i8 2)
- %3 = and <vscale x 1 x i8> %2, splat (i8 1)
- ret <vscale x 1 x i8> %3
-}
-
-define <vscale x 1 x i8> @reverse_zexticmp_neg_i64(<vscale x 1 x i64> %x) {
-; CHECK-LABEL: define <vscale x 1 x i8> @reverse_zexticmp_neg_i64(
-; CHECK-SAME: <vscale x 1 x i64> [[X:%.*]]) #[[ATTR2]] {
-; CHECK-NEXT: [[TMP1:%.*]] = and <vscale x 1 x i64> [[X]], splat (i64 4)
-; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <vscale x 1 x i64> [[TMP1]], zeroinitializer
-; CHECK-NEXT: [[TMP4:%.*]] = zext <vscale x 1 x i1> [[TMP2]] to <vscale x 1 x i8>
-; CHECK-NEXT: ret <vscale x 1 x i8> [[TMP4]]
-;
- %1 = trunc <vscale x 1 x i64> %x to <vscale x 1 x i8>
- %2 = xor <vscale x 1 x i8> %1, splat (i8 -1)
- %3 = lshr <vscale x 1 x i8> %2, splat (i8 2)
- %4 = and <vscale x 1 x i8> %3, splat (i8 1)
- ret <vscale x 1 x i8> %4
-}
diff --git a/llvm/test/CodeGen/RISCV/rvv/zext-icmp.ll b/llvm/test/CodeGen/RISCV/rvv/zext-icmp.ll
new file mode 100644
index 0000000000000..e5043281a27dd
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/zext-icmp.ll
@@ -0,0 +1,86 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=riscv64 -mattr=+v | FileCheck %s
+; RUN: llc < %s -mtriple=riscv32 -mattr=+v | FileCheck %s
+
+; Test that we reverse InstCombinerImpl::transformZExtICmp when it is unprofitable
+
+define <vscale x 1 x i8> @reverse_zexticmp_i16(<vscale x 1 x i16> %x) {
+; CHECK-LABEL: reverse_zexticmp_i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-NEXT: vsrl.vi v8, v8, 2
+; CHECK-NEXT: vand.vi v8, v8, 1
+; CHECK-NEXT: ret
+ %1 = trunc <vscale x 1 x i16> %x to <vscale x 1 x i8>
+ %2 = lshr <vscale x 1 x i8> %1, splat (i8 2)
+ %3 = and <vscale x 1 x i8> %2, splat (i8 1)
+ ret <vscale x 1 x i8> %3
+}
+
+define <vscale x 1 x i8> @reverse_zexticmp_i32(<vscale x 1 x i32> %x) {
+; CHECK-LABEL: reverse_zexticmp_i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vand.vi v8, v8, 4
+; CHECK-NEXT: vmsne.vi v0, v8, 0
+; CHECK-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
+; CHECK-NEXT: vmv.v.i v8, 0
+; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
+; CHECK-NEXT: ret
+ %1 = trunc <vscale x 1 x i32> %x to <vscale x 1 x i8>
+ %2 = lshr <vscale x 1 x i8> %1, splat (i8 2)
+ %3 = and <vscale x 1 x i8> %2, splat (i8 1)
+ ret <vscale x 1 x i8> %3
+}
+
+define <vscale x 1 x i8> @reverse_zexticmp_neg_i32(<vscale x 1 x i32> %x) {
+; CHECK-LABEL: reverse_zexticmp_neg_i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vand.vi v8, v8, 4
+; CHECK-NEXT: vmseq.vi v0, v8, 0
+; CHECK-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
+; CHECK-NEXT: vmv.v.i v8, 0
+; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
+; CHECK-NEXT: ret
+ %1 = trunc <vscale x 1 x i32> %x to <vscale x 1 x i8>
+ %2 = xor <vscale x 1 x i8> %1, splat (i8 -1)
+ %3 = lshr <vscale x 1 x i8> %2, splat (i8 2)
+ %4 = and <vscale x 1 x i8> %3, splat (i8 1)
+ ret <vscale x 1 x i8> %4
+}
+
+define <vscale x 1 x i8> @reverse_zexticmp_i64(<vscale x 1 x i64> %x) {
+; CHECK-LABEL: reverse_zexticmp_i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; CHECK-NEXT: vand.vi v8, v8, 4
+; CHECK-NEXT: vmsne.vi v0, v8, 0
+; CHECK-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
+; CHECK-NEXT: vmv.v.i v8, 0
+; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
+; CHECK-NEXT: ret
+ %1 = trunc <vscale x 1 x i64> %x to <vscale x 1 x i8>
+ %2 = lshr <vscale x 1 x i8> %1, splat (i8 2)
+ %3 = and <vscale x 1 x i8> %2, splat (i8 1)
+ ret <vscale x 1 x i8> %3
+}
+
+define <vscale x 1 x i8> @reverse_zexticmp_neg_i64(<vscale x 1 x i64> %x) {
+; CHECK-LABEL: reverse_zexticmp_neg_i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; CHECK-NEXT: vand.vi v8, v8, 4
+; CHECK-NEXT: vmseq.vi v0, v8, 0
+; CHECK-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
+; CHECK-NEXT: vmv.v.i v8, 0
+; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
+; CHECK-NEXT: ret
+ %1 = trunc <vscale x 1 x i64> %x to <vscale x 1 x i8>
+ %2 = xor <vscale x 1 x i8> %1, splat (i8 -1)
+ %3 = lshr <vscale x 1 x i8> %2, splat (i8 2)
+ %4 = and <vscale x 1 x i8> %3, splat (i8 1)
+ ret <vscale x 1 x i8> %4
+}
+