[llvm] [RISCV] Sink NOT to be fold into ANDN/ORN/XNOR/VANDN (PR #131632)
Piotr Fusik via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 17 08:51:09 PDT 2025
https://github.com/pfusik created https://github.com/llvm/llvm-project/pull/131632
Undoes a negation being hoisted out of a loop, so that it can be folded
into an inverted bitwise operation in the loop.
Implements #108840 on RISC-V
>From 67ca73f8d3fd725ed90ff0a7bc9df6ce345ef3b6 Mon Sep 17 00:00:00 2001
From: Piotr Fusik <p.fusik at samsung.com>
Date: Mon, 17 Mar 2025 16:14:05 +0100
Subject: [PATCH 1/2] [RISCV][test] Add tests for sinking NOT to be fold into
ANDN/ORN/XNOR/VANDN
---
llvm/test/CodeGen/RISCV/rv32zbb-zbkb.ll | 252 ++++++++++
llvm/test/CodeGen/RISCV/rv64zbb-zbkb.ll | 240 +++++++++
llvm/test/CodeGen/RISCV/rvv/vandn-sdnode.ll | 511 +++++++++++++++++++-
3 files changed, 999 insertions(+), 4 deletions(-)
diff --git a/llvm/test/CodeGen/RISCV/rv32zbb-zbkb.ll b/llvm/test/CodeGen/RISCV/rv32zbb-zbkb.ll
index b6344f88cddaa..a319156f8d1b8 100644
--- a/llvm/test/CodeGen/RISCV/rv32zbb-zbkb.ll
+++ b/llvm/test/CodeGen/RISCV/rv32zbb-zbkb.ll
@@ -438,3 +438,255 @@ define i1 @andn_snez_i64(i64 %a, i64 %b) nounwind {
%cmpeq = icmp ne i64 %and, %b
ret i1 %cmpeq
}
+
+define i32 @and_hoisted_not_i32(i32 %x, i32 %m, i1 zeroext %cond) {
+; CHECK-LABEL: and_hoisted_not_i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: beqz a2, .LBB24_2
+; CHECK-NEXT: # %bb.1: # %mask
+; CHECK-NEXT: not a1, a1
+; CHECK-NEXT: and a0, a1, a0
+; CHECK-NEXT: .LBB24_2: # %identity
+; CHECK-NEXT: ret
+ %a = xor i32 %m, -1
+ br i1 %cond, label %mask, label %identity
+
+mask:
+ %masked = and i32 %a, %x
+ ret i32 %masked
+
+identity:
+ ret i32 %x
+}
+
+define i32 @and_hoisted_not_i32_swapped(i32 %x, i32 %m, i1 zeroext %cond) {
+; CHECK-LABEL: and_hoisted_not_i32_swapped:
+; CHECK: # %bb.0:
+; CHECK-NEXT: beqz a2, .LBB25_2
+; CHECK-NEXT: # %bb.1: # %mask
+; CHECK-NEXT: not a1, a1
+; CHECK-NEXT: and a0, a0, a1
+; CHECK-NEXT: .LBB25_2: # %identity
+; CHECK-NEXT: ret
+ %a = xor i32 %m, -1
+ br i1 %cond, label %mask, label %identity
+
+mask:
+ %masked = and i32 %x, %a
+ ret i32 %masked
+
+identity:
+ ret i32 %x
+}
+
+define i64 @and_hoisted_not_i64(i64 %x, i64 %m, i1 zeroext %cond) {
+; CHECK-LABEL: and_hoisted_not_i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: beqz a4, .LBB26_2
+; CHECK-NEXT: # %bb.1: # %mask
+; CHECK-NEXT: not a3, a3
+; CHECK-NEXT: not a2, a2
+; CHECK-NEXT: and a0, a2, a0
+; CHECK-NEXT: and a1, a3, a1
+; CHECK-NEXT: .LBB26_2: # %identity
+; CHECK-NEXT: ret
+ %a = xor i64 %m, -1
+ br i1 %cond, label %mask, label %identity
+
+mask:
+ %masked = and i64 %a, %x
+ ret i64 %masked
+
+identity:
+ ret i64 %x
+}
+
+define i64 @and_hoisted_not_i64_swapped(i64 %x, i64 %m, i1 zeroext %cond) {
+; CHECK-LABEL: and_hoisted_not_i64_swapped:
+; CHECK: # %bb.0:
+; CHECK-NEXT: beqz a4, .LBB27_2
+; CHECK-NEXT: # %bb.1: # %mask
+; CHECK-NEXT: not a3, a3
+; CHECK-NEXT: not a2, a2
+; CHECK-NEXT: and a0, a0, a2
+; CHECK-NEXT: and a1, a1, a3
+; CHECK-NEXT: .LBB27_2: # %identity
+; CHECK-NEXT: ret
+ %a = xor i64 %m, -1
+ br i1 %cond, label %mask, label %identity
+
+mask:
+ %masked = and i64 %x, %a
+ ret i64 %masked
+
+identity:
+ ret i64 %x
+}
+
+define i32 @or_hoisted_not_i32(i32 %x, i32 %m, i1 zeroext %cond) {
+; CHECK-LABEL: or_hoisted_not_i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: beqz a2, .LBB28_2
+; CHECK-NEXT: # %bb.1: # %mask
+; CHECK-NEXT: not a1, a1
+; CHECK-NEXT: or a0, a1, a0
+; CHECK-NEXT: .LBB28_2: # %identity
+; CHECK-NEXT: ret
+ %a = xor i32 %m, -1
+ br i1 %cond, label %mask, label %identity
+
+mask:
+ %masked = or i32 %a, %x
+ ret i32 %masked
+
+identity:
+ ret i32 %x
+}
+
+define i32 @or_hoisted_not_i32_swapped(i32 %x, i32 %m, i1 zeroext %cond) {
+; CHECK-LABEL: or_hoisted_not_i32_swapped:
+; CHECK: # %bb.0:
+; CHECK-NEXT: beqz a2, .LBB29_2
+; CHECK-NEXT: # %bb.1: # %mask
+; CHECK-NEXT: not a1, a1
+; CHECK-NEXT: or a0, a0, a1
+; CHECK-NEXT: .LBB29_2: # %identity
+; CHECK-NEXT: ret
+ %a = xor i32 %m, -1
+ br i1 %cond, label %mask, label %identity
+
+mask:
+ %masked = or i32 %x, %a
+ ret i32 %masked
+
+identity:
+ ret i32 %x
+}
+
+define i64 @or_hoisted_not_i64(i64 %x, i64 %m, i1 zeroext %cond) {
+; CHECK-LABEL: or_hoisted_not_i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: beqz a4, .LBB30_2
+; CHECK-NEXT: # %bb.1: # %mask
+; CHECK-NEXT: not a3, a3
+; CHECK-NEXT: not a2, a2
+; CHECK-NEXT: or a0, a2, a0
+; CHECK-NEXT: or a1, a3, a1
+; CHECK-NEXT: .LBB30_2: # %identity
+; CHECK-NEXT: ret
+ %a = xor i64 %m, -1
+ br i1 %cond, label %mask, label %identity
+
+mask:
+ %masked = or i64 %a, %x
+ ret i64 %masked
+
+identity:
+ ret i64 %x
+}
+
+define i64 @or_hoisted_not_i64_swapped(i64 %x, i64 %m, i1 zeroext %cond) {
+; CHECK-LABEL: or_hoisted_not_i64_swapped:
+; CHECK: # %bb.0:
+; CHECK-NEXT: beqz a4, .LBB31_2
+; CHECK-NEXT: # %bb.1: # %mask
+; CHECK-NEXT: not a3, a3
+; CHECK-NEXT: not a2, a2
+; CHECK-NEXT: or a0, a0, a2
+; CHECK-NEXT: or a1, a1, a3
+; CHECK-NEXT: .LBB31_2: # %identity
+; CHECK-NEXT: ret
+ %a = xor i64 %m, -1
+ br i1 %cond, label %mask, label %identity
+
+mask:
+ %masked = or i64 %x, %a
+ ret i64 %masked
+
+identity:
+ ret i64 %x
+}
+
+define i32 @xor_hoisted_not_i32(i32 %x, i32 %m, i1 zeroext %cond) {
+; CHECK-LABEL: xor_hoisted_not_i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: beqz a2, .LBB32_2
+; CHECK-NEXT: # %bb.1: # %mask
+; CHECK-NEXT: not a1, a1
+; CHECK-NEXT: xor a0, a1, a0
+; CHECK-NEXT: .LBB32_2: # %identity
+; CHECK-NEXT: ret
+ %a = xor i32 %m, -1
+ br i1 %cond, label %mask, label %identity
+
+mask:
+ %masked = xor i32 %a, %x
+ ret i32 %masked
+
+identity:
+ ret i32 %x
+}
+
+define i32 @xor_hoisted_not_i32_swapped(i32 %x, i32 %m, i1 zeroext %cond) {
+; CHECK-LABEL: xor_hoisted_not_i32_swapped:
+; CHECK: # %bb.0:
+; CHECK-NEXT: beqz a2, .LBB33_2
+; CHECK-NEXT: # %bb.1: # %mask
+; CHECK-NEXT: not a1, a1
+; CHECK-NEXT: xor a0, a0, a1
+; CHECK-NEXT: .LBB33_2: # %identity
+; CHECK-NEXT: ret
+ %a = xor i32 %m, -1
+ br i1 %cond, label %mask, label %identity
+
+mask:
+ %masked = xor i32 %x, %a
+ ret i32 %masked
+
+identity:
+ ret i32 %x
+}
+
+define i64 @xor_hoisted_not_i64(i64 %x, i64 %m, i1 zeroext %cond) {
+; CHECK-LABEL: xor_hoisted_not_i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: beqz a4, .LBB34_2
+; CHECK-NEXT: # %bb.1: # %mask
+; CHECK-NEXT: not a3, a3
+; CHECK-NEXT: not a2, a2
+; CHECK-NEXT: xor a0, a2, a0
+; CHECK-NEXT: xor a1, a3, a1
+; CHECK-NEXT: .LBB34_2: # %identity
+; CHECK-NEXT: ret
+ %a = xor i64 %m, -1
+ br i1 %cond, label %mask, label %identity
+
+mask:
+ %masked = xor i64 %a, %x
+ ret i64 %masked
+
+identity:
+ ret i64 %x
+}
+
+define i64 @xor_hoisted_not_i64_swapped(i64 %x, i64 %m, i1 zeroext %cond) {
+; CHECK-LABEL: xor_hoisted_not_i64_swapped:
+; CHECK: # %bb.0:
+; CHECK-NEXT: beqz a4, .LBB35_2
+; CHECK-NEXT: # %bb.1: # %mask
+; CHECK-NEXT: not a3, a3
+; CHECK-NEXT: not a2, a2
+; CHECK-NEXT: xor a0, a0, a2
+; CHECK-NEXT: xor a1, a1, a3
+; CHECK-NEXT: .LBB35_2: # %identity
+; CHECK-NEXT: ret
+ %a = xor i64 %m, -1
+ br i1 %cond, label %mask, label %identity
+
+mask:
+ %masked = xor i64 %x, %a
+ ret i64 %masked
+
+identity:
+ ret i64 %x
+}
diff --git a/llvm/test/CodeGen/RISCV/rv64zbb-zbkb.ll b/llvm/test/CodeGen/RISCV/rv64zbb-zbkb.ll
index bf077364c9c7a..23b2c2d70a37a 100644
--- a/llvm/test/CodeGen/RISCV/rv64zbb-zbkb.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zbb-zbkb.ll
@@ -554,3 +554,243 @@ define i1 @andn_snez_i64(i64 %a, i64 %b) nounwind {
%cmpeq = icmp ne i64 %and, %b
ret i1 %cmpeq
}
+
+define i32 @and_hoisted_not_i32(i32 %x, i32 %m, i1 zeroext %cond) {
+; CHECK-LABEL: and_hoisted_not_i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: beqz a2, .LBB32_2
+; CHECK-NEXT: # %bb.1: # %mask
+; CHECK-NEXT: not a1, a1
+; CHECK-NEXT: and a0, a1, a0
+; CHECK-NEXT: .LBB32_2: # %identity
+; CHECK-NEXT: ret
+ %a = xor i32 %m, -1
+ br i1 %cond, label %mask, label %identity
+
+mask:
+ %masked = and i32 %a, %x
+ ret i32 %masked
+
+identity:
+ ret i32 %x
+}
+
+define i32 @and_hoisted_not_i32_swapped(i32 %x, i32 %m, i1 zeroext %cond) {
+; CHECK-LABEL: and_hoisted_not_i32_swapped:
+; CHECK: # %bb.0:
+; CHECK-NEXT: beqz a2, .LBB33_2
+; CHECK-NEXT: # %bb.1: # %mask
+; CHECK-NEXT: not a1, a1
+; CHECK-NEXT: and a0, a0, a1
+; CHECK-NEXT: .LBB33_2: # %identity
+; CHECK-NEXT: ret
+ %a = xor i32 %m, -1
+ br i1 %cond, label %mask, label %identity
+
+mask:
+ %masked = and i32 %x, %a
+ ret i32 %masked
+
+identity:
+ ret i32 %x
+}
+
+define i64 @and_hoisted_not_i64(i64 %x, i64 %m, i1 zeroext %cond) {
+; CHECK-LABEL: and_hoisted_not_i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: beqz a2, .LBB34_2
+; CHECK-NEXT: # %bb.1: # %mask
+; CHECK-NEXT: not a1, a1
+; CHECK-NEXT: and a0, a1, a0
+; CHECK-NEXT: .LBB34_2: # %identity
+; CHECK-NEXT: ret
+ %a = xor i64 %m, -1
+ br i1 %cond, label %mask, label %identity
+
+mask:
+ %masked = and i64 %a, %x
+ ret i64 %masked
+
+identity:
+ ret i64 %x
+}
+
+define i64 @and_hoisted_not_i64_swapped(i64 %x, i64 %m, i1 zeroext %cond) {
+; CHECK-LABEL: and_hoisted_not_i64_swapped:
+; CHECK: # %bb.0:
+; CHECK-NEXT: beqz a2, .LBB35_2
+; CHECK-NEXT: # %bb.1: # %mask
+; CHECK-NEXT: not a1, a1
+; CHECK-NEXT: and a0, a0, a1
+; CHECK-NEXT: .LBB35_2: # %identity
+; CHECK-NEXT: ret
+ %a = xor i64 %m, -1
+ br i1 %cond, label %mask, label %identity
+
+mask:
+ %masked = and i64 %x, %a
+ ret i64 %masked
+
+identity:
+ ret i64 %x
+}
+
+define i32 @or_hoisted_not_i32(i32 %x, i32 %m, i1 zeroext %cond) {
+; CHECK-LABEL: or_hoisted_not_i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: beqz a2, .LBB36_2
+; CHECK-NEXT: # %bb.1: # %mask
+; CHECK-NEXT: not a1, a1
+; CHECK-NEXT: or a0, a1, a0
+; CHECK-NEXT: .LBB36_2: # %identity
+; CHECK-NEXT: ret
+ %a = xor i32 %m, -1
+ br i1 %cond, label %mask, label %identity
+
+mask:
+ %masked = or i32 %a, %x
+ ret i32 %masked
+
+identity:
+ ret i32 %x
+}
+
+define i32 @or_hoisted_not_i32_swapped(i32 %x, i32 %m, i1 zeroext %cond) {
+; CHECK-LABEL: or_hoisted_not_i32_swapped:
+; CHECK: # %bb.0:
+; CHECK-NEXT: beqz a2, .LBB37_2
+; CHECK-NEXT: # %bb.1: # %mask
+; CHECK-NEXT: not a1, a1
+; CHECK-NEXT: or a0, a0, a1
+; CHECK-NEXT: .LBB37_2: # %identity
+; CHECK-NEXT: ret
+ %a = xor i32 %m, -1
+ br i1 %cond, label %mask, label %identity
+
+mask:
+ %masked = or i32 %x, %a
+ ret i32 %masked
+
+identity:
+ ret i32 %x
+}
+
+define i64 @or_hoisted_not_i64(i64 %x, i64 %m, i1 zeroext %cond) {
+; CHECK-LABEL: or_hoisted_not_i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: beqz a2, .LBB38_2
+; CHECK-NEXT: # %bb.1: # %mask
+; CHECK-NEXT: not a1, a1
+; CHECK-NEXT: or a0, a1, a0
+; CHECK-NEXT: .LBB38_2: # %identity
+; CHECK-NEXT: ret
+ %a = xor i64 %m, -1
+ br i1 %cond, label %mask, label %identity
+
+mask:
+ %masked = or i64 %a, %x
+ ret i64 %masked
+
+identity:
+ ret i64 %x
+}
+
+define i64 @or_hoisted_not_i64_swapped(i64 %x, i64 %m, i1 zeroext %cond) {
+; CHECK-LABEL: or_hoisted_not_i64_swapped:
+; CHECK: # %bb.0:
+; CHECK-NEXT: beqz a2, .LBB39_2
+; CHECK-NEXT: # %bb.1: # %mask
+; CHECK-NEXT: not a1, a1
+; CHECK-NEXT: or a0, a0, a1
+; CHECK-NEXT: .LBB39_2: # %identity
+; CHECK-NEXT: ret
+ %a = xor i64 %m, -1
+ br i1 %cond, label %mask, label %identity
+
+mask:
+ %masked = or i64 %x, %a
+ ret i64 %masked
+
+identity:
+ ret i64 %x
+}
+
+define i32 @xor_hoisted_not_i32(i32 %x, i32 %m, i1 zeroext %cond) {
+; CHECK-LABEL: xor_hoisted_not_i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: beqz a2, .LBB40_2
+; CHECK-NEXT: # %bb.1: # %mask
+; CHECK-NEXT: not a1, a1
+; CHECK-NEXT: xor a0, a1, a0
+; CHECK-NEXT: .LBB40_2: # %identity
+; CHECK-NEXT: ret
+ %a = xor i32 %m, -1
+ br i1 %cond, label %mask, label %identity
+
+mask:
+ %masked = xor i32 %a, %x
+ ret i32 %masked
+
+identity:
+ ret i32 %x
+}
+
+define i32 @xor_hoisted_not_i32_swapped(i32 %x, i32 %m, i1 zeroext %cond) {
+; CHECK-LABEL: xor_hoisted_not_i32_swapped:
+; CHECK: # %bb.0:
+; CHECK-NEXT: beqz a2, .LBB41_2
+; CHECK-NEXT: # %bb.1: # %mask
+; CHECK-NEXT: not a1, a1
+; CHECK-NEXT: xor a0, a0, a1
+; CHECK-NEXT: .LBB41_2: # %identity
+; CHECK-NEXT: ret
+ %a = xor i32 %m, -1
+ br i1 %cond, label %mask, label %identity
+
+mask:
+ %masked = xor i32 %x, %a
+ ret i32 %masked
+
+identity:
+ ret i32 %x
+}
+
+define i64 @xor_hoisted_not_i64(i64 %x, i64 %m, i1 zeroext %cond) {
+; CHECK-LABEL: xor_hoisted_not_i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: beqz a2, .LBB42_2
+; CHECK-NEXT: # %bb.1: # %mask
+; CHECK-NEXT: not a1, a1
+; CHECK-NEXT: xor a0, a1, a0
+; CHECK-NEXT: .LBB42_2: # %identity
+; CHECK-NEXT: ret
+ %a = xor i64 %m, -1
+ br i1 %cond, label %mask, label %identity
+
+mask:
+ %masked = xor i64 %a, %x
+ ret i64 %masked
+
+identity:
+ ret i64 %x
+}
+
+define i64 @xor_hoisted_not_i64_swapped(i64 %x, i64 %m, i1 zeroext %cond) {
+; CHECK-LABEL: xor_hoisted_not_i64_swapped:
+; CHECK: # %bb.0:
+; CHECK-NEXT: beqz a2, .LBB43_2
+; CHECK-NEXT: # %bb.1: # %mask
+; CHECK-NEXT: not a1, a1
+; CHECK-NEXT: xor a0, a0, a1
+; CHECK-NEXT: .LBB43_2: # %identity
+; CHECK-NEXT: ret
+ %a = xor i64 %m, -1
+ br i1 %cond, label %mask, label %identity
+
+mask:
+ %masked = xor i64 %x, %a
+ ret i64 %masked
+
+identity:
+ ret i64 %x
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vandn-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vandn-sdnode.ll
index cf73dceaae306..d4f9f41cd4ab7 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vandn-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vandn-sdnode.ll
@@ -1,10 +1,10 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-RV32
; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-RV64
-; RUN: llc -mtriple=riscv32 -mattr=+v,+zvkb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-ZVKB,CHECK-ZVKB-NOZBB,CHECK-ZVKB32
-; RUN: llc -mtriple=riscv64 -mattr=+v,+zvkb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-ZVKB,CHECK-ZVKB-NOZBB,CHECK-ZVKB64
-; RUN: llc -mtriple=riscv32 -mattr=+v,+zvkb,+zbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-ZVKB,CHECK-ZVKB-ZBB,CHECK-ZVKB32
-; RUN: llc -mtriple=riscv64 -mattr=+v,+zvkb,+zbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-ZVKB,CHECK-ZVKB-ZBB,CHECK-ZVKB64
+; RUN: llc -mtriple=riscv32 -mattr=+v,+zvkb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-ZVKB,CHECK-ZVKB-NOZBB,CHECK-ZVKB32,CHECK-ZVKB-NOZBB32
+; RUN: llc -mtriple=riscv64 -mattr=+v,+zvkb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-ZVKB,CHECK-ZVKB-NOZBB,CHECK-ZVKB64,CHECK-ZVKB-NOZBB64
+; RUN: llc -mtriple=riscv32 -mattr=+v,+zvkb,+zbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-ZVKB,CHECK-ZVKB-ZBB,CHECK-ZVKB32,CHECK-ZVKB-ZBB32
+; RUN: llc -mtriple=riscv64 -mattr=+v,+zvkb,+zbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-ZVKB,CHECK-ZVKB-ZBB,CHECK-ZVKB64,CHECK-ZVKB-ZBB64
define <vscale x 1 x i8> @vandn_vv_nxv1i8(<vscale x 1 x i8> %x, <vscale x 1 x i8> %y) {
; CHECK-LABEL: vandn_vv_nxv1i8:
@@ -2109,3 +2109,506 @@ define <vscale x 1 x i16> @vand_vadd_vx_imm16(<vscale x 1 x i16> %x) {
%b = add <vscale x 1 x i16> %a, splat (i16 32767)
ret <vscale x 1 x i16> %b
}
+
+define <vscale x 1 x i8> @vand_vx_hoisted_not(<vscale x 1 x i8> %x, i8 %m, i1 zeroext %cond) {
+; CHECK-LABEL: vand_vx_hoisted_not:
+; CHECK: # %bb.0:
+; CHECK-NEXT: beqz a1, .LBB94_2
+; CHECK-NEXT: # %bb.1: # %mask
+; CHECK-NEXT: not a0, a0
+; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: .LBB94_2: # %identity
+; CHECK-NEXT: ret
+;
+; CHECK-ZVKB-LABEL: vand_vx_hoisted_not:
+; CHECK-ZVKB: # %bb.0:
+; CHECK-ZVKB-NEXT: beqz a1, .LBB94_2
+; CHECK-ZVKB-NEXT: # %bb.1: # %mask
+; CHECK-ZVKB-NEXT: not a0, a0
+; CHECK-ZVKB-NEXT: vsetvli a1, zero, e8, mf8, ta, ma
+; CHECK-ZVKB-NEXT: vand.vx v8, v8, a0
+; CHECK-ZVKB-NEXT: .LBB94_2: # %identity
+; CHECK-ZVKB-NEXT: ret
+ %a = xor i8 %m, -1
+ %head = insertelement <vscale x 1 x i8> poison, i8 %a, i32 0
+ %splat = shufflevector <vscale x 1 x i8> %head, <vscale x 1 x i8> poison, <vscale x 1 x i32> zeroinitializer
+ br i1 %cond, label %mask, label %identity
+
+mask:
+ %masked = and <vscale x 1 x i8> %splat, %x
+ ret <vscale x 1 x i8> %masked
+
+identity:
+ ret <vscale x 1 x i8> %x
+}
+
+define <vscale x 1 x i8> @vand_vx_hoisted_not_swapped(<vscale x 1 x i8> %x, i8 %m, i1 zeroext %cond) {
+; CHECK-LABEL: vand_vx_hoisted_not_swapped:
+; CHECK: # %bb.0:
+; CHECK-NEXT: beqz a1, .LBB95_2
+; CHECK-NEXT: # %bb.1: # %mask
+; CHECK-NEXT: not a0, a0
+; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: .LBB95_2: # %identity
+; CHECK-NEXT: ret
+;
+; CHECK-ZVKB-LABEL: vand_vx_hoisted_not_swapped:
+; CHECK-ZVKB: # %bb.0:
+; CHECK-ZVKB-NEXT: beqz a1, .LBB95_2
+; CHECK-ZVKB-NEXT: # %bb.1: # %mask
+; CHECK-ZVKB-NEXT: not a0, a0
+; CHECK-ZVKB-NEXT: vsetvli a1, zero, e8, mf8, ta, ma
+; CHECK-ZVKB-NEXT: vand.vx v8, v8, a0
+; CHECK-ZVKB-NEXT: .LBB95_2: # %identity
+; CHECK-ZVKB-NEXT: ret
+ %a = xor i8 %m, -1
+ %head = insertelement <vscale x 1 x i8> poison, i8 %a, i32 0
+ %splat = shufflevector <vscale x 1 x i8> %head, <vscale x 1 x i8> poison, <vscale x 1 x i32> zeroinitializer
+ br i1 %cond, label %mask, label %identity
+
+mask:
+ %masked = and <vscale x 1 x i8> %x, %splat
+ ret <vscale x 1 x i8> %masked
+
+identity:
+ ret <vscale x 1 x i8> %x
+}
+
+define <vscale x 1 x i8> @vand_vv_hoisted_not(<vscale x 1 x i8> %x, <vscale x 1 x i8> %m, i1 zeroext %cond) {
+; CHECK-LABEL: vand_vv_hoisted_not:
+; CHECK: # %bb.0:
+; CHECK-NEXT: beqz a0, .LBB96_2
+; CHECK-NEXT: # %bb.1: # %mask
+; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
+; CHECK-NEXT: vnot.v v9, v9
+; CHECK-NEXT: vand.vv v8, v9, v8
+; CHECK-NEXT: .LBB96_2: # %identity
+; CHECK-NEXT: ret
+;
+; CHECK-ZVKB-LABEL: vand_vv_hoisted_not:
+; CHECK-ZVKB: # %bb.0:
+; CHECK-ZVKB-NEXT: beqz a0, .LBB96_2
+; CHECK-ZVKB-NEXT: # %bb.1: # %mask
+; CHECK-ZVKB-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
+; CHECK-ZVKB-NEXT: vnot.v v9, v9
+; CHECK-ZVKB-NEXT: vand.vv v8, v9, v8
+; CHECK-ZVKB-NEXT: .LBB96_2: # %identity
+; CHECK-ZVKB-NEXT: ret
+ %a = xor <vscale x 1 x i8> %m, splat (i8 -1)
+ br i1 %cond, label %mask, label %identity
+
+mask:
+ %masked = and <vscale x 1 x i8> %a, %x
+ ret <vscale x 1 x i8> %masked
+
+identity:
+ ret <vscale x 1 x i8> %x
+}
+
+define <vscale x 1 x i8> @vand_vv_hoisted_not_swapped(<vscale x 1 x i8> %x, <vscale x 1 x i8> %m, i1 zeroext %cond) {
+; CHECK-LABEL: vand_vv_hoisted_not_swapped:
+; CHECK: # %bb.0:
+; CHECK-NEXT: beqz a0, .LBB97_2
+; CHECK-NEXT: # %bb.1: # %mask
+; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
+; CHECK-NEXT: vnot.v v9, v9
+; CHECK-NEXT: vand.vv v8, v8, v9
+; CHECK-NEXT: .LBB97_2: # %identity
+; CHECK-NEXT: ret
+;
+; CHECK-ZVKB-LABEL: vand_vv_hoisted_not_swapped:
+; CHECK-ZVKB: # %bb.0:
+; CHECK-ZVKB-NEXT: beqz a0, .LBB97_2
+; CHECK-ZVKB-NEXT: # %bb.1: # %mask
+; CHECK-ZVKB-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
+; CHECK-ZVKB-NEXT: vnot.v v9, v9
+; CHECK-ZVKB-NEXT: vand.vv v8, v8, v9
+; CHECK-ZVKB-NEXT: .LBB97_2: # %identity
+; CHECK-ZVKB-NEXT: ret
+ %a = xor <vscale x 1 x i8> %m, splat (i8 -1)
+ br i1 %cond, label %mask, label %identity
+
+mask:
+ %masked = and <vscale x 1 x i8> %x, %a
+ ret <vscale x 1 x i8> %masked
+
+identity:
+ ret <vscale x 1 x i8> %x
+}
+
+declare i64 @llvm.vscale.i64()
+
+define void @vand_vx_loop_hoisted_not(ptr %a, i32 noundef signext %mask) {
+; CHECK-RV32-LABEL: vand_vx_loop_hoisted_not:
+; CHECK-RV32: # %bb.0: # %entry
+; CHECK-RV32-NEXT: addi sp, sp, -32
+; CHECK-RV32-NEXT: .cfi_def_cfa_offset 32
+; CHECK-RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; CHECK-RV32-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; CHECK-RV32-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; CHECK-RV32-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; CHECK-RV32-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; CHECK-RV32-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
+; CHECK-RV32-NEXT: .cfi_offset ra, -4
+; CHECK-RV32-NEXT: .cfi_offset s0, -8
+; CHECK-RV32-NEXT: .cfi_offset s1, -12
+; CHECK-RV32-NEXT: .cfi_offset s2, -16
+; CHECK-RV32-NEXT: .cfi_offset s3, -20
+; CHECK-RV32-NEXT: .cfi_offset s4, -24
+; CHECK-RV32-NEXT: mv s0, a0
+; CHECK-RV32-NEXT: not s2, a1
+; CHECK-RV32-NEXT: csrr s1, vlenb
+; CHECK-RV32-NEXT: srli s1, s1, 3
+; CHECK-RV32-NEXT: li a2, 1
+; CHECK-RV32-NEXT: mv a0, s1
+; CHECK-RV32-NEXT: li a1, 0
+; CHECK-RV32-NEXT: li a3, 0
+; CHECK-RV32-NEXT: call __muldi3
+; CHECK-RV32-NEXT: sltiu a0, a0, 65
+; CHECK-RV32-NEXT: seqz a1, a1
+; CHECK-RV32-NEXT: and a0, a1, a0
+; CHECK-RV32-NEXT: bnez a0, .LBB98_2
+; CHECK-RV32-NEXT: # %bb.1:
+; CHECK-RV32-NEXT: li s3, 0
+; CHECK-RV32-NEXT: li s4, 0
+; CHECK-RV32-NEXT: j .LBB98_5
+; CHECK-RV32-NEXT: .LBB98_2: # %vector.ph
+; CHECK-RV32-NEXT: li a2, 508
+; CHECK-RV32-NEXT: mv a0, s1
+; CHECK-RV32-NEXT: li a1, 0
+; CHECK-RV32-NEXT: li a3, 0
+; CHECK-RV32-NEXT: call __muldi3
+; CHECK-RV32-NEXT: li s4, 0
+; CHECK-RV32-NEXT: andi s3, a0, 256
+; CHECK-RV32-NEXT: li a2, 4
+; CHECK-RV32-NEXT: mv a0, s1
+; CHECK-RV32-NEXT: li a1, 0
+; CHECK-RV32-NEXT: li a3, 0
+; CHECK-RV32-NEXT: call __muldi3
+; CHECK-RV32-NEXT: li a3, 0
+; CHECK-RV32-NEXT: li a2, 0
+; CHECK-RV32-NEXT: vsetvli a4, zero, e32, m2, ta, ma
+; CHECK-RV32-NEXT: .LBB98_3: # %vector.body
+; CHECK-RV32-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-RV32-NEXT: slli a4, a3, 2
+; CHECK-RV32-NEXT: add a5, a3, a0
+; CHECK-RV32-NEXT: add a2, a2, a1
+; CHECK-RV32-NEXT: add a4, s0, a4
+; CHECK-RV32-NEXT: vl2re32.v v8, (a4)
+; CHECK-RV32-NEXT: sltu a3, a5, a3
+; CHECK-RV32-NEXT: add a2, a2, a3
+; CHECK-RV32-NEXT: xor a3, a5, s3
+; CHECK-RV32-NEXT: vand.vx v8, v8, s2
+; CHECK-RV32-NEXT: or a6, a3, a2
+; CHECK-RV32-NEXT: vs2r.v v8, (a4)
+; CHECK-RV32-NEXT: mv a3, a5
+; CHECK-RV32-NEXT: bnez a6, .LBB98_3
+; CHECK-RV32-NEXT: # %bb.4: # %middle.block
+; CHECK-RV32-NEXT: bnez s3, .LBB98_6
+; CHECK-RV32-NEXT: .LBB98_5: # %for.body
+; CHECK-RV32-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-RV32-NEXT: slli a0, s3, 2
+; CHECK-RV32-NEXT: addi s3, s3, 1
+; CHECK-RV32-NEXT: add a0, s0, a0
+; CHECK-RV32-NEXT: lw a1, 0(a0)
+; CHECK-RV32-NEXT: seqz a2, s3
+; CHECK-RV32-NEXT: add s4, s4, a2
+; CHECK-RV32-NEXT: xori a2, s3, 256
+; CHECK-RV32-NEXT: and a1, a1, s2
+; CHECK-RV32-NEXT: or a2, a2, s4
+; CHECK-RV32-NEXT: sw a1, 0(a0)
+; CHECK-RV32-NEXT: bnez a2, .LBB98_5
+; CHECK-RV32-NEXT: .LBB98_6: # %for.cond.cleanup
+; CHECK-RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT: .cfi_restore ra
+; CHECK-RV32-NEXT: .cfi_restore s0
+; CHECK-RV32-NEXT: .cfi_restore s1
+; CHECK-RV32-NEXT: .cfi_restore s2
+; CHECK-RV32-NEXT: .cfi_restore s3
+; CHECK-RV32-NEXT: .cfi_restore s4
+; CHECK-RV32-NEXT: addi sp, sp, 32
+; CHECK-RV32-NEXT: .cfi_def_cfa_offset 0
+; CHECK-RV32-NEXT: ret
+;
+; CHECK-RV64-LABEL: vand_vx_loop_hoisted_not:
+; CHECK-RV64: # %bb.0: # %entry
+; CHECK-RV64-NEXT: addi sp, sp, -32
+; CHECK-RV64-NEXT: .cfi_def_cfa_offset 32
+; CHECK-RV64-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; CHECK-RV64-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; CHECK-RV64-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; CHECK-RV64-NEXT: sd s2, 0(sp) # 8-byte Folded Spill
+; CHECK-RV64-NEXT: .cfi_offset ra, -8
+; CHECK-RV64-NEXT: .cfi_offset s0, -16
+; CHECK-RV64-NEXT: .cfi_offset s1, -24
+; CHECK-RV64-NEXT: .cfi_offset s2, -32
+; CHECK-RV64-NEXT: mv s0, a0
+; CHECK-RV64-NEXT: csrr s2, vlenb
+; CHECK-RV64-NEXT: srli a0, s2, 3
+; CHECK-RV64-NEXT: li a2, 64
+; CHECK-RV64-NEXT: not s1, a1
+; CHECK-RV64-NEXT: bgeu a2, a0, .LBB98_2
+; CHECK-RV64-NEXT: # %bb.1:
+; CHECK-RV64-NEXT: li a0, 0
+; CHECK-RV64-NEXT: j .LBB98_5
+; CHECK-RV64-NEXT: .LBB98_2: # %vector.ph
+; CHECK-RV64-NEXT: li a1, 508
+; CHECK-RV64-NEXT: call __muldi3
+; CHECK-RV64-NEXT: andi a0, a0, 256
+; CHECK-RV64-NEXT: srli a1, s2, 1
+; CHECK-RV64-NEXT: slli a2, s2, 1
+; CHECK-RV64-NEXT: mv a3, s0
+; CHECK-RV64-NEXT: mv a4, a0
+; CHECK-RV64-NEXT: vsetvli a5, zero, e32, m2, ta, ma
+; CHECK-RV64-NEXT: .LBB98_3: # %vector.body
+; CHECK-RV64-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-RV64-NEXT: vl2re32.v v8, (a3)
+; CHECK-RV64-NEXT: sub a4, a4, a1
+; CHECK-RV64-NEXT: vand.vx v8, v8, s1
+; CHECK-RV64-NEXT: vs2r.v v8, (a3)
+; CHECK-RV64-NEXT: add a3, a3, a2
+; CHECK-RV64-NEXT: bnez a4, .LBB98_3
+; CHECK-RV64-NEXT: # %bb.4: # %middle.block
+; CHECK-RV64-NEXT: bnez a0, .LBB98_7
+; CHECK-RV64-NEXT: .LBB98_5: # %for.body.preheader
+; CHECK-RV64-NEXT: slli a0, a0, 2
+; CHECK-RV64-NEXT: add a0, s0, a0
+; CHECK-RV64-NEXT: addi a1, s0, 1024
+; CHECK-RV64-NEXT: .LBB98_6: # %for.body
+; CHECK-RV64-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-RV64-NEXT: lw a2, 0(a0)
+; CHECK-RV64-NEXT: and a2, a2, s1
+; CHECK-RV64-NEXT: sw a2, 0(a0)
+; CHECK-RV64-NEXT: addi a0, a0, 4
+; CHECK-RV64-NEXT: bne a0, a1, .LBB98_6
+; CHECK-RV64-NEXT: .LBB98_7: # %for.cond.cleanup
+; CHECK-RV64-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; CHECK-RV64-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; CHECK-RV64-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; CHECK-RV64-NEXT: ld s2, 0(sp) # 8-byte Folded Reload
+; CHECK-RV64-NEXT: .cfi_restore ra
+; CHECK-RV64-NEXT: .cfi_restore s0
+; CHECK-RV64-NEXT: .cfi_restore s1
+; CHECK-RV64-NEXT: .cfi_restore s2
+; CHECK-RV64-NEXT: addi sp, sp, 32
+; CHECK-RV64-NEXT: .cfi_def_cfa_offset 0
+; CHECK-RV64-NEXT: ret
+;
+; CHECK-ZVKB32-LABEL: vand_vx_loop_hoisted_not:
+; CHECK-ZVKB32: # %bb.0: # %entry
+; CHECK-ZVKB32-NEXT: addi sp, sp, -32
+; CHECK-ZVKB32-NEXT: .cfi_def_cfa_offset 32
+; CHECK-ZVKB32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; CHECK-ZVKB32-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; CHECK-ZVKB32-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; CHECK-ZVKB32-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; CHECK-ZVKB32-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; CHECK-ZVKB32-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
+; CHECK-ZVKB32-NEXT: .cfi_offset ra, -4
+; CHECK-ZVKB32-NEXT: .cfi_offset s0, -8
+; CHECK-ZVKB32-NEXT: .cfi_offset s1, -12
+; CHECK-ZVKB32-NEXT: .cfi_offset s2, -16
+; CHECK-ZVKB32-NEXT: .cfi_offset s3, -20
+; CHECK-ZVKB32-NEXT: .cfi_offset s4, -24
+; CHECK-ZVKB32-NEXT: mv s0, a0
+; CHECK-ZVKB32-NEXT: not s2, a1
+; CHECK-ZVKB32-NEXT: csrr s1, vlenb
+; CHECK-ZVKB32-NEXT: srli s1, s1, 3
+; CHECK-ZVKB32-NEXT: li a2, 1
+; CHECK-ZVKB32-NEXT: mv a0, s1
+; CHECK-ZVKB32-NEXT: li a1, 0
+; CHECK-ZVKB32-NEXT: li a3, 0
+; CHECK-ZVKB32-NEXT: call __muldi3
+; CHECK-ZVKB32-NEXT: sltiu a0, a0, 65
+; CHECK-ZVKB32-NEXT: seqz a1, a1
+; CHECK-ZVKB32-NEXT: and a0, a1, a0
+; CHECK-ZVKB32-NEXT: bnez a0, .LBB98_2
+; CHECK-ZVKB32-NEXT: # %bb.1:
+; CHECK-ZVKB32-NEXT: li s3, 0
+; CHECK-ZVKB32-NEXT: li s4, 0
+; CHECK-ZVKB32-NEXT: j .LBB98_5
+; CHECK-ZVKB32-NEXT: .LBB98_2: # %vector.ph
+; CHECK-ZVKB32-NEXT: li a2, 508
+; CHECK-ZVKB32-NEXT: mv a0, s1
+; CHECK-ZVKB32-NEXT: li a1, 0
+; CHECK-ZVKB32-NEXT: li a3, 0
+; CHECK-ZVKB32-NEXT: call __muldi3
+; CHECK-ZVKB32-NEXT: li s4, 0
+; CHECK-ZVKB32-NEXT: andi s3, a0, 256
+; CHECK-ZVKB32-NEXT: li a2, 4
+; CHECK-ZVKB32-NEXT: mv a0, s1
+; CHECK-ZVKB32-NEXT: li a1, 0
+; CHECK-ZVKB32-NEXT: li a3, 0
+; CHECK-ZVKB32-NEXT: call __muldi3
+; CHECK-ZVKB32-NEXT: li a3, 0
+; CHECK-ZVKB32-NEXT: li a2, 0
+; CHECK-ZVKB32-NEXT: vsetvli a4, zero, e32, m2, ta, ma
+; CHECK-ZVKB32-NEXT: .LBB98_3: # %vector.body
+; CHECK-ZVKB32-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-ZVKB32-NEXT: slli a4, a3, 2
+; CHECK-ZVKB32-NEXT: add a5, a3, a0
+; CHECK-ZVKB32-NEXT: add a2, a2, a1
+; CHECK-ZVKB32-NEXT: add a4, s0, a4
+; CHECK-ZVKB32-NEXT: vl2re32.v v8, (a4)
+; CHECK-ZVKB32-NEXT: sltu a3, a5, a3
+; CHECK-ZVKB32-NEXT: add a2, a2, a3
+; CHECK-ZVKB32-NEXT: xor a3, a5, s3
+; CHECK-ZVKB32-NEXT: vand.vx v8, v8, s2
+; CHECK-ZVKB32-NEXT: or a6, a3, a2
+; CHECK-ZVKB32-NEXT: vs2r.v v8, (a4)
+; CHECK-ZVKB32-NEXT: mv a3, a5
+; CHECK-ZVKB32-NEXT: bnez a6, .LBB98_3
+; CHECK-ZVKB32-NEXT: # %bb.4: # %middle.block
+; CHECK-ZVKB32-NEXT: bnez s3, .LBB98_6
+; CHECK-ZVKB32-NEXT: .LBB98_5: # %for.body
+; CHECK-ZVKB32-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-ZVKB32-NEXT: slli a0, s3, 2
+; CHECK-ZVKB32-NEXT: addi s3, s3, 1
+; CHECK-ZVKB32-NEXT: add a0, s0, a0
+; CHECK-ZVKB32-NEXT: lw a1, 0(a0)
+; CHECK-ZVKB32-NEXT: seqz a2, s3
+; CHECK-ZVKB32-NEXT: add s4, s4, a2
+; CHECK-ZVKB32-NEXT: xori a2, s3, 256
+; CHECK-ZVKB32-NEXT: and a1, a1, s2
+; CHECK-ZVKB32-NEXT: or a2, a2, s4
+; CHECK-ZVKB32-NEXT: sw a1, 0(a0)
+; CHECK-ZVKB32-NEXT: bnez a2, .LBB98_5
+; CHECK-ZVKB32-NEXT: .LBB98_6: # %for.cond.cleanup
+; CHECK-ZVKB32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; CHECK-ZVKB32-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; CHECK-ZVKB32-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; CHECK-ZVKB32-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; CHECK-ZVKB32-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; CHECK-ZVKB32-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
+; CHECK-ZVKB32-NEXT: .cfi_restore ra
+; CHECK-ZVKB32-NEXT: .cfi_restore s0
+; CHECK-ZVKB32-NEXT: .cfi_restore s1
+; CHECK-ZVKB32-NEXT: .cfi_restore s2
+; CHECK-ZVKB32-NEXT: .cfi_restore s3
+; CHECK-ZVKB32-NEXT: .cfi_restore s4
+; CHECK-ZVKB32-NEXT: addi sp, sp, 32
+; CHECK-ZVKB32-NEXT: .cfi_def_cfa_offset 0
+; CHECK-ZVKB32-NEXT: ret
+;
+; CHECK-ZVKB64-LABEL: vand_vx_loop_hoisted_not:
+; CHECK-ZVKB64: # %bb.0: # %entry
+; CHECK-ZVKB64-NEXT: addi sp, sp, -32
+; CHECK-ZVKB64-NEXT: .cfi_def_cfa_offset 32
+; CHECK-ZVKB64-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; CHECK-ZVKB64-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; CHECK-ZVKB64-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; CHECK-ZVKB64-NEXT: sd s2, 0(sp) # 8-byte Folded Spill
+; CHECK-ZVKB64-NEXT: .cfi_offset ra, -8
+; CHECK-ZVKB64-NEXT: .cfi_offset s0, -16
+; CHECK-ZVKB64-NEXT: .cfi_offset s1, -24
+; CHECK-ZVKB64-NEXT: .cfi_offset s2, -32
+; CHECK-ZVKB64-NEXT: mv s0, a0
+; CHECK-ZVKB64-NEXT: csrr s2, vlenb
+; CHECK-ZVKB64-NEXT: srli a0, s2, 3
+; CHECK-ZVKB64-NEXT: li a2, 64
+; CHECK-ZVKB64-NEXT: not s1, a1
+; CHECK-ZVKB64-NEXT: bgeu a2, a0, .LBB98_2
+; CHECK-ZVKB64-NEXT: # %bb.1:
+; CHECK-ZVKB64-NEXT: li a0, 0
+; CHECK-ZVKB64-NEXT: j .LBB98_5
+; CHECK-ZVKB64-NEXT: .LBB98_2: # %vector.ph
+; CHECK-ZVKB64-NEXT: li a1, 508
+; CHECK-ZVKB64-NEXT: call __muldi3
+; CHECK-ZVKB64-NEXT: andi a0, a0, 256
+; CHECK-ZVKB64-NEXT: srli a1, s2, 1
+; CHECK-ZVKB64-NEXT: slli a2, s2, 1
+; CHECK-ZVKB64-NEXT: mv a3, s0
+; CHECK-ZVKB64-NEXT: mv a4, a0
+; CHECK-ZVKB64-NEXT: vsetvli a5, zero, e32, m2, ta, ma
+; CHECK-ZVKB64-NEXT: .LBB98_3: # %vector.body
+; CHECK-ZVKB64-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-ZVKB64-NEXT: vl2re32.v v8, (a3)
+; CHECK-ZVKB64-NEXT: sub a4, a4, a1
+; CHECK-ZVKB64-NEXT: vand.vx v8, v8, s1
+; CHECK-ZVKB64-NEXT: vs2r.v v8, (a3)
+; CHECK-ZVKB64-NEXT: add a3, a3, a2
+; CHECK-ZVKB64-NEXT: bnez a4, .LBB98_3
+; CHECK-ZVKB64-NEXT: # %bb.4: # %middle.block
+; CHECK-ZVKB64-NEXT: bnez a0, .LBB98_7
+; CHECK-ZVKB64-NEXT: .LBB98_5: # %for.body.preheader
+; CHECK-ZVKB64-NEXT: slli a0, a0, 2
+; CHECK-ZVKB64-NEXT: add a0, s0, a0
+; CHECK-ZVKB64-NEXT: addi a1, s0, 1024
+; CHECK-ZVKB64-NEXT: .LBB98_6: # %for.body
+; CHECK-ZVKB64-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-ZVKB64-NEXT: lw a2, 0(a0)
+; CHECK-ZVKB64-NEXT: and a2, a2, s1
+; CHECK-ZVKB64-NEXT: sw a2, 0(a0)
+; CHECK-ZVKB64-NEXT: addi a0, a0, 4
+; CHECK-ZVKB64-NEXT: bne a0, a1, .LBB98_6
+; CHECK-ZVKB64-NEXT: .LBB98_7: # %for.cond.cleanup
+; CHECK-ZVKB64-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; CHECK-ZVKB64-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; CHECK-ZVKB64-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; CHECK-ZVKB64-NEXT: ld s2, 0(sp) # 8-byte Folded Reload
+; CHECK-ZVKB64-NEXT: .cfi_restore ra
+; CHECK-ZVKB64-NEXT: .cfi_restore s0
+; CHECK-ZVKB64-NEXT: .cfi_restore s1
+; CHECK-ZVKB64-NEXT: .cfi_restore s2
+; CHECK-ZVKB64-NEXT: addi sp, sp, 32
+; CHECK-ZVKB64-NEXT: .cfi_def_cfa_offset 0
+; CHECK-ZVKB64-NEXT: ret
+entry:
+ %not = xor i32 %mask, -1
+ %vscale = tail call i64 @llvm.vscale.i64()
+ %min.iters.check = icmp samesign ugt i64 %vscale, 64
+ br i1 %min.iters.check, label %for.body.preheader, label %vector.ph
+
+vector.ph:
+ %1 = tail call i64 @llvm.vscale.i64()
+ %.neg = mul nuw nsw i64 %1, 508
+ %n.vec = and i64 %.neg, 256
+ %2 = tail call i64 @llvm.vscale.i64()
+ %3 = shl nuw nsw i64 %2, 2
+ %broadcast.splatinsert = insertelement <vscale x 4 x i32> poison, i32 %not, i64 0
+ %broadcast.splat = shufflevector <vscale x 4 x i32> %broadcast.splatinsert, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+ br label %vector.body
+
+vector.body:
+ %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+ %4 = getelementptr inbounds nuw i32, ptr %a, i64 %index
+ %wide.load = load <vscale x 4 x i32>, ptr %4, align 4
+ %5 = and <vscale x 4 x i32> %wide.load, %broadcast.splat
+ store <vscale x 4 x i32> %5, ptr %4, align 4
+ %index.next = add nuw i64 %index, %3
+ %6 = icmp eq i64 %index.next, %n.vec
+ br i1 %6, label %middle.block, label %vector.body
+
+middle.block:
+ %cmp.n.not = icmp eq i64 %n.vec, 0
+ br i1 %cmp.n.not, label %for.body.preheader, label %for.cond.cleanup
+
+for.body.preheader:
+ %indvars.iv.ph = phi i64 [ 0, %entry ], [ %n.vec, %middle.block ]
+ br label %for.body
+
+for.cond.cleanup:
+ ret void
+
+for.body:
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ %indvars.iv.ph, %for.body.preheader ]
+ %arrayidx = getelementptr inbounds nuw i32, ptr %a, i64 %indvars.iv
+ %7 = load i32, ptr %arrayidx, align 4
+ %and = and i32 %7, %not
+ store i32 %and, ptr %arrayidx, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond.not = icmp eq i64 %indvars.iv.next, 256
+ br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK-ZVKB-NOZBB32: {{.*}}
+; CHECK-ZVKB-NOZBB64: {{.*}}
+; CHECK-ZVKB-ZBB32: {{.*}}
+; CHECK-ZVKB-ZBB64: {{.*}}
>From 55e7b825e45ba643ed16400f07cd479e13a79d16 Mon Sep 17 00:00:00 2001
From: Piotr Fusik <p.fusik at samsung.com>
Date: Mon, 17 Mar 2025 16:14:23 +0100
Subject: [PATCH 2/2] [RISCV] Sink NOT to be fold into ANDN/ORN/XNOR/VANDN
Undoes a negation being hoisted out of a loop, so that it can be folded
into an inverted bitwise operation in the loop.
Implements #108840 on RISC-V
---
.../Target/RISCV/RISCVTargetTransformInfo.cpp | 33 ++
llvm/test/CodeGen/RISCV/rv32zbb-zbkb.ll | 318 +++++++----
llvm/test/CodeGen/RISCV/rv64zbb-zbkb.ll | 288 ++++++----
llvm/test/CodeGen/RISCV/rvv/vandn-sdnode.ll | 500 ++++++++++++------
4 files changed, 763 insertions(+), 376 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index 11a658758a9cb..1060093043278 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -2779,6 +2779,39 @@ bool RISCVTTIImpl::isProfitableToSinkOperands(
Instruction *I, SmallVectorImpl<Use *> &Ops) const {
using namespace llvm::PatternMatch;
+ if (I->isBitwiseLogicOp()) {
+ if (!I->getType()->isVectorTy()) {
+ if (ST->hasStdExtZbb() || ST->hasStdExtZbkb()) {
+ for (auto &Op : I->operands()) {
+ // (and/or/xor X, (not Y)) -> (andn/orn/xnor X, Y)
+ if (match(Op.get(), m_Not(m_Value()))) {
+ Ops.push_back(&Op);
+ return true;
+ }
+ }
+ }
+ } else if (I->getOpcode() == Instruction::And && ST->hasStdExtZvkb()) {
+ for (auto &Op : I->operands()) {
+ // (and X, (not Y)) -> (vandn.vv X, Y)
+ if (match(Op.get(), m_Not(m_Value()))) {
+ Ops.push_back(&Op);
+ return true;
+ }
+ // (and X, (splat (not Y))) -> (vandn.vx X, Y)
+ if (match(Op.get(), m_Shuffle(m_InsertElt(m_Value(), m_Not(m_Value()),
+ m_ZeroInt()),
+ m_Value(), m_ZeroMask()))) {
+ Use &InsertElt = cast<Instruction>(Op)->getOperandUse(0);
+ Use &Not = cast<Instruction>(InsertElt)->getOperandUse(1);
+ Ops.push_back(&Not);
+ Ops.push_back(&InsertElt);
+ Ops.push_back(&Op);
+ return true;
+ }
+ }
+ }
+ }
+
if (!I->getType()->isVectorTy() || !ST->hasVInstructions())
return false;
diff --git a/llvm/test/CodeGen/RISCV/rv32zbb-zbkb.ll b/llvm/test/CodeGen/RISCV/rv32zbb-zbkb.ll
index a319156f8d1b8..88bb19f499ab5 100644
--- a/llvm/test/CodeGen/RISCV/rv32zbb-zbkb.ll
+++ b/llvm/test/CodeGen/RISCV/rv32zbb-zbkb.ll
@@ -440,14 +440,22 @@ define i1 @andn_snez_i64(i64 %a, i64 %b) nounwind {
}
define i32 @and_hoisted_not_i32(i32 %x, i32 %m, i1 zeroext %cond) {
-; CHECK-LABEL: and_hoisted_not_i32:
-; CHECK: # %bb.0:
-; CHECK-NEXT: beqz a2, .LBB24_2
-; CHECK-NEXT: # %bb.1: # %mask
-; CHECK-NEXT: not a1, a1
-; CHECK-NEXT: and a0, a1, a0
-; CHECK-NEXT: .LBB24_2: # %identity
-; CHECK-NEXT: ret
+; RV32I-LABEL: and_hoisted_not_i32:
+; RV32I: # %bb.0:
+; RV32I-NEXT: beqz a2, .LBB24_2
+; RV32I-NEXT: # %bb.1: # %mask
+; RV32I-NEXT: not a1, a1
+; RV32I-NEXT: and a0, a1, a0
+; RV32I-NEXT: .LBB24_2: # %identity
+; RV32I-NEXT: ret
+;
+; RV32ZBB-ZBKB-LABEL: and_hoisted_not_i32:
+; RV32ZBB-ZBKB: # %bb.0:
+; RV32ZBB-ZBKB-NEXT: beqz a2, .LBB24_2
+; RV32ZBB-ZBKB-NEXT: # %bb.1: # %mask
+; RV32ZBB-ZBKB-NEXT: andn a0, a0, a1
+; RV32ZBB-ZBKB-NEXT: .LBB24_2: # %identity
+; RV32ZBB-ZBKB-NEXT: ret
%a = xor i32 %m, -1
br i1 %cond, label %mask, label %identity
@@ -460,14 +468,22 @@ identity:
}
define i32 @and_hoisted_not_i32_swapped(i32 %x, i32 %m, i1 zeroext %cond) {
-; CHECK-LABEL: and_hoisted_not_i32_swapped:
-; CHECK: # %bb.0:
-; CHECK-NEXT: beqz a2, .LBB25_2
-; CHECK-NEXT: # %bb.1: # %mask
-; CHECK-NEXT: not a1, a1
-; CHECK-NEXT: and a0, a0, a1
-; CHECK-NEXT: .LBB25_2: # %identity
-; CHECK-NEXT: ret
+; RV32I-LABEL: and_hoisted_not_i32_swapped:
+; RV32I: # %bb.0:
+; RV32I-NEXT: beqz a2, .LBB25_2
+; RV32I-NEXT: # %bb.1: # %mask
+; RV32I-NEXT: not a1, a1
+; RV32I-NEXT: and a0, a0, a1
+; RV32I-NEXT: .LBB25_2: # %identity
+; RV32I-NEXT: ret
+;
+; RV32ZBB-ZBKB-LABEL: and_hoisted_not_i32_swapped:
+; RV32ZBB-ZBKB: # %bb.0:
+; RV32ZBB-ZBKB-NEXT: beqz a2, .LBB25_2
+; RV32ZBB-ZBKB-NEXT: # %bb.1: # %mask
+; RV32ZBB-ZBKB-NEXT: andn a0, a0, a1
+; RV32ZBB-ZBKB-NEXT: .LBB25_2: # %identity
+; RV32ZBB-ZBKB-NEXT: ret
%a = xor i32 %m, -1
br i1 %cond, label %mask, label %identity
@@ -480,16 +496,25 @@ identity:
}
define i64 @and_hoisted_not_i64(i64 %x, i64 %m, i1 zeroext %cond) {
-; CHECK-LABEL: and_hoisted_not_i64:
-; CHECK: # %bb.0:
-; CHECK-NEXT: beqz a4, .LBB26_2
-; CHECK-NEXT: # %bb.1: # %mask
-; CHECK-NEXT: not a3, a3
-; CHECK-NEXT: not a2, a2
-; CHECK-NEXT: and a0, a2, a0
-; CHECK-NEXT: and a1, a3, a1
-; CHECK-NEXT: .LBB26_2: # %identity
-; CHECK-NEXT: ret
+; RV32I-LABEL: and_hoisted_not_i64:
+; RV32I: # %bb.0:
+; RV32I-NEXT: beqz a4, .LBB26_2
+; RV32I-NEXT: # %bb.1: # %mask
+; RV32I-NEXT: not a3, a3
+; RV32I-NEXT: not a2, a2
+; RV32I-NEXT: and a0, a2, a0
+; RV32I-NEXT: and a1, a3, a1
+; RV32I-NEXT: .LBB26_2: # %identity
+; RV32I-NEXT: ret
+;
+; RV32ZBB-ZBKB-LABEL: and_hoisted_not_i64:
+; RV32ZBB-ZBKB: # %bb.0:
+; RV32ZBB-ZBKB-NEXT: beqz a4, .LBB26_2
+; RV32ZBB-ZBKB-NEXT: # %bb.1: # %mask
+; RV32ZBB-ZBKB-NEXT: andn a0, a0, a2
+; RV32ZBB-ZBKB-NEXT: andn a1, a1, a3
+; RV32ZBB-ZBKB-NEXT: .LBB26_2: # %identity
+; RV32ZBB-ZBKB-NEXT: ret
%a = xor i64 %m, -1
br i1 %cond, label %mask, label %identity
@@ -502,16 +527,25 @@ identity:
}
define i64 @and_hoisted_not_i64_swapped(i64 %x, i64 %m, i1 zeroext %cond) {
-; CHECK-LABEL: and_hoisted_not_i64_swapped:
-; CHECK: # %bb.0:
-; CHECK-NEXT: beqz a4, .LBB27_2
-; CHECK-NEXT: # %bb.1: # %mask
-; CHECK-NEXT: not a3, a3
-; CHECK-NEXT: not a2, a2
-; CHECK-NEXT: and a0, a0, a2
-; CHECK-NEXT: and a1, a1, a3
-; CHECK-NEXT: .LBB27_2: # %identity
-; CHECK-NEXT: ret
+; RV32I-LABEL: and_hoisted_not_i64_swapped:
+; RV32I: # %bb.0:
+; RV32I-NEXT: beqz a4, .LBB27_2
+; RV32I-NEXT: # %bb.1: # %mask
+; RV32I-NEXT: not a3, a3
+; RV32I-NEXT: not a2, a2
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: and a1, a1, a3
+; RV32I-NEXT: .LBB27_2: # %identity
+; RV32I-NEXT: ret
+;
+; RV32ZBB-ZBKB-LABEL: and_hoisted_not_i64_swapped:
+; RV32ZBB-ZBKB: # %bb.0:
+; RV32ZBB-ZBKB-NEXT: beqz a4, .LBB27_2
+; RV32ZBB-ZBKB-NEXT: # %bb.1: # %mask
+; RV32ZBB-ZBKB-NEXT: andn a0, a0, a2
+; RV32ZBB-ZBKB-NEXT: andn a1, a1, a3
+; RV32ZBB-ZBKB-NEXT: .LBB27_2: # %identity
+; RV32ZBB-ZBKB-NEXT: ret
%a = xor i64 %m, -1
br i1 %cond, label %mask, label %identity
@@ -524,14 +558,22 @@ identity:
}
define i32 @or_hoisted_not_i32(i32 %x, i32 %m, i1 zeroext %cond) {
-; CHECK-LABEL: or_hoisted_not_i32:
-; CHECK: # %bb.0:
-; CHECK-NEXT: beqz a2, .LBB28_2
-; CHECK-NEXT: # %bb.1: # %mask
-; CHECK-NEXT: not a1, a1
-; CHECK-NEXT: or a0, a1, a0
-; CHECK-NEXT: .LBB28_2: # %identity
-; CHECK-NEXT: ret
+; RV32I-LABEL: or_hoisted_not_i32:
+; RV32I: # %bb.0:
+; RV32I-NEXT: beqz a2, .LBB28_2
+; RV32I-NEXT: # %bb.1: # %mask
+; RV32I-NEXT: not a1, a1
+; RV32I-NEXT: or a0, a1, a0
+; RV32I-NEXT: .LBB28_2: # %identity
+; RV32I-NEXT: ret
+;
+; RV32ZBB-ZBKB-LABEL: or_hoisted_not_i32:
+; RV32ZBB-ZBKB: # %bb.0:
+; RV32ZBB-ZBKB-NEXT: beqz a2, .LBB28_2
+; RV32ZBB-ZBKB-NEXT: # %bb.1: # %mask
+; RV32ZBB-ZBKB-NEXT: orn a0, a0, a1
+; RV32ZBB-ZBKB-NEXT: .LBB28_2: # %identity
+; RV32ZBB-ZBKB-NEXT: ret
%a = xor i32 %m, -1
br i1 %cond, label %mask, label %identity
@@ -544,14 +586,22 @@ identity:
}
define i32 @or_hoisted_not_i32_swapped(i32 %x, i32 %m, i1 zeroext %cond) {
-; CHECK-LABEL: or_hoisted_not_i32_swapped:
-; CHECK: # %bb.0:
-; CHECK-NEXT: beqz a2, .LBB29_2
-; CHECK-NEXT: # %bb.1: # %mask
-; CHECK-NEXT: not a1, a1
-; CHECK-NEXT: or a0, a0, a1
-; CHECK-NEXT: .LBB29_2: # %identity
-; CHECK-NEXT: ret
+; RV32I-LABEL: or_hoisted_not_i32_swapped:
+; RV32I: # %bb.0:
+; RV32I-NEXT: beqz a2, .LBB29_2
+; RV32I-NEXT: # %bb.1: # %mask
+; RV32I-NEXT: not a1, a1
+; RV32I-NEXT: or a0, a0, a1
+; RV32I-NEXT: .LBB29_2: # %identity
+; RV32I-NEXT: ret
+;
+; RV32ZBB-ZBKB-LABEL: or_hoisted_not_i32_swapped:
+; RV32ZBB-ZBKB: # %bb.0:
+; RV32ZBB-ZBKB-NEXT: beqz a2, .LBB29_2
+; RV32ZBB-ZBKB-NEXT: # %bb.1: # %mask
+; RV32ZBB-ZBKB-NEXT: orn a0, a0, a1
+; RV32ZBB-ZBKB-NEXT: .LBB29_2: # %identity
+; RV32ZBB-ZBKB-NEXT: ret
%a = xor i32 %m, -1
br i1 %cond, label %mask, label %identity
@@ -564,16 +614,25 @@ identity:
}
define i64 @or_hoisted_not_i64(i64 %x, i64 %m, i1 zeroext %cond) {
-; CHECK-LABEL: or_hoisted_not_i64:
-; CHECK: # %bb.0:
-; CHECK-NEXT: beqz a4, .LBB30_2
-; CHECK-NEXT: # %bb.1: # %mask
-; CHECK-NEXT: not a3, a3
-; CHECK-NEXT: not a2, a2
-; CHECK-NEXT: or a0, a2, a0
-; CHECK-NEXT: or a1, a3, a1
-; CHECK-NEXT: .LBB30_2: # %identity
-; CHECK-NEXT: ret
+; RV32I-LABEL: or_hoisted_not_i64:
+; RV32I: # %bb.0:
+; RV32I-NEXT: beqz a4, .LBB30_2
+; RV32I-NEXT: # %bb.1: # %mask
+; RV32I-NEXT: not a3, a3
+; RV32I-NEXT: not a2, a2
+; RV32I-NEXT: or a0, a2, a0
+; RV32I-NEXT: or a1, a3, a1
+; RV32I-NEXT: .LBB30_2: # %identity
+; RV32I-NEXT: ret
+;
+; RV32ZBB-ZBKB-LABEL: or_hoisted_not_i64:
+; RV32ZBB-ZBKB: # %bb.0:
+; RV32ZBB-ZBKB-NEXT: beqz a4, .LBB30_2
+; RV32ZBB-ZBKB-NEXT: # %bb.1: # %mask
+; RV32ZBB-ZBKB-NEXT: orn a0, a0, a2
+; RV32ZBB-ZBKB-NEXT: orn a1, a1, a3
+; RV32ZBB-ZBKB-NEXT: .LBB30_2: # %identity
+; RV32ZBB-ZBKB-NEXT: ret
%a = xor i64 %m, -1
br i1 %cond, label %mask, label %identity
@@ -586,16 +645,25 @@ identity:
}
define i64 @or_hoisted_not_i64_swapped(i64 %x, i64 %m, i1 zeroext %cond) {
-; CHECK-LABEL: or_hoisted_not_i64_swapped:
-; CHECK: # %bb.0:
-; CHECK-NEXT: beqz a4, .LBB31_2
-; CHECK-NEXT: # %bb.1: # %mask
-; CHECK-NEXT: not a3, a3
-; CHECK-NEXT: not a2, a2
-; CHECK-NEXT: or a0, a0, a2
-; CHECK-NEXT: or a1, a1, a3
-; CHECK-NEXT: .LBB31_2: # %identity
-; CHECK-NEXT: ret
+; RV32I-LABEL: or_hoisted_not_i64_swapped:
+; RV32I: # %bb.0:
+; RV32I-NEXT: beqz a4, .LBB31_2
+; RV32I-NEXT: # %bb.1: # %mask
+; RV32I-NEXT: not a3, a3
+; RV32I-NEXT: not a2, a2
+; RV32I-NEXT: or a0, a0, a2
+; RV32I-NEXT: or a1, a1, a3
+; RV32I-NEXT: .LBB31_2: # %identity
+; RV32I-NEXT: ret
+;
+; RV32ZBB-ZBKB-LABEL: or_hoisted_not_i64_swapped:
+; RV32ZBB-ZBKB: # %bb.0:
+; RV32ZBB-ZBKB-NEXT: beqz a4, .LBB31_2
+; RV32ZBB-ZBKB-NEXT: # %bb.1: # %mask
+; RV32ZBB-ZBKB-NEXT: orn a0, a0, a2
+; RV32ZBB-ZBKB-NEXT: orn a1, a1, a3
+; RV32ZBB-ZBKB-NEXT: .LBB31_2: # %identity
+; RV32ZBB-ZBKB-NEXT: ret
%a = xor i64 %m, -1
br i1 %cond, label %mask, label %identity
@@ -608,14 +676,22 @@ identity:
}
define i32 @xor_hoisted_not_i32(i32 %x, i32 %m, i1 zeroext %cond) {
-; CHECK-LABEL: xor_hoisted_not_i32:
-; CHECK: # %bb.0:
-; CHECK-NEXT: beqz a2, .LBB32_2
-; CHECK-NEXT: # %bb.1: # %mask
-; CHECK-NEXT: not a1, a1
-; CHECK-NEXT: xor a0, a1, a0
-; CHECK-NEXT: .LBB32_2: # %identity
-; CHECK-NEXT: ret
+; RV32I-LABEL: xor_hoisted_not_i32:
+; RV32I: # %bb.0:
+; RV32I-NEXT: beqz a2, .LBB32_2
+; RV32I-NEXT: # %bb.1: # %mask
+; RV32I-NEXT: not a1, a1
+; RV32I-NEXT: xor a0, a1, a0
+; RV32I-NEXT: .LBB32_2: # %identity
+; RV32I-NEXT: ret
+;
+; RV32ZBB-ZBKB-LABEL: xor_hoisted_not_i32:
+; RV32ZBB-ZBKB: # %bb.0:
+; RV32ZBB-ZBKB-NEXT: beqz a2, .LBB32_2
+; RV32ZBB-ZBKB-NEXT: # %bb.1: # %mask
+; RV32ZBB-ZBKB-NEXT: xnor a0, a1, a0
+; RV32ZBB-ZBKB-NEXT: .LBB32_2: # %identity
+; RV32ZBB-ZBKB-NEXT: ret
%a = xor i32 %m, -1
br i1 %cond, label %mask, label %identity
@@ -628,14 +704,22 @@ identity:
}
define i32 @xor_hoisted_not_i32_swapped(i32 %x, i32 %m, i1 zeroext %cond) {
-; CHECK-LABEL: xor_hoisted_not_i32_swapped:
-; CHECK: # %bb.0:
-; CHECK-NEXT: beqz a2, .LBB33_2
-; CHECK-NEXT: # %bb.1: # %mask
-; CHECK-NEXT: not a1, a1
-; CHECK-NEXT: xor a0, a0, a1
-; CHECK-NEXT: .LBB33_2: # %identity
-; CHECK-NEXT: ret
+; RV32I-LABEL: xor_hoisted_not_i32_swapped:
+; RV32I: # %bb.0:
+; RV32I-NEXT: beqz a2, .LBB33_2
+; RV32I-NEXT: # %bb.1: # %mask
+; RV32I-NEXT: not a1, a1
+; RV32I-NEXT: xor a0, a0, a1
+; RV32I-NEXT: .LBB33_2: # %identity
+; RV32I-NEXT: ret
+;
+; RV32ZBB-ZBKB-LABEL: xor_hoisted_not_i32_swapped:
+; RV32ZBB-ZBKB: # %bb.0:
+; RV32ZBB-ZBKB-NEXT: beqz a2, .LBB33_2
+; RV32ZBB-ZBKB-NEXT: # %bb.1: # %mask
+; RV32ZBB-ZBKB-NEXT: xnor a0, a1, a0
+; RV32ZBB-ZBKB-NEXT: .LBB33_2: # %identity
+; RV32ZBB-ZBKB-NEXT: ret
%a = xor i32 %m, -1
br i1 %cond, label %mask, label %identity
@@ -648,16 +732,25 @@ identity:
}
define i64 @xor_hoisted_not_i64(i64 %x, i64 %m, i1 zeroext %cond) {
-; CHECK-LABEL: xor_hoisted_not_i64:
-; CHECK: # %bb.0:
-; CHECK-NEXT: beqz a4, .LBB34_2
-; CHECK-NEXT: # %bb.1: # %mask
-; CHECK-NEXT: not a3, a3
-; CHECK-NEXT: not a2, a2
-; CHECK-NEXT: xor a0, a2, a0
-; CHECK-NEXT: xor a1, a3, a1
-; CHECK-NEXT: .LBB34_2: # %identity
-; CHECK-NEXT: ret
+; RV32I-LABEL: xor_hoisted_not_i64:
+; RV32I: # %bb.0:
+; RV32I-NEXT: beqz a4, .LBB34_2
+; RV32I-NEXT: # %bb.1: # %mask
+; RV32I-NEXT: not a3, a3
+; RV32I-NEXT: not a2, a2
+; RV32I-NEXT: xor a0, a2, a0
+; RV32I-NEXT: xor a1, a3, a1
+; RV32I-NEXT: .LBB34_2: # %identity
+; RV32I-NEXT: ret
+;
+; RV32ZBB-ZBKB-LABEL: xor_hoisted_not_i64:
+; RV32ZBB-ZBKB: # %bb.0:
+; RV32ZBB-ZBKB-NEXT: beqz a4, .LBB34_2
+; RV32ZBB-ZBKB-NEXT: # %bb.1: # %mask
+; RV32ZBB-ZBKB-NEXT: xnor a0, a2, a0
+; RV32ZBB-ZBKB-NEXT: xnor a1, a3, a1
+; RV32ZBB-ZBKB-NEXT: .LBB34_2: # %identity
+; RV32ZBB-ZBKB-NEXT: ret
%a = xor i64 %m, -1
br i1 %cond, label %mask, label %identity
@@ -670,16 +763,25 @@ identity:
}
define i64 @xor_hoisted_not_i64_swapped(i64 %x, i64 %m, i1 zeroext %cond) {
-; CHECK-LABEL: xor_hoisted_not_i64_swapped:
-; CHECK: # %bb.0:
-; CHECK-NEXT: beqz a4, .LBB35_2
-; CHECK-NEXT: # %bb.1: # %mask
-; CHECK-NEXT: not a3, a3
-; CHECK-NEXT: not a2, a2
-; CHECK-NEXT: xor a0, a0, a2
-; CHECK-NEXT: xor a1, a1, a3
-; CHECK-NEXT: .LBB35_2: # %identity
-; CHECK-NEXT: ret
+; RV32I-LABEL: xor_hoisted_not_i64_swapped:
+; RV32I: # %bb.0:
+; RV32I-NEXT: beqz a4, .LBB35_2
+; RV32I-NEXT: # %bb.1: # %mask
+; RV32I-NEXT: not a3, a3
+; RV32I-NEXT: not a2, a2
+; RV32I-NEXT: xor a0, a0, a2
+; RV32I-NEXT: xor a1, a1, a3
+; RV32I-NEXT: .LBB35_2: # %identity
+; RV32I-NEXT: ret
+;
+; RV32ZBB-ZBKB-LABEL: xor_hoisted_not_i64_swapped:
+; RV32ZBB-ZBKB: # %bb.0:
+; RV32ZBB-ZBKB-NEXT: beqz a4, .LBB35_2
+; RV32ZBB-ZBKB-NEXT: # %bb.1: # %mask
+; RV32ZBB-ZBKB-NEXT: xnor a0, a2, a0
+; RV32ZBB-ZBKB-NEXT: xnor a1, a3, a1
+; RV32ZBB-ZBKB-NEXT: .LBB35_2: # %identity
+; RV32ZBB-ZBKB-NEXT: ret
%a = xor i64 %m, -1
br i1 %cond, label %mask, label %identity
diff --git a/llvm/test/CodeGen/RISCV/rv64zbb-zbkb.ll b/llvm/test/CodeGen/RISCV/rv64zbb-zbkb.ll
index 23b2c2d70a37a..4ca637b788a45 100644
--- a/llvm/test/CodeGen/RISCV/rv64zbb-zbkb.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zbb-zbkb.ll
@@ -556,14 +556,22 @@ define i1 @andn_snez_i64(i64 %a, i64 %b) nounwind {
}
define i32 @and_hoisted_not_i32(i32 %x, i32 %m, i1 zeroext %cond) {
-; CHECK-LABEL: and_hoisted_not_i32:
-; CHECK: # %bb.0:
-; CHECK-NEXT: beqz a2, .LBB32_2
-; CHECK-NEXT: # %bb.1: # %mask
-; CHECK-NEXT: not a1, a1
-; CHECK-NEXT: and a0, a1, a0
-; CHECK-NEXT: .LBB32_2: # %identity
-; CHECK-NEXT: ret
+; RV64I-LABEL: and_hoisted_not_i32:
+; RV64I: # %bb.0:
+; RV64I-NEXT: beqz a2, .LBB32_2
+; RV64I-NEXT: # %bb.1: # %mask
+; RV64I-NEXT: not a1, a1
+; RV64I-NEXT: and a0, a1, a0
+; RV64I-NEXT: .LBB32_2: # %identity
+; RV64I-NEXT: ret
+;
+; RV64ZBB-ZBKB-LABEL: and_hoisted_not_i32:
+; RV64ZBB-ZBKB: # %bb.0:
+; RV64ZBB-ZBKB-NEXT: beqz a2, .LBB32_2
+; RV64ZBB-ZBKB-NEXT: # %bb.1: # %mask
+; RV64ZBB-ZBKB-NEXT: andn a0, a0, a1
+; RV64ZBB-ZBKB-NEXT: .LBB32_2: # %identity
+; RV64ZBB-ZBKB-NEXT: ret
%a = xor i32 %m, -1
br i1 %cond, label %mask, label %identity
@@ -576,14 +584,22 @@ identity:
}
define i32 @and_hoisted_not_i32_swapped(i32 %x, i32 %m, i1 zeroext %cond) {
-; CHECK-LABEL: and_hoisted_not_i32_swapped:
-; CHECK: # %bb.0:
-; CHECK-NEXT: beqz a2, .LBB33_2
-; CHECK-NEXT: # %bb.1: # %mask
-; CHECK-NEXT: not a1, a1
-; CHECK-NEXT: and a0, a0, a1
-; CHECK-NEXT: .LBB33_2: # %identity
-; CHECK-NEXT: ret
+; RV64I-LABEL: and_hoisted_not_i32_swapped:
+; RV64I: # %bb.0:
+; RV64I-NEXT: beqz a2, .LBB33_2
+; RV64I-NEXT: # %bb.1: # %mask
+; RV64I-NEXT: not a1, a1
+; RV64I-NEXT: and a0, a0, a1
+; RV64I-NEXT: .LBB33_2: # %identity
+; RV64I-NEXT: ret
+;
+; RV64ZBB-ZBKB-LABEL: and_hoisted_not_i32_swapped:
+; RV64ZBB-ZBKB: # %bb.0:
+; RV64ZBB-ZBKB-NEXT: beqz a2, .LBB33_2
+; RV64ZBB-ZBKB-NEXT: # %bb.1: # %mask
+; RV64ZBB-ZBKB-NEXT: andn a0, a0, a1
+; RV64ZBB-ZBKB-NEXT: .LBB33_2: # %identity
+; RV64ZBB-ZBKB-NEXT: ret
%a = xor i32 %m, -1
br i1 %cond, label %mask, label %identity
@@ -596,14 +612,22 @@ identity:
}
define i64 @and_hoisted_not_i64(i64 %x, i64 %m, i1 zeroext %cond) {
-; CHECK-LABEL: and_hoisted_not_i64:
-; CHECK: # %bb.0:
-; CHECK-NEXT: beqz a2, .LBB34_2
-; CHECK-NEXT: # %bb.1: # %mask
-; CHECK-NEXT: not a1, a1
-; CHECK-NEXT: and a0, a1, a0
-; CHECK-NEXT: .LBB34_2: # %identity
-; CHECK-NEXT: ret
+; RV64I-LABEL: and_hoisted_not_i64:
+; RV64I: # %bb.0:
+; RV64I-NEXT: beqz a2, .LBB34_2
+; RV64I-NEXT: # %bb.1: # %mask
+; RV64I-NEXT: not a1, a1
+; RV64I-NEXT: and a0, a1, a0
+; RV64I-NEXT: .LBB34_2: # %identity
+; RV64I-NEXT: ret
+;
+; RV64ZBB-ZBKB-LABEL: and_hoisted_not_i64:
+; RV64ZBB-ZBKB: # %bb.0:
+; RV64ZBB-ZBKB-NEXT: beqz a2, .LBB34_2
+; RV64ZBB-ZBKB-NEXT: # %bb.1: # %mask
+; RV64ZBB-ZBKB-NEXT: andn a0, a0, a1
+; RV64ZBB-ZBKB-NEXT: .LBB34_2: # %identity
+; RV64ZBB-ZBKB-NEXT: ret
%a = xor i64 %m, -1
br i1 %cond, label %mask, label %identity
@@ -616,14 +640,22 @@ identity:
}
define i64 @and_hoisted_not_i64_swapped(i64 %x, i64 %m, i1 zeroext %cond) {
-; CHECK-LABEL: and_hoisted_not_i64_swapped:
-; CHECK: # %bb.0:
-; CHECK-NEXT: beqz a2, .LBB35_2
-; CHECK-NEXT: # %bb.1: # %mask
-; CHECK-NEXT: not a1, a1
-; CHECK-NEXT: and a0, a0, a1
-; CHECK-NEXT: .LBB35_2: # %identity
-; CHECK-NEXT: ret
+; RV64I-LABEL: and_hoisted_not_i64_swapped:
+; RV64I: # %bb.0:
+; RV64I-NEXT: beqz a2, .LBB35_2
+; RV64I-NEXT: # %bb.1: # %mask
+; RV64I-NEXT: not a1, a1
+; RV64I-NEXT: and a0, a0, a1
+; RV64I-NEXT: .LBB35_2: # %identity
+; RV64I-NEXT: ret
+;
+; RV64ZBB-ZBKB-LABEL: and_hoisted_not_i64_swapped:
+; RV64ZBB-ZBKB: # %bb.0:
+; RV64ZBB-ZBKB-NEXT: beqz a2, .LBB35_2
+; RV64ZBB-ZBKB-NEXT: # %bb.1: # %mask
+; RV64ZBB-ZBKB-NEXT: andn a0, a0, a1
+; RV64ZBB-ZBKB-NEXT: .LBB35_2: # %identity
+; RV64ZBB-ZBKB-NEXT: ret
%a = xor i64 %m, -1
br i1 %cond, label %mask, label %identity
@@ -636,14 +668,22 @@ identity:
}
define i32 @or_hoisted_not_i32(i32 %x, i32 %m, i1 zeroext %cond) {
-; CHECK-LABEL: or_hoisted_not_i32:
-; CHECK: # %bb.0:
-; CHECK-NEXT: beqz a2, .LBB36_2
-; CHECK-NEXT: # %bb.1: # %mask
-; CHECK-NEXT: not a1, a1
-; CHECK-NEXT: or a0, a1, a0
-; CHECK-NEXT: .LBB36_2: # %identity
-; CHECK-NEXT: ret
+; RV64I-LABEL: or_hoisted_not_i32:
+; RV64I: # %bb.0:
+; RV64I-NEXT: beqz a2, .LBB36_2
+; RV64I-NEXT: # %bb.1: # %mask
+; RV64I-NEXT: not a1, a1
+; RV64I-NEXT: or a0, a1, a0
+; RV64I-NEXT: .LBB36_2: # %identity
+; RV64I-NEXT: ret
+;
+; RV64ZBB-ZBKB-LABEL: or_hoisted_not_i32:
+; RV64ZBB-ZBKB: # %bb.0:
+; RV64ZBB-ZBKB-NEXT: beqz a2, .LBB36_2
+; RV64ZBB-ZBKB-NEXT: # %bb.1: # %mask
+; RV64ZBB-ZBKB-NEXT: orn a0, a0, a1
+; RV64ZBB-ZBKB-NEXT: .LBB36_2: # %identity
+; RV64ZBB-ZBKB-NEXT: ret
%a = xor i32 %m, -1
br i1 %cond, label %mask, label %identity
@@ -656,14 +696,22 @@ identity:
}
define i32 @or_hoisted_not_i32_swapped(i32 %x, i32 %m, i1 zeroext %cond) {
-; CHECK-LABEL: or_hoisted_not_i32_swapped:
-; CHECK: # %bb.0:
-; CHECK-NEXT: beqz a2, .LBB37_2
-; CHECK-NEXT: # %bb.1: # %mask
-; CHECK-NEXT: not a1, a1
-; CHECK-NEXT: or a0, a0, a1
-; CHECK-NEXT: .LBB37_2: # %identity
-; CHECK-NEXT: ret
+; RV64I-LABEL: or_hoisted_not_i32_swapped:
+; RV64I: # %bb.0:
+; RV64I-NEXT: beqz a2, .LBB37_2
+; RV64I-NEXT: # %bb.1: # %mask
+; RV64I-NEXT: not a1, a1
+; RV64I-NEXT: or a0, a0, a1
+; RV64I-NEXT: .LBB37_2: # %identity
+; RV64I-NEXT: ret
+;
+; RV64ZBB-ZBKB-LABEL: or_hoisted_not_i32_swapped:
+; RV64ZBB-ZBKB: # %bb.0:
+; RV64ZBB-ZBKB-NEXT: beqz a2, .LBB37_2
+; RV64ZBB-ZBKB-NEXT: # %bb.1: # %mask
+; RV64ZBB-ZBKB-NEXT: orn a0, a0, a1
+; RV64ZBB-ZBKB-NEXT: .LBB37_2: # %identity
+; RV64ZBB-ZBKB-NEXT: ret
%a = xor i32 %m, -1
br i1 %cond, label %mask, label %identity
@@ -676,14 +724,22 @@ identity:
}
define i64 @or_hoisted_not_i64(i64 %x, i64 %m, i1 zeroext %cond) {
-; CHECK-LABEL: or_hoisted_not_i64:
-; CHECK: # %bb.0:
-; CHECK-NEXT: beqz a2, .LBB38_2
-; CHECK-NEXT: # %bb.1: # %mask
-; CHECK-NEXT: not a1, a1
-; CHECK-NEXT: or a0, a1, a0
-; CHECK-NEXT: .LBB38_2: # %identity
-; CHECK-NEXT: ret
+; RV64I-LABEL: or_hoisted_not_i64:
+; RV64I: # %bb.0:
+; RV64I-NEXT: beqz a2, .LBB38_2
+; RV64I-NEXT: # %bb.1: # %mask
+; RV64I-NEXT: not a1, a1
+; RV64I-NEXT: or a0, a1, a0
+; RV64I-NEXT: .LBB38_2: # %identity
+; RV64I-NEXT: ret
+;
+; RV64ZBB-ZBKB-LABEL: or_hoisted_not_i64:
+; RV64ZBB-ZBKB: # %bb.0:
+; RV64ZBB-ZBKB-NEXT: beqz a2, .LBB38_2
+; RV64ZBB-ZBKB-NEXT: # %bb.1: # %mask
+; RV64ZBB-ZBKB-NEXT: orn a0, a0, a1
+; RV64ZBB-ZBKB-NEXT: .LBB38_2: # %identity
+; RV64ZBB-ZBKB-NEXT: ret
%a = xor i64 %m, -1
br i1 %cond, label %mask, label %identity
@@ -696,14 +752,22 @@ identity:
}
define i64 @or_hoisted_not_i64_swapped(i64 %x, i64 %m, i1 zeroext %cond) {
-; CHECK-LABEL: or_hoisted_not_i64_swapped:
-; CHECK: # %bb.0:
-; CHECK-NEXT: beqz a2, .LBB39_2
-; CHECK-NEXT: # %bb.1: # %mask
-; CHECK-NEXT: not a1, a1
-; CHECK-NEXT: or a0, a0, a1
-; CHECK-NEXT: .LBB39_2: # %identity
-; CHECK-NEXT: ret
+; RV64I-LABEL: or_hoisted_not_i64_swapped:
+; RV64I: # %bb.0:
+; RV64I-NEXT: beqz a2, .LBB39_2
+; RV64I-NEXT: # %bb.1: # %mask
+; RV64I-NEXT: not a1, a1
+; RV64I-NEXT: or a0, a0, a1
+; RV64I-NEXT: .LBB39_2: # %identity
+; RV64I-NEXT: ret
+;
+; RV64ZBB-ZBKB-LABEL: or_hoisted_not_i64_swapped:
+; RV64ZBB-ZBKB: # %bb.0:
+; RV64ZBB-ZBKB-NEXT: beqz a2, .LBB39_2
+; RV64ZBB-ZBKB-NEXT: # %bb.1: # %mask
+; RV64ZBB-ZBKB-NEXT: orn a0, a0, a1
+; RV64ZBB-ZBKB-NEXT: .LBB39_2: # %identity
+; RV64ZBB-ZBKB-NEXT: ret
%a = xor i64 %m, -1
br i1 %cond, label %mask, label %identity
@@ -716,14 +780,22 @@ identity:
}
define i32 @xor_hoisted_not_i32(i32 %x, i32 %m, i1 zeroext %cond) {
-; CHECK-LABEL: xor_hoisted_not_i32:
-; CHECK: # %bb.0:
-; CHECK-NEXT: beqz a2, .LBB40_2
-; CHECK-NEXT: # %bb.1: # %mask
-; CHECK-NEXT: not a1, a1
-; CHECK-NEXT: xor a0, a1, a0
-; CHECK-NEXT: .LBB40_2: # %identity
-; CHECK-NEXT: ret
+; RV64I-LABEL: xor_hoisted_not_i32:
+; RV64I: # %bb.0:
+; RV64I-NEXT: beqz a2, .LBB40_2
+; RV64I-NEXT: # %bb.1: # %mask
+; RV64I-NEXT: not a1, a1
+; RV64I-NEXT: xor a0, a1, a0
+; RV64I-NEXT: .LBB40_2: # %identity
+; RV64I-NEXT: ret
+;
+; RV64ZBB-ZBKB-LABEL: xor_hoisted_not_i32:
+; RV64ZBB-ZBKB: # %bb.0:
+; RV64ZBB-ZBKB-NEXT: beqz a2, .LBB40_2
+; RV64ZBB-ZBKB-NEXT: # %bb.1: # %mask
+; RV64ZBB-ZBKB-NEXT: xnor a0, a1, a0
+; RV64ZBB-ZBKB-NEXT: .LBB40_2: # %identity
+; RV64ZBB-ZBKB-NEXT: ret
%a = xor i32 %m, -1
br i1 %cond, label %mask, label %identity
@@ -736,14 +808,22 @@ identity:
}
define i32 @xor_hoisted_not_i32_swapped(i32 %x, i32 %m, i1 zeroext %cond) {
-; CHECK-LABEL: xor_hoisted_not_i32_swapped:
-; CHECK: # %bb.0:
-; CHECK-NEXT: beqz a2, .LBB41_2
-; CHECK-NEXT: # %bb.1: # %mask
-; CHECK-NEXT: not a1, a1
-; CHECK-NEXT: xor a0, a0, a1
-; CHECK-NEXT: .LBB41_2: # %identity
-; CHECK-NEXT: ret
+; RV64I-LABEL: xor_hoisted_not_i32_swapped:
+; RV64I: # %bb.0:
+; RV64I-NEXT: beqz a2, .LBB41_2
+; RV64I-NEXT: # %bb.1: # %mask
+; RV64I-NEXT: not a1, a1
+; RV64I-NEXT: xor a0, a0, a1
+; RV64I-NEXT: .LBB41_2: # %identity
+; RV64I-NEXT: ret
+;
+; RV64ZBB-ZBKB-LABEL: xor_hoisted_not_i32_swapped:
+; RV64ZBB-ZBKB: # %bb.0:
+; RV64ZBB-ZBKB-NEXT: beqz a2, .LBB41_2
+; RV64ZBB-ZBKB-NEXT: # %bb.1: # %mask
+; RV64ZBB-ZBKB-NEXT: xnor a0, a1, a0
+; RV64ZBB-ZBKB-NEXT: .LBB41_2: # %identity
+; RV64ZBB-ZBKB-NEXT: ret
%a = xor i32 %m, -1
br i1 %cond, label %mask, label %identity
@@ -756,14 +836,22 @@ identity:
}
define i64 @xor_hoisted_not_i64(i64 %x, i64 %m, i1 zeroext %cond) {
-; CHECK-LABEL: xor_hoisted_not_i64:
-; CHECK: # %bb.0:
-; CHECK-NEXT: beqz a2, .LBB42_2
-; CHECK-NEXT: # %bb.1: # %mask
-; CHECK-NEXT: not a1, a1
-; CHECK-NEXT: xor a0, a1, a0
-; CHECK-NEXT: .LBB42_2: # %identity
-; CHECK-NEXT: ret
+; RV64I-LABEL: xor_hoisted_not_i64:
+; RV64I: # %bb.0:
+; RV64I-NEXT: beqz a2, .LBB42_2
+; RV64I-NEXT: # %bb.1: # %mask
+; RV64I-NEXT: not a1, a1
+; RV64I-NEXT: xor a0, a1, a0
+; RV64I-NEXT: .LBB42_2: # %identity
+; RV64I-NEXT: ret
+;
+; RV64ZBB-ZBKB-LABEL: xor_hoisted_not_i64:
+; RV64ZBB-ZBKB: # %bb.0:
+; RV64ZBB-ZBKB-NEXT: beqz a2, .LBB42_2
+; RV64ZBB-ZBKB-NEXT: # %bb.1: # %mask
+; RV64ZBB-ZBKB-NEXT: xnor a0, a1, a0
+; RV64ZBB-ZBKB-NEXT: .LBB42_2: # %identity
+; RV64ZBB-ZBKB-NEXT: ret
%a = xor i64 %m, -1
br i1 %cond, label %mask, label %identity
@@ -776,14 +864,22 @@ identity:
}
define i64 @xor_hoisted_not_i64_swapped(i64 %x, i64 %m, i1 zeroext %cond) {
-; CHECK-LABEL: xor_hoisted_not_i64_swapped:
-; CHECK: # %bb.0:
-; CHECK-NEXT: beqz a2, .LBB43_2
-; CHECK-NEXT: # %bb.1: # %mask
-; CHECK-NEXT: not a1, a1
-; CHECK-NEXT: xor a0, a0, a1
-; CHECK-NEXT: .LBB43_2: # %identity
-; CHECK-NEXT: ret
+; RV64I-LABEL: xor_hoisted_not_i64_swapped:
+; RV64I: # %bb.0:
+; RV64I-NEXT: beqz a2, .LBB43_2
+; RV64I-NEXT: # %bb.1: # %mask
+; RV64I-NEXT: not a1, a1
+; RV64I-NEXT: xor a0, a0, a1
+; RV64I-NEXT: .LBB43_2: # %identity
+; RV64I-NEXT: ret
+;
+; RV64ZBB-ZBKB-LABEL: xor_hoisted_not_i64_swapped:
+; RV64ZBB-ZBKB: # %bb.0:
+; RV64ZBB-ZBKB-NEXT: beqz a2, .LBB43_2
+; RV64ZBB-ZBKB-NEXT: # %bb.1: # %mask
+; RV64ZBB-ZBKB-NEXT: xnor a0, a1, a0
+; RV64ZBB-ZBKB-NEXT: .LBB43_2: # %identity
+; RV64ZBB-ZBKB-NEXT: ret
%a = xor i64 %m, -1
br i1 %cond, label %mask, label %identity
diff --git a/llvm/test/CodeGen/RISCV/rvv/vandn-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vandn-sdnode.ll
index d4f9f41cd4ab7..59191238a2d72 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vandn-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vandn-sdnode.ll
@@ -2125,9 +2125,8 @@ define <vscale x 1 x i8> @vand_vx_hoisted_not(<vscale x 1 x i8> %x, i8 %m, i1 ze
; CHECK-ZVKB: # %bb.0:
; CHECK-ZVKB-NEXT: beqz a1, .LBB94_2
; CHECK-ZVKB-NEXT: # %bb.1: # %mask
-; CHECK-ZVKB-NEXT: not a0, a0
; CHECK-ZVKB-NEXT: vsetvli a1, zero, e8, mf8, ta, ma
-; CHECK-ZVKB-NEXT: vand.vx v8, v8, a0
+; CHECK-ZVKB-NEXT: vandn.vx v8, v8, a0
; CHECK-ZVKB-NEXT: .LBB94_2: # %identity
; CHECK-ZVKB-NEXT: ret
%a = xor i8 %m, -1
@@ -2158,9 +2157,8 @@ define <vscale x 1 x i8> @vand_vx_hoisted_not_swapped(<vscale x 1 x i8> %x, i8 %
; CHECK-ZVKB: # %bb.0:
; CHECK-ZVKB-NEXT: beqz a1, .LBB95_2
; CHECK-ZVKB-NEXT: # %bb.1: # %mask
-; CHECK-ZVKB-NEXT: not a0, a0
; CHECK-ZVKB-NEXT: vsetvli a1, zero, e8, mf8, ta, ma
-; CHECK-ZVKB-NEXT: vand.vx v8, v8, a0
+; CHECK-ZVKB-NEXT: vandn.vx v8, v8, a0
; CHECK-ZVKB-NEXT: .LBB95_2: # %identity
; CHECK-ZVKB-NEXT: ret
%a = xor i8 %m, -1
@@ -2192,8 +2190,7 @@ define <vscale x 1 x i8> @vand_vv_hoisted_not(<vscale x 1 x i8> %x, <vscale x 1
; CHECK-ZVKB-NEXT: beqz a0, .LBB96_2
; CHECK-ZVKB-NEXT: # %bb.1: # %mask
; CHECK-ZVKB-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
-; CHECK-ZVKB-NEXT: vnot.v v9, v9
-; CHECK-ZVKB-NEXT: vand.vv v8, v9, v8
+; CHECK-ZVKB-NEXT: vandn.vv v8, v8, v9
; CHECK-ZVKB-NEXT: .LBB96_2: # %identity
; CHECK-ZVKB-NEXT: ret
%a = xor <vscale x 1 x i8> %m, splat (i8 -1)
@@ -2223,8 +2220,7 @@ define <vscale x 1 x i8> @vand_vv_hoisted_not_swapped(<vscale x 1 x i8> %x, <vsc
; CHECK-ZVKB-NEXT: beqz a0, .LBB97_2
; CHECK-ZVKB-NEXT: # %bb.1: # %mask
; CHECK-ZVKB-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
-; CHECK-ZVKB-NEXT: vnot.v v9, v9
-; CHECK-ZVKB-NEXT: vand.vv v8, v8, v9
+; CHECK-ZVKB-NEXT: vandn.vv v8, v8, v9
; CHECK-ZVKB-NEXT: .LBB97_2: # %identity
; CHECK-ZVKB-NEXT: ret
%a = xor <vscale x 1 x i8> %m, splat (i8 -1)
@@ -2401,165 +2397,330 @@ define void @vand_vx_loop_hoisted_not(ptr %a, i32 noundef signext %mask) {
; CHECK-RV64-NEXT: .cfi_def_cfa_offset 0
; CHECK-RV64-NEXT: ret
;
-; CHECK-ZVKB32-LABEL: vand_vx_loop_hoisted_not:
-; CHECK-ZVKB32: # %bb.0: # %entry
-; CHECK-ZVKB32-NEXT: addi sp, sp, -32
-; CHECK-ZVKB32-NEXT: .cfi_def_cfa_offset 32
-; CHECK-ZVKB32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; CHECK-ZVKB32-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; CHECK-ZVKB32-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; CHECK-ZVKB32-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; CHECK-ZVKB32-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; CHECK-ZVKB32-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
-; CHECK-ZVKB32-NEXT: .cfi_offset ra, -4
-; CHECK-ZVKB32-NEXT: .cfi_offset s0, -8
-; CHECK-ZVKB32-NEXT: .cfi_offset s1, -12
-; CHECK-ZVKB32-NEXT: .cfi_offset s2, -16
-; CHECK-ZVKB32-NEXT: .cfi_offset s3, -20
-; CHECK-ZVKB32-NEXT: .cfi_offset s4, -24
-; CHECK-ZVKB32-NEXT: mv s0, a0
-; CHECK-ZVKB32-NEXT: not s2, a1
-; CHECK-ZVKB32-NEXT: csrr s1, vlenb
-; CHECK-ZVKB32-NEXT: srli s1, s1, 3
-; CHECK-ZVKB32-NEXT: li a2, 1
-; CHECK-ZVKB32-NEXT: mv a0, s1
-; CHECK-ZVKB32-NEXT: li a1, 0
-; CHECK-ZVKB32-NEXT: li a3, 0
-; CHECK-ZVKB32-NEXT: call __muldi3
-; CHECK-ZVKB32-NEXT: sltiu a0, a0, 65
-; CHECK-ZVKB32-NEXT: seqz a1, a1
-; CHECK-ZVKB32-NEXT: and a0, a1, a0
-; CHECK-ZVKB32-NEXT: bnez a0, .LBB98_2
-; CHECK-ZVKB32-NEXT: # %bb.1:
-; CHECK-ZVKB32-NEXT: li s3, 0
-; CHECK-ZVKB32-NEXT: li s4, 0
-; CHECK-ZVKB32-NEXT: j .LBB98_5
-; CHECK-ZVKB32-NEXT: .LBB98_2: # %vector.ph
-; CHECK-ZVKB32-NEXT: li a2, 508
-; CHECK-ZVKB32-NEXT: mv a0, s1
-; CHECK-ZVKB32-NEXT: li a1, 0
-; CHECK-ZVKB32-NEXT: li a3, 0
-; CHECK-ZVKB32-NEXT: call __muldi3
-; CHECK-ZVKB32-NEXT: li s4, 0
-; CHECK-ZVKB32-NEXT: andi s3, a0, 256
-; CHECK-ZVKB32-NEXT: li a2, 4
-; CHECK-ZVKB32-NEXT: mv a0, s1
-; CHECK-ZVKB32-NEXT: li a1, 0
-; CHECK-ZVKB32-NEXT: li a3, 0
-; CHECK-ZVKB32-NEXT: call __muldi3
-; CHECK-ZVKB32-NEXT: li a3, 0
-; CHECK-ZVKB32-NEXT: li a2, 0
-; CHECK-ZVKB32-NEXT: vsetvli a4, zero, e32, m2, ta, ma
-; CHECK-ZVKB32-NEXT: .LBB98_3: # %vector.body
-; CHECK-ZVKB32-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-ZVKB32-NEXT: slli a4, a3, 2
-; CHECK-ZVKB32-NEXT: add a5, a3, a0
-; CHECK-ZVKB32-NEXT: add a2, a2, a1
-; CHECK-ZVKB32-NEXT: add a4, s0, a4
-; CHECK-ZVKB32-NEXT: vl2re32.v v8, (a4)
-; CHECK-ZVKB32-NEXT: sltu a3, a5, a3
-; CHECK-ZVKB32-NEXT: add a2, a2, a3
-; CHECK-ZVKB32-NEXT: xor a3, a5, s3
-; CHECK-ZVKB32-NEXT: vand.vx v8, v8, s2
-; CHECK-ZVKB32-NEXT: or a6, a3, a2
-; CHECK-ZVKB32-NEXT: vs2r.v v8, (a4)
-; CHECK-ZVKB32-NEXT: mv a3, a5
-; CHECK-ZVKB32-NEXT: bnez a6, .LBB98_3
-; CHECK-ZVKB32-NEXT: # %bb.4: # %middle.block
-; CHECK-ZVKB32-NEXT: bnez s3, .LBB98_6
-; CHECK-ZVKB32-NEXT: .LBB98_5: # %for.body
-; CHECK-ZVKB32-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-ZVKB32-NEXT: slli a0, s3, 2
-; CHECK-ZVKB32-NEXT: addi s3, s3, 1
-; CHECK-ZVKB32-NEXT: add a0, s0, a0
-; CHECK-ZVKB32-NEXT: lw a1, 0(a0)
-; CHECK-ZVKB32-NEXT: seqz a2, s3
-; CHECK-ZVKB32-NEXT: add s4, s4, a2
-; CHECK-ZVKB32-NEXT: xori a2, s3, 256
-; CHECK-ZVKB32-NEXT: and a1, a1, s2
-; CHECK-ZVKB32-NEXT: or a2, a2, s4
-; CHECK-ZVKB32-NEXT: sw a1, 0(a0)
-; CHECK-ZVKB32-NEXT: bnez a2, .LBB98_5
-; CHECK-ZVKB32-NEXT: .LBB98_6: # %for.cond.cleanup
-; CHECK-ZVKB32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; CHECK-ZVKB32-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; CHECK-ZVKB32-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; CHECK-ZVKB32-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; CHECK-ZVKB32-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; CHECK-ZVKB32-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
-; CHECK-ZVKB32-NEXT: .cfi_restore ra
-; CHECK-ZVKB32-NEXT: .cfi_restore s0
-; CHECK-ZVKB32-NEXT: .cfi_restore s1
-; CHECK-ZVKB32-NEXT: .cfi_restore s2
-; CHECK-ZVKB32-NEXT: .cfi_restore s3
-; CHECK-ZVKB32-NEXT: .cfi_restore s4
-; CHECK-ZVKB32-NEXT: addi sp, sp, 32
-; CHECK-ZVKB32-NEXT: .cfi_def_cfa_offset 0
-; CHECK-ZVKB32-NEXT: ret
-;
-; CHECK-ZVKB64-LABEL: vand_vx_loop_hoisted_not:
-; CHECK-ZVKB64: # %bb.0: # %entry
-; CHECK-ZVKB64-NEXT: addi sp, sp, -32
-; CHECK-ZVKB64-NEXT: .cfi_def_cfa_offset 32
-; CHECK-ZVKB64-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; CHECK-ZVKB64-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; CHECK-ZVKB64-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; CHECK-ZVKB64-NEXT: sd s2, 0(sp) # 8-byte Folded Spill
-; CHECK-ZVKB64-NEXT: .cfi_offset ra, -8
-; CHECK-ZVKB64-NEXT: .cfi_offset s0, -16
-; CHECK-ZVKB64-NEXT: .cfi_offset s1, -24
-; CHECK-ZVKB64-NEXT: .cfi_offset s2, -32
-; CHECK-ZVKB64-NEXT: mv s0, a0
-; CHECK-ZVKB64-NEXT: csrr s2, vlenb
-; CHECK-ZVKB64-NEXT: srli a0, s2, 3
-; CHECK-ZVKB64-NEXT: li a2, 64
-; CHECK-ZVKB64-NEXT: not s1, a1
-; CHECK-ZVKB64-NEXT: bgeu a2, a0, .LBB98_2
-; CHECK-ZVKB64-NEXT: # %bb.1:
-; CHECK-ZVKB64-NEXT: li a0, 0
-; CHECK-ZVKB64-NEXT: j .LBB98_5
-; CHECK-ZVKB64-NEXT: .LBB98_2: # %vector.ph
-; CHECK-ZVKB64-NEXT: li a1, 508
-; CHECK-ZVKB64-NEXT: call __muldi3
-; CHECK-ZVKB64-NEXT: andi a0, a0, 256
-; CHECK-ZVKB64-NEXT: srli a1, s2, 1
-; CHECK-ZVKB64-NEXT: slli a2, s2, 1
-; CHECK-ZVKB64-NEXT: mv a3, s0
-; CHECK-ZVKB64-NEXT: mv a4, a0
-; CHECK-ZVKB64-NEXT: vsetvli a5, zero, e32, m2, ta, ma
-; CHECK-ZVKB64-NEXT: .LBB98_3: # %vector.body
-; CHECK-ZVKB64-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-ZVKB64-NEXT: vl2re32.v v8, (a3)
-; CHECK-ZVKB64-NEXT: sub a4, a4, a1
-; CHECK-ZVKB64-NEXT: vand.vx v8, v8, s1
-; CHECK-ZVKB64-NEXT: vs2r.v v8, (a3)
-; CHECK-ZVKB64-NEXT: add a3, a3, a2
-; CHECK-ZVKB64-NEXT: bnez a4, .LBB98_3
-; CHECK-ZVKB64-NEXT: # %bb.4: # %middle.block
-; CHECK-ZVKB64-NEXT: bnez a0, .LBB98_7
-; CHECK-ZVKB64-NEXT: .LBB98_5: # %for.body.preheader
-; CHECK-ZVKB64-NEXT: slli a0, a0, 2
-; CHECK-ZVKB64-NEXT: add a0, s0, a0
-; CHECK-ZVKB64-NEXT: addi a1, s0, 1024
-; CHECK-ZVKB64-NEXT: .LBB98_6: # %for.body
-; CHECK-ZVKB64-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-ZVKB64-NEXT: lw a2, 0(a0)
-; CHECK-ZVKB64-NEXT: and a2, a2, s1
-; CHECK-ZVKB64-NEXT: sw a2, 0(a0)
-; CHECK-ZVKB64-NEXT: addi a0, a0, 4
-; CHECK-ZVKB64-NEXT: bne a0, a1, .LBB98_6
-; CHECK-ZVKB64-NEXT: .LBB98_7: # %for.cond.cleanup
-; CHECK-ZVKB64-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; CHECK-ZVKB64-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; CHECK-ZVKB64-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; CHECK-ZVKB64-NEXT: ld s2, 0(sp) # 8-byte Folded Reload
-; CHECK-ZVKB64-NEXT: .cfi_restore ra
-; CHECK-ZVKB64-NEXT: .cfi_restore s0
-; CHECK-ZVKB64-NEXT: .cfi_restore s1
-; CHECK-ZVKB64-NEXT: .cfi_restore s2
-; CHECK-ZVKB64-NEXT: addi sp, sp, 32
-; CHECK-ZVKB64-NEXT: .cfi_def_cfa_offset 0
-; CHECK-ZVKB64-NEXT: ret
+; CHECK-ZVKB-NOZBB32-LABEL: vand_vx_loop_hoisted_not:
+; CHECK-ZVKB-NOZBB32: # %bb.0: # %entry
+; CHECK-ZVKB-NOZBB32-NEXT: addi sp, sp, -32
+; CHECK-ZVKB-NOZBB32-NEXT: .cfi_def_cfa_offset 32
+; CHECK-ZVKB-NOZBB32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; CHECK-ZVKB-NOZBB32-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; CHECK-ZVKB-NOZBB32-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; CHECK-ZVKB-NOZBB32-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; CHECK-ZVKB-NOZBB32-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; CHECK-ZVKB-NOZBB32-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
+; CHECK-ZVKB-NOZBB32-NEXT: .cfi_offset ra, -4
+; CHECK-ZVKB-NOZBB32-NEXT: .cfi_offset s0, -8
+; CHECK-ZVKB-NOZBB32-NEXT: .cfi_offset s1, -12
+; CHECK-ZVKB-NOZBB32-NEXT: .cfi_offset s2, -16
+; CHECK-ZVKB-NOZBB32-NEXT: .cfi_offset s3, -20
+; CHECK-ZVKB-NOZBB32-NEXT: .cfi_offset s4, -24
+; CHECK-ZVKB-NOZBB32-NEXT: mv s1, a1
+; CHECK-ZVKB-NOZBB32-NEXT: mv s0, a0
+; CHECK-ZVKB-NOZBB32-NEXT: csrr a0, vlenb
+; CHECK-ZVKB-NOZBB32-NEXT: srli s2, a0, 3
+; CHECK-ZVKB-NOZBB32-NEXT: li a2, 1
+; CHECK-ZVKB-NOZBB32-NEXT: mv a0, s2
+; CHECK-ZVKB-NOZBB32-NEXT: li a1, 0
+; CHECK-ZVKB-NOZBB32-NEXT: li a3, 0
+; CHECK-ZVKB-NOZBB32-NEXT: call __muldi3
+; CHECK-ZVKB-NOZBB32-NEXT: sltiu a0, a0, 65
+; CHECK-ZVKB-NOZBB32-NEXT: seqz a1, a1
+; CHECK-ZVKB-NOZBB32-NEXT: and a0, a1, a0
+; CHECK-ZVKB-NOZBB32-NEXT: bnez a0, .LBB98_2
+; CHECK-ZVKB-NOZBB32-NEXT: # %bb.1:
+; CHECK-ZVKB-NOZBB32-NEXT: li s3, 0
+; CHECK-ZVKB-NOZBB32-NEXT: li s4, 0
+; CHECK-ZVKB-NOZBB32-NEXT: j .LBB98_5
+; CHECK-ZVKB-NOZBB32-NEXT: .LBB98_2: # %vector.ph
+; CHECK-ZVKB-NOZBB32-NEXT: li a2, 508
+; CHECK-ZVKB-NOZBB32-NEXT: mv a0, s2
+; CHECK-ZVKB-NOZBB32-NEXT: li a1, 0
+; CHECK-ZVKB-NOZBB32-NEXT: li a3, 0
+; CHECK-ZVKB-NOZBB32-NEXT: call __muldi3
+; CHECK-ZVKB-NOZBB32-NEXT: li s4, 0
+; CHECK-ZVKB-NOZBB32-NEXT: andi s3, a0, 256
+; CHECK-ZVKB-NOZBB32-NEXT: li a2, 4
+; CHECK-ZVKB-NOZBB32-NEXT: mv a0, s2
+; CHECK-ZVKB-NOZBB32-NEXT: li a1, 0
+; CHECK-ZVKB-NOZBB32-NEXT: li a3, 0
+; CHECK-ZVKB-NOZBB32-NEXT: call __muldi3
+; CHECK-ZVKB-NOZBB32-NEXT: li a3, 0
+; CHECK-ZVKB-NOZBB32-NEXT: li a2, 0
+; CHECK-ZVKB-NOZBB32-NEXT: vsetvli a4, zero, e32, m2, ta, ma
+; CHECK-ZVKB-NOZBB32-NEXT: .LBB98_3: # %vector.body
+; CHECK-ZVKB-NOZBB32-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-ZVKB-NOZBB32-NEXT: slli a4, a3, 2
+; CHECK-ZVKB-NOZBB32-NEXT: add a5, a3, a0
+; CHECK-ZVKB-NOZBB32-NEXT: add a2, a2, a1
+; CHECK-ZVKB-NOZBB32-NEXT: add a4, s0, a4
+; CHECK-ZVKB-NOZBB32-NEXT: vl2re32.v v8, (a4)
+; CHECK-ZVKB-NOZBB32-NEXT: sltu a3, a5, a3
+; CHECK-ZVKB-NOZBB32-NEXT: add a2, a2, a3
+; CHECK-ZVKB-NOZBB32-NEXT: xor a3, a5, s3
+; CHECK-ZVKB-NOZBB32-NEXT: vandn.vx v8, v8, s1
+; CHECK-ZVKB-NOZBB32-NEXT: or a6, a3, a2
+; CHECK-ZVKB-NOZBB32-NEXT: vs2r.v v8, (a4)
+; CHECK-ZVKB-NOZBB32-NEXT: mv a3, a5
+; CHECK-ZVKB-NOZBB32-NEXT: bnez a6, .LBB98_3
+; CHECK-ZVKB-NOZBB32-NEXT: # %bb.4: # %middle.block
+; CHECK-ZVKB-NOZBB32-NEXT: bnez s3, .LBB98_7
+; CHECK-ZVKB-NOZBB32-NEXT: .LBB98_5: # %for.body.preheader
+; CHECK-ZVKB-NOZBB32-NEXT: not a0, s1
+; CHECK-ZVKB-NOZBB32-NEXT: .LBB98_6: # %for.body
+; CHECK-ZVKB-NOZBB32-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-ZVKB-NOZBB32-NEXT: slli a1, s3, 2
+; CHECK-ZVKB-NOZBB32-NEXT: addi s3, s3, 1
+; CHECK-ZVKB-NOZBB32-NEXT: add a1, s0, a1
+; CHECK-ZVKB-NOZBB32-NEXT: lw a2, 0(a1)
+; CHECK-ZVKB-NOZBB32-NEXT: seqz a3, s3
+; CHECK-ZVKB-NOZBB32-NEXT: add s4, s4, a3
+; CHECK-ZVKB-NOZBB32-NEXT: xori a3, s3, 256
+; CHECK-ZVKB-NOZBB32-NEXT: and a2, a2, a0
+; CHECK-ZVKB-NOZBB32-NEXT: or a3, a3, s4
+; CHECK-ZVKB-NOZBB32-NEXT: sw a2, 0(a1)
+; CHECK-ZVKB-NOZBB32-NEXT: bnez a3, .LBB98_6
+; CHECK-ZVKB-NOZBB32-NEXT: .LBB98_7: # %for.cond.cleanup
+; CHECK-ZVKB-NOZBB32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; CHECK-ZVKB-NOZBB32-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; CHECK-ZVKB-NOZBB32-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; CHECK-ZVKB-NOZBB32-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; CHECK-ZVKB-NOZBB32-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; CHECK-ZVKB-NOZBB32-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
+; CHECK-ZVKB-NOZBB32-NEXT: .cfi_restore ra
+; CHECK-ZVKB-NOZBB32-NEXT: .cfi_restore s0
+; CHECK-ZVKB-NOZBB32-NEXT: .cfi_restore s1
+; CHECK-ZVKB-NOZBB32-NEXT: .cfi_restore s2
+; CHECK-ZVKB-NOZBB32-NEXT: .cfi_restore s3
+; CHECK-ZVKB-NOZBB32-NEXT: .cfi_restore s4
+; CHECK-ZVKB-NOZBB32-NEXT: addi sp, sp, 32
+; CHECK-ZVKB-NOZBB32-NEXT: .cfi_def_cfa_offset 0
+; CHECK-ZVKB-NOZBB32-NEXT: ret
+;
+; CHECK-ZVKB-NOZBB64-LABEL: vand_vx_loop_hoisted_not:
+; CHECK-ZVKB-NOZBB64: # %bb.0: # %entry
+; CHECK-ZVKB-NOZBB64-NEXT: addi sp, sp, -32
+; CHECK-ZVKB-NOZBB64-NEXT: .cfi_def_cfa_offset 32
+; CHECK-ZVKB-NOZBB64-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; CHECK-ZVKB-NOZBB64-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; CHECK-ZVKB-NOZBB64-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; CHECK-ZVKB-NOZBB64-NEXT: sd s2, 0(sp) # 8-byte Folded Spill
+; CHECK-ZVKB-NOZBB64-NEXT: .cfi_offset ra, -8
+; CHECK-ZVKB-NOZBB64-NEXT: .cfi_offset s0, -16
+; CHECK-ZVKB-NOZBB64-NEXT: .cfi_offset s1, -24
+; CHECK-ZVKB-NOZBB64-NEXT: .cfi_offset s2, -32
+; CHECK-ZVKB-NOZBB64-NEXT: mv s0, a1
+; CHECK-ZVKB-NOZBB64-NEXT: csrr s2, vlenb
+; CHECK-ZVKB-NOZBB64-NEXT: srli a2, s2, 3
+; CHECK-ZVKB-NOZBB64-NEXT: li a1, 64
+; CHECK-ZVKB-NOZBB64-NEXT: mv s1, a0
+; CHECK-ZVKB-NOZBB64-NEXT: bgeu a1, a2, .LBB98_2
+; CHECK-ZVKB-NOZBB64-NEXT: # %bb.1:
+; CHECK-ZVKB-NOZBB64-NEXT: li a0, 0
+; CHECK-ZVKB-NOZBB64-NEXT: j .LBB98_5
+; CHECK-ZVKB-NOZBB64-NEXT: .LBB98_2: # %vector.ph
+; CHECK-ZVKB-NOZBB64-NEXT: li a1, 508
+; CHECK-ZVKB-NOZBB64-NEXT: mv a0, a2
+; CHECK-ZVKB-NOZBB64-NEXT: call __muldi3
+; CHECK-ZVKB-NOZBB64-NEXT: andi a0, a0, 256
+; CHECK-ZVKB-NOZBB64-NEXT: srli a1, s2, 1
+; CHECK-ZVKB-NOZBB64-NEXT: slli a2, s2, 1
+; CHECK-ZVKB-NOZBB64-NEXT: mv a3, s1
+; CHECK-ZVKB-NOZBB64-NEXT: mv a4, a0
+; CHECK-ZVKB-NOZBB64-NEXT: vsetvli a5, zero, e32, m2, ta, ma
+; CHECK-ZVKB-NOZBB64-NEXT: .LBB98_3: # %vector.body
+; CHECK-ZVKB-NOZBB64-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-ZVKB-NOZBB64-NEXT: vl2re32.v v8, (a3)
+; CHECK-ZVKB-NOZBB64-NEXT: sub a4, a4, a1
+; CHECK-ZVKB-NOZBB64-NEXT: vandn.vx v8, v8, s0
+; CHECK-ZVKB-NOZBB64-NEXT: vs2r.v v8, (a3)
+; CHECK-ZVKB-NOZBB64-NEXT: add a3, a3, a2
+; CHECK-ZVKB-NOZBB64-NEXT: bnez a4, .LBB98_3
+; CHECK-ZVKB-NOZBB64-NEXT: # %bb.4: # %middle.block
+; CHECK-ZVKB-NOZBB64-NEXT: bnez a0, .LBB98_7
+; CHECK-ZVKB-NOZBB64-NEXT: .LBB98_5: # %for.body.preheader
+; CHECK-ZVKB-NOZBB64-NEXT: not a1, s0
+; CHECK-ZVKB-NOZBB64-NEXT: slli a0, a0, 2
+; CHECK-ZVKB-NOZBB64-NEXT: add a0, s1, a0
+; CHECK-ZVKB-NOZBB64-NEXT: addi a2, s1, 1024
+; CHECK-ZVKB-NOZBB64-NEXT: .LBB98_6: # %for.body
+; CHECK-ZVKB-NOZBB64-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-ZVKB-NOZBB64-NEXT: lw a3, 0(a0)
+; CHECK-ZVKB-NOZBB64-NEXT: and a3, a3, a1
+; CHECK-ZVKB-NOZBB64-NEXT: sw a3, 0(a0)
+; CHECK-ZVKB-NOZBB64-NEXT: addi a0, a0, 4
+; CHECK-ZVKB-NOZBB64-NEXT: bne a0, a2, .LBB98_6
+; CHECK-ZVKB-NOZBB64-NEXT: .LBB98_7: # %for.cond.cleanup
+; CHECK-ZVKB-NOZBB64-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; CHECK-ZVKB-NOZBB64-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; CHECK-ZVKB-NOZBB64-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; CHECK-ZVKB-NOZBB64-NEXT: ld s2, 0(sp) # 8-byte Folded Reload
+; CHECK-ZVKB-NOZBB64-NEXT: .cfi_restore ra
+; CHECK-ZVKB-NOZBB64-NEXT: .cfi_restore s0
+; CHECK-ZVKB-NOZBB64-NEXT: .cfi_restore s1
+; CHECK-ZVKB-NOZBB64-NEXT: .cfi_restore s2
+; CHECK-ZVKB-NOZBB64-NEXT: addi sp, sp, 32
+; CHECK-ZVKB-NOZBB64-NEXT: .cfi_def_cfa_offset 0
+; CHECK-ZVKB-NOZBB64-NEXT: ret
+;
+; CHECK-ZVKB-ZBB32-LABEL: vand_vx_loop_hoisted_not:
+; CHECK-ZVKB-ZBB32: # %bb.0: # %entry
+; CHECK-ZVKB-ZBB32-NEXT: addi sp, sp, -32
+; CHECK-ZVKB-ZBB32-NEXT: .cfi_def_cfa_offset 32
+; CHECK-ZVKB-ZBB32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; CHECK-ZVKB-ZBB32-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; CHECK-ZVKB-ZBB32-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; CHECK-ZVKB-ZBB32-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; CHECK-ZVKB-ZBB32-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; CHECK-ZVKB-ZBB32-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
+; CHECK-ZVKB-ZBB32-NEXT: .cfi_offset ra, -4
+; CHECK-ZVKB-ZBB32-NEXT: .cfi_offset s0, -8
+; CHECK-ZVKB-ZBB32-NEXT: .cfi_offset s1, -12
+; CHECK-ZVKB-ZBB32-NEXT: .cfi_offset s2, -16
+; CHECK-ZVKB-ZBB32-NEXT: .cfi_offset s3, -20
+; CHECK-ZVKB-ZBB32-NEXT: .cfi_offset s4, -24
+; CHECK-ZVKB-ZBB32-NEXT: mv s0, a1
+; CHECK-ZVKB-ZBB32-NEXT: mv s1, a0
+; CHECK-ZVKB-ZBB32-NEXT: csrr a0, vlenb
+; CHECK-ZVKB-ZBB32-NEXT: srli s2, a0, 3
+; CHECK-ZVKB-ZBB32-NEXT: li a2, 1
+; CHECK-ZVKB-ZBB32-NEXT: mv a0, s2
+; CHECK-ZVKB-ZBB32-NEXT: li a1, 0
+; CHECK-ZVKB-ZBB32-NEXT: li a3, 0
+; CHECK-ZVKB-ZBB32-NEXT: call __muldi3
+; CHECK-ZVKB-ZBB32-NEXT: sltiu a0, a0, 65
+; CHECK-ZVKB-ZBB32-NEXT: seqz a1, a1
+; CHECK-ZVKB-ZBB32-NEXT: and a0, a1, a0
+; CHECK-ZVKB-ZBB32-NEXT: bnez a0, .LBB98_2
+; CHECK-ZVKB-ZBB32-NEXT: # %bb.1:
+; CHECK-ZVKB-ZBB32-NEXT: li s3, 0
+; CHECK-ZVKB-ZBB32-NEXT: li s4, 0
+; CHECK-ZVKB-ZBB32-NEXT: j .LBB98_5
+; CHECK-ZVKB-ZBB32-NEXT: .LBB98_2: # %vector.ph
+; CHECK-ZVKB-ZBB32-NEXT: li a2, 508
+; CHECK-ZVKB-ZBB32-NEXT: mv a0, s2
+; CHECK-ZVKB-ZBB32-NEXT: li a1, 0
+; CHECK-ZVKB-ZBB32-NEXT: li a3, 0
+; CHECK-ZVKB-ZBB32-NEXT: call __muldi3
+; CHECK-ZVKB-ZBB32-NEXT: li s4, 0
+; CHECK-ZVKB-ZBB32-NEXT: andi s3, a0, 256
+; CHECK-ZVKB-ZBB32-NEXT: li a2, 4
+; CHECK-ZVKB-ZBB32-NEXT: mv a0, s2
+; CHECK-ZVKB-ZBB32-NEXT: li a1, 0
+; CHECK-ZVKB-ZBB32-NEXT: li a3, 0
+; CHECK-ZVKB-ZBB32-NEXT: call __muldi3
+; CHECK-ZVKB-ZBB32-NEXT: li a3, 0
+; CHECK-ZVKB-ZBB32-NEXT: li a2, 0
+; CHECK-ZVKB-ZBB32-NEXT: vsetvli a4, zero, e32, m2, ta, ma
+; CHECK-ZVKB-ZBB32-NEXT: .LBB98_3: # %vector.body
+; CHECK-ZVKB-ZBB32-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-ZVKB-ZBB32-NEXT: slli a4, a3, 2
+; CHECK-ZVKB-ZBB32-NEXT: add a5, a3, a0
+; CHECK-ZVKB-ZBB32-NEXT: add a2, a2, a1
+; CHECK-ZVKB-ZBB32-NEXT: add a4, s1, a4
+; CHECK-ZVKB-ZBB32-NEXT: vl2re32.v v8, (a4)
+; CHECK-ZVKB-ZBB32-NEXT: sltu a3, a5, a3
+; CHECK-ZVKB-ZBB32-NEXT: add a2, a2, a3
+; CHECK-ZVKB-ZBB32-NEXT: xor a3, a5, s3
+; CHECK-ZVKB-ZBB32-NEXT: vandn.vx v8, v8, s0
+; CHECK-ZVKB-ZBB32-NEXT: or a6, a3, a2
+; CHECK-ZVKB-ZBB32-NEXT: vs2r.v v8, (a4)
+; CHECK-ZVKB-ZBB32-NEXT: mv a3, a5
+; CHECK-ZVKB-ZBB32-NEXT: bnez a6, .LBB98_3
+; CHECK-ZVKB-ZBB32-NEXT: # %bb.4: # %middle.block
+; CHECK-ZVKB-ZBB32-NEXT: bnez s3, .LBB98_6
+; CHECK-ZVKB-ZBB32-NEXT: .LBB98_5: # %for.body
+; CHECK-ZVKB-ZBB32-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-ZVKB-ZBB32-NEXT: slli a0, s3, 2
+; CHECK-ZVKB-ZBB32-NEXT: addi s3, s3, 1
+; CHECK-ZVKB-ZBB32-NEXT: add a0, s1, a0
+; CHECK-ZVKB-ZBB32-NEXT: lw a1, 0(a0)
+; CHECK-ZVKB-ZBB32-NEXT: seqz a2, s3
+; CHECK-ZVKB-ZBB32-NEXT: add s4, s4, a2
+; CHECK-ZVKB-ZBB32-NEXT: xori a2, s3, 256
+; CHECK-ZVKB-ZBB32-NEXT: andn a1, a1, s0
+; CHECK-ZVKB-ZBB32-NEXT: or a2, a2, s4
+; CHECK-ZVKB-ZBB32-NEXT: sw a1, 0(a0)
+; CHECK-ZVKB-ZBB32-NEXT: bnez a2, .LBB98_5
+; CHECK-ZVKB-ZBB32-NEXT: .LBB98_6: # %for.cond.cleanup
+; CHECK-ZVKB-ZBB32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; CHECK-ZVKB-ZBB32-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; CHECK-ZVKB-ZBB32-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; CHECK-ZVKB-ZBB32-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; CHECK-ZVKB-ZBB32-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; CHECK-ZVKB-ZBB32-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
+; CHECK-ZVKB-ZBB32-NEXT: .cfi_restore ra
+; CHECK-ZVKB-ZBB32-NEXT: .cfi_restore s0
+; CHECK-ZVKB-ZBB32-NEXT: .cfi_restore s1
+; CHECK-ZVKB-ZBB32-NEXT: .cfi_restore s2
+; CHECK-ZVKB-ZBB32-NEXT: .cfi_restore s3
+; CHECK-ZVKB-ZBB32-NEXT: .cfi_restore s4
+; CHECK-ZVKB-ZBB32-NEXT: addi sp, sp, 32
+; CHECK-ZVKB-ZBB32-NEXT: .cfi_def_cfa_offset 0
+; CHECK-ZVKB-ZBB32-NEXT: ret
+;
+; CHECK-ZVKB-ZBB64-LABEL: vand_vx_loop_hoisted_not:
+; CHECK-ZVKB-ZBB64: # %bb.0: # %entry
+; CHECK-ZVKB-ZBB64-NEXT: addi sp, sp, -32
+; CHECK-ZVKB-ZBB64-NEXT: .cfi_def_cfa_offset 32
+; CHECK-ZVKB-ZBB64-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; CHECK-ZVKB-ZBB64-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; CHECK-ZVKB-ZBB64-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; CHECK-ZVKB-ZBB64-NEXT: sd s2, 0(sp) # 8-byte Folded Spill
+; CHECK-ZVKB-ZBB64-NEXT: .cfi_offset ra, -8
+; CHECK-ZVKB-ZBB64-NEXT: .cfi_offset s0, -16
+; CHECK-ZVKB-ZBB64-NEXT: .cfi_offset s1, -24
+; CHECK-ZVKB-ZBB64-NEXT: .cfi_offset s2, -32
+; CHECK-ZVKB-ZBB64-NEXT: mv s0, a1
+; CHECK-ZVKB-ZBB64-NEXT: csrr s2, vlenb
+; CHECK-ZVKB-ZBB64-NEXT: srli a2, s2, 3
+; CHECK-ZVKB-ZBB64-NEXT: li a1, 64
+; CHECK-ZVKB-ZBB64-NEXT: mv s1, a0
+; CHECK-ZVKB-ZBB64-NEXT: bgeu a1, a2, .LBB98_2
+; CHECK-ZVKB-ZBB64-NEXT: # %bb.1:
+; CHECK-ZVKB-ZBB64-NEXT: li a0, 0
+; CHECK-ZVKB-ZBB64-NEXT: j .LBB98_5
+; CHECK-ZVKB-ZBB64-NEXT: .LBB98_2: # %vector.ph
+; CHECK-ZVKB-ZBB64-NEXT: li a1, 508
+; CHECK-ZVKB-ZBB64-NEXT: mv a0, a2
+; CHECK-ZVKB-ZBB64-NEXT: call __muldi3
+; CHECK-ZVKB-ZBB64-NEXT: andi a0, a0, 256
+; CHECK-ZVKB-ZBB64-NEXT: srli a1, s2, 1
+; CHECK-ZVKB-ZBB64-NEXT: slli a2, s2, 1
+; CHECK-ZVKB-ZBB64-NEXT: mv a3, s1
+; CHECK-ZVKB-ZBB64-NEXT: mv a4, a0
+; CHECK-ZVKB-ZBB64-NEXT: vsetvli a5, zero, e32, m2, ta, ma
+; CHECK-ZVKB-ZBB64-NEXT: .LBB98_3: # %vector.body
+; CHECK-ZVKB-ZBB64-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-ZVKB-ZBB64-NEXT: vl2re32.v v8, (a3)
+; CHECK-ZVKB-ZBB64-NEXT: sub a4, a4, a1
+; CHECK-ZVKB-ZBB64-NEXT: vandn.vx v8, v8, s0
+; CHECK-ZVKB-ZBB64-NEXT: vs2r.v v8, (a3)
+; CHECK-ZVKB-ZBB64-NEXT: add a3, a3, a2
+; CHECK-ZVKB-ZBB64-NEXT: bnez a4, .LBB98_3
+; CHECK-ZVKB-ZBB64-NEXT: # %bb.4: # %middle.block
+; CHECK-ZVKB-ZBB64-NEXT: bnez a0, .LBB98_7
+; CHECK-ZVKB-ZBB64-NEXT: .LBB98_5: # %for.body.preheader
+; CHECK-ZVKB-ZBB64-NEXT: slli a0, a0, 2
+; CHECK-ZVKB-ZBB64-NEXT: add a0, s1, a0
+; CHECK-ZVKB-ZBB64-NEXT: addi a1, s1, 1024
+; CHECK-ZVKB-ZBB64-NEXT: .LBB98_6: # %for.body
+; CHECK-ZVKB-ZBB64-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-ZVKB-ZBB64-NEXT: lw a2, 0(a0)
+; CHECK-ZVKB-ZBB64-NEXT: andn a2, a2, s0
+; CHECK-ZVKB-ZBB64-NEXT: sw a2, 0(a0)
+; CHECK-ZVKB-ZBB64-NEXT: addi a0, a0, 4
+; CHECK-ZVKB-ZBB64-NEXT: bne a0, a1, .LBB98_6
+; CHECK-ZVKB-ZBB64-NEXT: .LBB98_7: # %for.cond.cleanup
+; CHECK-ZVKB-ZBB64-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; CHECK-ZVKB-ZBB64-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; CHECK-ZVKB-ZBB64-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; CHECK-ZVKB-ZBB64-NEXT: ld s2, 0(sp) # 8-byte Folded Reload
+; CHECK-ZVKB-ZBB64-NEXT: .cfi_restore ra
+; CHECK-ZVKB-ZBB64-NEXT: .cfi_restore s0
+; CHECK-ZVKB-ZBB64-NEXT: .cfi_restore s1
+; CHECK-ZVKB-ZBB64-NEXT: .cfi_restore s2
+; CHECK-ZVKB-ZBB64-NEXT: addi sp, sp, 32
+; CHECK-ZVKB-ZBB64-NEXT: .cfi_def_cfa_offset 0
+; CHECK-ZVKB-ZBB64-NEXT: ret
entry:
%not = xor i32 %mask, -1
%vscale = tail call i64 @llvm.vscale.i64()
@@ -2607,8 +2768,3 @@ for.body:
%exitcond.not = icmp eq i64 %indvars.iv.next, 256
br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
}
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; CHECK-ZVKB-NOZBB32: {{.*}}
-; CHECK-ZVKB-NOZBB64: {{.*}}
-; CHECK-ZVKB-ZBB32: {{.*}}
-; CHECK-ZVKB-ZBB64: {{.*}}
More information about the llvm-commits
mailing list