[llvm] [LoongArch] Support vector types for hasAndNot to enable more DAG combines (PR #159056)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Sep 25 01:48:01 PDT 2025
zhaoqi5 updated https://github.com/llvm/llvm-project/pull/159056
From 1b29a1804fd9774dac3732e891005fe1f0f1ffc3 Mon Sep 17 00:00:00 2001
From: Qi Zhao <zhaoqi01 at loongson.cn>
Date: Tue, 16 Sep 2025 19:49:12 +0800
Subject: [PATCH 1/2] [LoongArch][NFC] Add tests for combining `and(add(not))`
---
.../CodeGen/LoongArch/lasx/and-not-combine.ll | 94 +++++++++++++++++++
.../CodeGen/LoongArch/lsx/and-not-combine.ll | 94 +++++++++++++++++++
2 files changed, 188 insertions(+)
create mode 100644 llvm/test/CodeGen/LoongArch/lasx/and-not-combine.ll
create mode 100644 llvm/test/CodeGen/LoongArch/lsx/and-not-combine.ll
diff --git a/llvm/test/CodeGen/LoongArch/lasx/and-not-combine.ll b/llvm/test/CodeGen/LoongArch/lasx/and-not-combine.ll
new file mode 100644
index 0000000000000..75ac4c99ef7c1
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/and-not-combine.ll
@@ -0,0 +1,94 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
+
+define void @and_not_combine_v32i8(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
+; CHECK-LABEL: and_not_combine_v32i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a2, 0
+; CHECK-NEXT: xvld $xr1, $a3, 0
+; CHECK-NEXT: xvld $xr2, $a1, 0
+; CHECK-NEXT: xvxori.b $xr0, $xr0, 255
+; CHECK-NEXT: xvadd.b $xr0, $xr0, $xr1
+; CHECK-NEXT: xvand.v $xr0, $xr2, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <32 x i8>, ptr %a0
+ %v1 = load <32 x i8>, ptr %a1
+ %v2 = load <32 x i8>, ptr %a2
+ %not = xor <32 x i8> %v1, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+ %add = add <32 x i8> %not, %v2
+ %and = and <32 x i8> %v0, %add
+ store <32 x i8> %and, ptr %res
+ ret void
+}
+
+define void @and_not_combine_v16i16(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
+; CHECK-LABEL: and_not_combine_v16i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a2, 0
+; CHECK-NEXT: xvld $xr1, $a3, 0
+; CHECK-NEXT: xvld $xr2, $a1, 0
+; CHECK-NEXT: xvrepli.b $xr3, -1
+; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr3
+; CHECK-NEXT: xvadd.h $xr0, $xr0, $xr1
+; CHECK-NEXT: xvand.v $xr0, $xr2, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <16 x i16>, ptr %a0
+ %v1 = load <16 x i16>, ptr %a1
+ %v2 = load <16 x i16>, ptr %a2
+ %not = xor <16 x i16> %v1, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+ %add = add <16 x i16> %not, %v2
+ %and = and <16 x i16> %v0, %add
+ store <16 x i16> %and, ptr %res
+ ret void
+}
+
+define void @and_not_combine_v8i32(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
+; CHECK-LABEL: and_not_combine_v8i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a2, 0
+; CHECK-NEXT: xvld $xr1, $a3, 0
+; CHECK-NEXT: xvld $xr2, $a1, 0
+; CHECK-NEXT: xvrepli.b $xr3, -1
+; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr3
+; CHECK-NEXT: xvadd.w $xr0, $xr0, $xr1
+; CHECK-NEXT: xvand.v $xr0, $xr2, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <8 x i32>, ptr %a0
+ %v1 = load <8 x i32>, ptr %a1
+ %v2 = load <8 x i32>, ptr %a2
+ %not = xor <8 x i32> %v1, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
+ %add = add <8 x i32> %not, %v2
+ %and = and <8 x i32> %v0, %add
+ store <8 x i32> %and, ptr %res
+ ret void
+}
+
+define void @and_not_combine_v4i64(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
+; CHECK-LABEL: and_not_combine_v4i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a2, 0
+; CHECK-NEXT: xvld $xr1, $a3, 0
+; CHECK-NEXT: xvld $xr2, $a1, 0
+; CHECK-NEXT: xvrepli.b $xr3, -1
+; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr3
+; CHECK-NEXT: xvadd.d $xr0, $xr0, $xr1
+; CHECK-NEXT: xvand.v $xr0, $xr2, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <4 x i64>, ptr %a0
+ %v1 = load <4 x i64>, ptr %a1
+ %v2 = load <4 x i64>, ptr %a2
+ %not = xor <4 x i64> %v1, <i64 -1, i64 -1, i64 -1, i64 -1>
+ %add = add <4 x i64> %not, %v2
+ %and = and <4 x i64> %v0, %add
+ store <4 x i64> %and, ptr %res
+ ret void
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/and-not-combine.ll b/llvm/test/CodeGen/LoongArch/lsx/and-not-combine.ll
new file mode 100644
index 0000000000000..39060bfa92c0d
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/and-not-combine.ll
@@ -0,0 +1,94 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
+
+define void @and_not_combine_v16i8(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
+; CHECK-LABEL: and_not_combine_v16i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a2, 0
+; CHECK-NEXT: vld $vr1, $a3, 0
+; CHECK-NEXT: vld $vr2, $a1, 0
+; CHECK-NEXT: vxori.b $vr0, $vr0, 255
+; CHECK-NEXT: vadd.b $vr0, $vr0, $vr1
+; CHECK-NEXT: vand.v $vr0, $vr2, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <16 x i8>, ptr %a0
+ %v1 = load <16 x i8>, ptr %a1
+ %v2 = load <16 x i8>, ptr %a2
+ %not = xor <16 x i8> %v1, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+ %add = add <16 x i8> %not, %v2
+ %and = and <16 x i8> %v0, %add
+ store <16 x i8> %and, ptr %res
+ ret void
+}
+
+define void @and_not_combine_v8i16(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
+; CHECK-LABEL: and_not_combine_v8i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a2, 0
+; CHECK-NEXT: vld $vr1, $a3, 0
+; CHECK-NEXT: vld $vr2, $a1, 0
+; CHECK-NEXT: vrepli.b $vr3, -1
+; CHECK-NEXT: vxor.v $vr0, $vr0, $vr3
+; CHECK-NEXT: vadd.h $vr0, $vr0, $vr1
+; CHECK-NEXT: vand.v $vr0, $vr2, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <8 x i16>, ptr %a0
+ %v1 = load <8 x i16>, ptr %a1
+ %v2 = load <8 x i16>, ptr %a2
+ %not = xor <8 x i16> %v1, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+ %add = add <8 x i16> %not, %v2
+ %and = and <8 x i16> %v0, %add
+ store <8 x i16> %and, ptr %res
+ ret void
+}
+
+define void @and_not_combine_v4i32(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
+; CHECK-LABEL: and_not_combine_v4i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a2, 0
+; CHECK-NEXT: vld $vr1, $a3, 0
+; CHECK-NEXT: vld $vr2, $a1, 0
+; CHECK-NEXT: vrepli.b $vr3, -1
+; CHECK-NEXT: vxor.v $vr0, $vr0, $vr3
+; CHECK-NEXT: vadd.w $vr0, $vr0, $vr1
+; CHECK-NEXT: vand.v $vr0, $vr2, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <4 x i32>, ptr %a0
+ %v1 = load <4 x i32>, ptr %a1
+ %v2 = load <4 x i32>, ptr %a2
+ %not = xor <4 x i32> %v1, <i32 -1, i32 -1, i32 -1, i32 -1>
+ %add = add <4 x i32> %not, %v2
+ %and = and <4 x i32> %v0, %add
+ store <4 x i32> %and, ptr %res
+ ret void
+}
+
+define void @and_not_combine_v2i64(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
+; CHECK-LABEL: and_not_combine_v2i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a2, 0
+; CHECK-NEXT: vld $vr1, $a3, 0
+; CHECK-NEXT: vld $vr2, $a1, 0
+; CHECK-NEXT: vrepli.b $vr3, -1
+; CHECK-NEXT: vxor.v $vr0, $vr0, $vr3
+; CHECK-NEXT: vadd.d $vr0, $vr0, $vr1
+; CHECK-NEXT: vand.v $vr0, $vr2, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <2 x i64>, ptr %a0
+ %v1 = load <2 x i64>, ptr %a1
+ %v2 = load <2 x i64>, ptr %a2
+ %not = xor <2 x i64> %v1, <i64 -1, i64 -1>
+ %add = add <2 x i64> %not, %v2
+ %and = and <2 x i64> %v0, %add
+ store <2 x i64> %and, ptr %res
+ ret void
+}
From e6f1f61f150808760ea03e16969486b9666254a0 Mon Sep 17 00:00:00 2001
From: Qi Zhao <zhaoqi01 at loongson.cn>
Date: Tue, 16 Sep 2025 19:51:34 +0800
Subject: [PATCH 2/2] [LoongArch] Support vector types for hasAndNot to enable
more DAG combines
After this commit, DAGCombiner has more opportunities to optimize
`and+...+not` patterns on vector types into `andn`.
Many combines in DAGCombiner become enabled, but the tests in this
commit only show the changes from combining `and(add(not))` into
`and(not(sub))`.
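For reference, that rewrite relies on the two's-complement identity
`~y + z == ~(y - z)`, which turns `and(x, add(not(y), z))` into the
`andn`-friendly `and(x, not(sub(y, z)))`. A minimal standalone C++
check of the per-lane identity (illustrative only, not part of this
patch):

    #include <cassert>
    #include <cstdint>

    // Verify ~y + z == ~(y - z) over all 8-bit lane values; this is
    // the scalar identity behind the and(add(not)) -> and(not(sub))
    // combine shown in the tests below.
    int main() {
      for (unsigned y = 0; y < 256; ++y)
        for (unsigned z = 0; z < 256; ++z)
          assert(static_cast<uint8_t>(~y + z) ==
                 static_cast<uint8_t>(~(y - z)));
      return 0;
    }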
---
.../LoongArch/LoongArchISelLowering.cpp | 8 +++++--
.../CodeGen/LoongArch/lasx/and-not-combine.ll | 23 +++++++------------
.../CodeGen/LoongArch/lsx/and-not-combine.ll | 23 +++++++------------
3 files changed, 22 insertions(+), 32 deletions(-)
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 634914d3b3fd0..7a4e5b5597f7c 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -8249,8 +8249,12 @@ EVT LoongArchTargetLowering::getSetCCResultType(const DataLayout &DL,
}
bool LoongArchTargetLowering::hasAndNot(SDValue Y) const {
- // TODO: Support vectors.
- return Y.getValueType().isScalarInteger() && !isa<ConstantSDNode>(Y);
+ EVT VT = Y.getValueType();
+
+ if (VT.isVector())
+ return Subtarget.hasExtLSX() && VT.isInteger();
+
+ return VT.isScalarInteger() && !isa<ConstantSDNode>(Y);
}
bool LoongArchTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
diff --git a/llvm/test/CodeGen/LoongArch/lasx/and-not-combine.ll b/llvm/test/CodeGen/LoongArch/lasx/and-not-combine.ll
index 75ac4c99ef7c1..67549599db2f3 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/and-not-combine.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/and-not-combine.ll
@@ -8,9 +8,8 @@ define void @and_not_combine_v32i8(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind
; CHECK-NEXT: xvld $xr0, $a2, 0
; CHECK-NEXT: xvld $xr1, $a3, 0
; CHECK-NEXT: xvld $xr2, $a1, 0
-; CHECK-NEXT: xvxori.b $xr0, $xr0, 255
-; CHECK-NEXT: xvadd.b $xr0, $xr0, $xr1
-; CHECK-NEXT: xvand.v $xr0, $xr2, $xr0
+; CHECK-NEXT: xvsub.b $xr0, $xr0, $xr1
+; CHECK-NEXT: xvandn.v $xr0, $xr0, $xr2
; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -30,10 +29,8 @@ define void @and_not_combine_v16i16(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwin
; CHECK-NEXT: xvld $xr0, $a2, 0
; CHECK-NEXT: xvld $xr1, $a3, 0
; CHECK-NEXT: xvld $xr2, $a1, 0
-; CHECK-NEXT: xvrepli.b $xr3, -1
-; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr3
-; CHECK-NEXT: xvadd.h $xr0, $xr0, $xr1
-; CHECK-NEXT: xvand.v $xr0, $xr2, $xr0
+; CHECK-NEXT: xvsub.h $xr0, $xr0, $xr1
+; CHECK-NEXT: xvandn.v $xr0, $xr0, $xr2
; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -53,10 +50,8 @@ define void @and_not_combine_v8i32(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind
; CHECK-NEXT: xvld $xr0, $a2, 0
; CHECK-NEXT: xvld $xr1, $a3, 0
; CHECK-NEXT: xvld $xr2, $a1, 0
-; CHECK-NEXT: xvrepli.b $xr3, -1
-; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr3
-; CHECK-NEXT: xvadd.w $xr0, $xr0, $xr1
-; CHECK-NEXT: xvand.v $xr0, $xr2, $xr0
+; CHECK-NEXT: xvsub.w $xr0, $xr0, $xr1
+; CHECK-NEXT: xvandn.v $xr0, $xr0, $xr2
; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -76,10 +71,8 @@ define void @and_not_combine_v4i64(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind
; CHECK-NEXT: xvld $xr0, $a2, 0
; CHECK-NEXT: xvld $xr1, $a3, 0
; CHECK-NEXT: xvld $xr2, $a1, 0
-; CHECK-NEXT: xvrepli.b $xr3, -1
-; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr3
-; CHECK-NEXT: xvadd.d $xr0, $xr0, $xr1
-; CHECK-NEXT: xvand.v $xr0, $xr2, $xr0
+; CHECK-NEXT: xvsub.d $xr0, $xr0, $xr1
+; CHECK-NEXT: xvandn.v $xr0, $xr0, $xr2
; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
entry:
diff --git a/llvm/test/CodeGen/LoongArch/lsx/and-not-combine.ll b/llvm/test/CodeGen/LoongArch/lsx/and-not-combine.ll
index 39060bfa92c0d..3c6d34505e114 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/and-not-combine.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/and-not-combine.ll
@@ -8,9 +8,8 @@ define void @and_not_combine_v16i8(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind
; CHECK-NEXT: vld $vr0, $a2, 0
; CHECK-NEXT: vld $vr1, $a3, 0
; CHECK-NEXT: vld $vr2, $a1, 0
-; CHECK-NEXT: vxori.b $vr0, $vr0, 255
-; CHECK-NEXT: vadd.b $vr0, $vr0, $vr1
-; CHECK-NEXT: vand.v $vr0, $vr2, $vr0
+; CHECK-NEXT: vsub.b $vr0, $vr0, $vr1
+; CHECK-NEXT: vandn.v $vr0, $vr0, $vr2
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -30,10 +29,8 @@ define void @and_not_combine_v8i16(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind
; CHECK-NEXT: vld $vr0, $a2, 0
; CHECK-NEXT: vld $vr1, $a3, 0
; CHECK-NEXT: vld $vr2, $a1, 0
-; CHECK-NEXT: vrepli.b $vr3, -1
-; CHECK-NEXT: vxor.v $vr0, $vr0, $vr3
-; CHECK-NEXT: vadd.h $vr0, $vr0, $vr1
-; CHECK-NEXT: vand.v $vr0, $vr2, $vr0
+; CHECK-NEXT: vsub.h $vr0, $vr0, $vr1
+; CHECK-NEXT: vandn.v $vr0, $vr0, $vr2
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -53,10 +50,8 @@ define void @and_not_combine_v4i32(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind
; CHECK-NEXT: vld $vr0, $a2, 0
; CHECK-NEXT: vld $vr1, $a3, 0
; CHECK-NEXT: vld $vr2, $a1, 0
-; CHECK-NEXT: vrepli.b $vr3, -1
-; CHECK-NEXT: vxor.v $vr0, $vr0, $vr3
-; CHECK-NEXT: vadd.w $vr0, $vr0, $vr1
-; CHECK-NEXT: vand.v $vr0, $vr2, $vr0
+; CHECK-NEXT: vsub.w $vr0, $vr0, $vr1
+; CHECK-NEXT: vandn.v $vr0, $vr0, $vr2
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -76,10 +71,8 @@ define void @and_not_combine_v2i64(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind
; CHECK-NEXT: vld $vr0, $a2, 0
; CHECK-NEXT: vld $vr1, $a3, 0
; CHECK-NEXT: vld $vr2, $a1, 0
-; CHECK-NEXT: vrepli.b $vr3, -1
-; CHECK-NEXT: vxor.v $vr0, $vr0, $vr3
-; CHECK-NEXT: vadd.d $vr0, $vr0, $vr1
-; CHECK-NEXT: vand.v $vr0, $vr2, $vr0
+; CHECK-NEXT: vsub.d $vr0, $vr0, $vr1
+; CHECK-NEXT: vandn.v $vr0, $vr0, $vr2
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry: