[llvm] [DAGCombiner] Limit EXTRACT(SETCC) combines in scalarizeExtractedBinOp to i1 types (PR #123071)

Wed Jan 15 07:46:16 PST 2025

https://github.com/david-arm created https://github.com/llvm/llvm-project/pull/123071

PR https://github.com/llvm/llvm-project/pull/118823 added a
DAG combine for extracting elements of a vector returned from
SETCC, however it doesn't correctly deal with the case where
the vector element type is not i1. In this case we have to
take account of the boolean contents, which are represent
differently between vectors and scalars. For now, I've just
restricted the optimisation to i1 types.

Fixes https://github.com/llvm/llvm-project/issues/121372

>From 15b8b7e8d965000faa16d843deb847865ef86ef1 Mon Sep 17 00:00:00 2001
From: David Sherwood <david.sherwood at arm.com>
Date: Wed, 15 Jan 2025 15:40:27 +0000
Subject: [PATCH 1/2] Add test

---
 .../CodeGen/AArch64/extract-vector-cmp.ll     | 22 +++++++++++++++++--
 1 file changed, 20 insertions(+), 2 deletions(-)

diff --git a/llvm/test/CodeGen/AArch64/extract-vector-cmp.ll b/llvm/test/CodeGen/AArch64/extract-vector-cmp.ll
index 12bd2db2297d77..56e92e9517cba4 100644
--- a/llvm/test/CodeGen/AArch64/extract-vector-cmp.ll
+++ b/llvm/test/CodeGen/AArch64/extract-vector-cmp.ll
@@ -141,6 +141,24 @@ for.cond.cleanup:
 }
 
 
+define i32 @issue_121372(<4 x i32> %0) {
+; CHECK-LABEL: issue_121372:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmov w8, s0
+; CHECK-NEXT:    cmp w8, #0
+; CHECK-NEXT:    cset w8, eq
+; CHECK-NEXT:    cmp w8, #1
+; CHECK-NEXT:    csetm w0, lt
+; CHECK-NEXT:    ret
+  %2 = icmp ule <4 x i32> %0, zeroinitializer
+  %3 = sext <4 x i1> %2 to <4 x i32>
+  %4 = icmp sge <4 x i32> zeroinitializer, %3
+  %5 = extractelement <4 x i1> %4, i32 0
+  %6 = sext i1 %5 to i32
+  ret i32 %6
+}
+
+
 ; Negative tests
 
 define i1 @extract_icmp_v4i32_splat_rhs(<4 x i32> %a, i32 %b) {
@@ -163,9 +181,9 @@ define i1 @extract_icmp_v4i32_splat_rhs_mul_use(<4 x i32> %a, ptr %p) {
 ; CHECK-LABEL: extract_icmp_v4i32_splat_rhs_mul_use:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    movi v1.4s, #235
-; CHECK-NEXT:    adrp x9, .LCPI7_0
+; CHECK-NEXT:    adrp x9, .LCPI8_0
 ; CHECK-NEXT:    mov x8, x0
-; CHECK-NEXT:    ldr q2, [x9, :lo12:.LCPI7_0]
+; CHECK-NEXT:    ldr q2, [x9, :lo12:.LCPI8_0]
 ; CHECK-NEXT:    cmhi v0.4s, v1.4s, v0.4s
 ; CHECK-NEXT:    xtn v1.4h, v0.4s
 ; CHECK-NEXT:    and v0.16b, v0.16b, v2.16b

>From 7f881777a27ef9c457d3dc22ea920dcc41de585a Mon Sep 17 00:00:00 2001
From: David Sherwood <david.sherwood at arm.com>
Date: Wed, 15 Jan 2025 15:40:36 +0000
Subject: [PATCH 2/2] [DAGCombiner] Fix issue #121372

PR #118823 added a DAG combine for extracting elements of a
vector returned from SETCC, however it doesn't correctly deal
with the case where the vector element type is not i1. In
this case we have to take account of the boolean contents,
which are represent differently between vectors and scalars.
For now, I've just restricted the optimisation to i1 types.

Fixes #121372
---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp   | 3 ++-
 llvm/test/CodeGen/AArch64/extract-vector-cmp.ll | 9 +++++----
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 6805e0cb23ace0..817c1c21718236 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -22809,7 +22809,8 @@ static SDValue scalarizeExtractedBinOp(SDNode *ExtElt, SelectionDAG &DAG,
 
   EVT ResVT = ExtElt->getValueType(0);
   if (Opc == ISD::SETCC &&
-      (ResVT != Vec.getValueType().getVectorElementType() || LegalTypes))
+      (ResVT != Vec.getValueType().getVectorElementType() || ResVT != MVT::i1 ||
+       LegalTypes))
     return SDValue();
 
   // Targets may want to avoid this to prevent an expensive register transfer.
diff --git a/llvm/test/CodeGen/AArch64/extract-vector-cmp.ll b/llvm/test/CodeGen/AArch64/extract-vector-cmp.ll
index 56e92e9517cba4..38961ae6227bfa 100644
--- a/llvm/test/CodeGen/AArch64/extract-vector-cmp.ll
+++ b/llvm/test/CodeGen/AArch64/extract-vector-cmp.ll
@@ -58,10 +58,11 @@ define i128 @extract_icmp_v1i128(ptr %p) {
 ; CHECK-LABEL: extract_icmp_v1i128:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ldp x9, x8, [x0]
-; CHECK-NEXT:    mov x1, xzr
 ; CHECK-NEXT:    orr x8, x9, x8
 ; CHECK-NEXT:    cmp x8, #0
-; CHECK-NEXT:    cset w0, eq
+; CHECK-NEXT:    cset w8, eq
+; CHECK-NEXT:    sbfx x0, x8, #0, #1
+; CHECK-NEXT:    mov x1, x0
 ; CHECK-NEXT:    ret
   %load = load <1 x i128>, ptr %p, align 16
   %cmp = icmp eq <1 x i128> %load, zeroinitializer
@@ -144,9 +145,9 @@ for.cond.cleanup:
 define i32 @issue_121372(<4 x i32> %0) {
 ; CHECK-LABEL: issue_121372:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v1.2d, #0000000000000000
+; CHECK-NEXT:    cmhs v0.4s, v1.4s, v0.4s
 ; CHECK-NEXT:    fmov w8, s0
-; CHECK-NEXT:    cmp w8, #0
-; CHECK-NEXT:    cset w8, eq
 ; CHECK-NEXT:    cmp w8, #1
 ; CHECK-NEXT:    csetm w0, lt
 ; CHECK-NEXT:    ret