[llvm] [DAGCombiner] Limit EXTRACT(SETCC) combines in scalarizeExtractedBinOp to i1 types (PR #123071)

David Sherwood via llvm-commits llvm-commits at lists.llvm.org
Thu Jan 16 05:33:09 PST 2025


https://github.com/david-arm updated https://github.com/llvm/llvm-project/pull/123071

>From 5e6b1568c3aded63dbf8b260445208846a21a3b5 Mon Sep 17 00:00:00 2001
From: David Sherwood <david.sherwood at arm.com>
Date: Thu, 16 Jan 2025 13:31:02 +0000
Subject: [PATCH 1/2] Add test

---
 .../CodeGen/AArch64/extract-vector-cmp.ll     | 22 +++++++++++++++++--
 1 file changed, 20 insertions(+), 2 deletions(-)

diff --git a/llvm/test/CodeGen/AArch64/extract-vector-cmp.ll b/llvm/test/CodeGen/AArch64/extract-vector-cmp.ll
index 12bd2db2297d77..b7f44d0314adf9 100644
--- a/llvm/test/CodeGen/AArch64/extract-vector-cmp.ll
+++ b/llvm/test/CodeGen/AArch64/extract-vector-cmp.ll
@@ -141,6 +141,24 @@ for.cond.cleanup:
 }
 
 
+define i32 @issue_121372(<4 x i32> %v) {
+; CHECK-LABEL: issue_121372:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmov w8, s0
+; CHECK-NEXT:    cmp w8, #0
+; CHECK-NEXT:    cset w8, eq
+; CHECK-NEXT:    cmp w8, #1
+; CHECK-NEXT:    csetm w0, lt
+; CHECK-NEXT:    ret
+  %cmp_ule = icmp ule <4 x i32> %v, zeroinitializer
+  %sext_v4i1 = sext <4 x i1> %cmp_ule to <4 x i32>
+  %cmp_sge = icmp sge <4 x i32> zeroinitializer, %sext_v4i1
+  %ext = extractelement <4 x i1> %cmp_sge, i32 0
+  %res = sext i1 %ext to i32
+  ret i32 %res
+}
+
+
 ; Negative tests
 
 define i1 @extract_icmp_v4i32_splat_rhs(<4 x i32> %a, i32 %b) {
@@ -163,9 +181,9 @@ define i1 @extract_icmp_v4i32_splat_rhs_mul_use(<4 x i32> %a, ptr %p) {
 ; CHECK-LABEL: extract_icmp_v4i32_splat_rhs_mul_use:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    movi v1.4s, #235
-; CHECK-NEXT:    adrp x9, .LCPI7_0
+; CHECK-NEXT:    adrp x9, .LCPI8_0
 ; CHECK-NEXT:    mov x8, x0
-; CHECK-NEXT:    ldr q2, [x9, :lo12:.LCPI7_0]
+; CHECK-NEXT:    ldr q2, [x9, :lo12:.LCPI8_0]
 ; CHECK-NEXT:    cmhi v0.4s, v1.4s, v0.4s
 ; CHECK-NEXT:    xtn v1.4h, v0.4s
 ; CHECK-NEXT:    and v0.16b, v0.16b, v2.16b

>From e89faba653ad3f71fa77dc4763aefbd10e8545df Mon Sep 17 00:00:00 2001
From: David Sherwood <david.sherwood at arm.com>
Date: Thu, 16 Jan 2025 13:31:10 +0000
Subject: [PATCH 2/2] [DAGCombiner] Fix issue #121372

PR #118823 added a DAG combine for extracting elements of a
vector returned from SETCC, however it doesn't correctly deal
with the case where the vector element type is not i1. In
this case we have to take account of the boolean contents,
which are represented differently between vectors and scalars.
The code now explicitly performs an inreg sign extend in
order to get the same result.

Fixes #121372
---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 33 ++++++++++++++++---
 .../CodeGen/AArch64/extract-vector-cmp.ll     |  7 ++--
 2 files changed, 33 insertions(+), 7 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 6805e0cb23ace0..cf6d5215731cdd 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -22808,9 +22808,26 @@ static SDValue scalarizeExtractedBinOp(SDNode *ExtElt, SelectionDAG &DAG,
     return SDValue();
 
   EVT ResVT = ExtElt->getValueType(0);
-  if (Opc == ISD::SETCC &&
-      (ResVT != Vec.getValueType().getVectorElementType() || LegalTypes))
-    return SDValue();
+  bool SetCCNeedsSignExt = false;
+  if (Opc == ISD::SETCC) {
+    EVT VecVT = Vec.getValueType();
+    if (ResVT != VecVT.getVectorElementType() || LegalTypes)
+      return SDValue();
+
+    if (ResVT != MVT::i1) {
+      bool VecRequiresSignExt = TLI.getBooleanContents(VecVT) ==
+                                TargetLowering::ZeroOrNegativeOneBooleanContent;
+      bool ScalarRequiresSignExt =
+          TLI.getBooleanContents(ResVT) ==
+          TargetLowering::ZeroOrNegativeOneBooleanContent;
+      if (VecRequiresSignExt && !ScalarRequiresSignExt)
+        SetCCNeedsSignExt = true;
+      else if (!VecRequiresSignExt && ScalarRequiresSignExt) {
+        // There are currently no targets with this behaviour.
+        return SDValue();
+      }
+    }
+  }
 
   // Targets may want to avoid this to prevent an expensive register transfer.
   if (!TLI.shouldScalarizeBinop(Vec))
@@ -22834,8 +22851,14 @@ static SDValue scalarizeExtractedBinOp(SDNode *ExtElt, SelectionDAG &DAG,
     EVT OpVT = Op0.getValueType().getVectorElementType();
     Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, OpVT, Op0, Index);
     Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, OpVT, Op1, Index);
-    return DAG.getSetCC(DL, ResVT, Op0, Op1,
-                        cast<CondCodeSDNode>(Vec->getOperand(2))->get());
+    SDValue NewVal = DAG.getSetCC(
+        DL, ResVT, Op0, Op1, cast<CondCodeSDNode>(Vec->getOperand(2))->get());
+    // We may need to sign-extend the result to match the same behaviour as the
+    // vector version of SETCC.
+    if (SetCCNeedsSignExt)
+      NewVal = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, ResVT, NewVal,
+                           DAG.getValueType(MVT::i1));
+    return NewVal;
   }
   Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Op0, Index);
   Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Op1, Index);
diff --git a/llvm/test/CodeGen/AArch64/extract-vector-cmp.ll b/llvm/test/CodeGen/AArch64/extract-vector-cmp.ll
index b7f44d0314adf9..8345fdfa46b4cd 100644
--- a/llvm/test/CodeGen/AArch64/extract-vector-cmp.ll
+++ b/llvm/test/CodeGen/AArch64/extract-vector-cmp.ll
@@ -58,10 +58,11 @@ define i128 @extract_icmp_v1i128(ptr %p) {
 ; CHECK-LABEL: extract_icmp_v1i128:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ldp x9, x8, [x0]
-; CHECK-NEXT:    mov x1, xzr
 ; CHECK-NEXT:    orr x8, x9, x8
 ; CHECK-NEXT:    cmp x8, #0
-; CHECK-NEXT:    cset w0, eq
+; CHECK-NEXT:    cset w8, eq
+; CHECK-NEXT:    sbfx x0, x8, #0, #1
+; CHECK-NEXT:    mov x1, x0
 ; CHECK-NEXT:    ret
   %load = load <1 x i128>, ptr %p, align 16
   %cmp = icmp eq <1 x i128> %load, zeroinitializer
@@ -141,12 +142,14 @@ for.cond.cleanup:
 }
 
 
+; TODO: Combine the sbfx(cset) into a csetm
 define i32 @issue_121372(<4 x i32> %v) {
 ; CHECK-LABEL: issue_121372:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fmov w8, s0
 ; CHECK-NEXT:    cmp w8, #0
 ; CHECK-NEXT:    cset w8, eq
+; CHECK-NEXT:    sbfx w8, w8, #0, #1
 ; CHECK-NEXT:    cmp w8, #1
 ; CHECK-NEXT:    csetm w0, lt
 ; CHECK-NEXT:    ret



More information about the llvm-commits mailing list