[llvm] cdcce3c - [SystemZ] Also accept ISD::USUBO in shouldFormOverflowOp().

Tue Mar 3 05:40:00 PST 2020

Author: Jonas Paulsson
Date: 2020-03-03T14:38:57+01:00
New Revision: cdcce3cabf5b2e151fb324d7a7e4b6cb6e40716d

URL: https://github.com/llvm/llvm-project/commit/cdcce3cabf5b2e151fb324d7a7e4b6cb6e40716d
DIFF: https://github.com/llvm/llvm-project/commit/cdcce3cabf5b2e151fb324d7a7e4b6cb6e40716d.diff

LOG: [SystemZ] Also accept ISD::USUBO in shouldFormOverflowOp().

Forming subtractions with overflow is beneficial on SystemZ, just as it is for additions.

Review: Ulrich Weigand

Differential Revision: https://reviews.llvm.org/D75290

Added: 
    llvm/test/CodeGen/SystemZ/codegenprepare-form-OF-ops.ll

Modified: 
    llvm/lib/Target/SystemZ/SystemZISelLowering.h
    llvm/test/CodeGen/SystemZ/dag-combine-05.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
index 51b97d3b6f6a..739377ed0f95 100644

--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.h
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
@@ -442,9 +442,9 @@ class SystemZTargetLowering : public TargetLowering {
 
   bool shouldFormOverflowOp(unsigned Opcode, EVT VT,
                             bool MathUsed) const override {
-    // Using overflow ops for overflow checks only should beneficial on
-    // SystemZ.
-    return TargetLowering::shouldFormOverflowOp(Opcode, VT, true);
+    // Form add and sub with overflow intrinsics regardless of any extra
+    // users of the math result.
+    return VT == MVT::i32 || VT == MVT::i64;
   }
 
   const char *getTargetNodeName(unsigned Opcode) const override;

diff  --git a/llvm/test/CodeGen/SystemZ/codegenprepare-form-OF-ops.ll b/llvm/test/CodeGen/SystemZ/codegenprepare-form-OF-ops.ll
new file mode 100644
index 000000000000..161f4bc2b765
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/codegenprepare-form-OF-ops.ll
@@ -0,0 +1,54 @@
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 -O3 | FileCheck %s
+;
+; Check that CodeGenPrepare transforms these functions to use
+; uadd.with.overflow / usub.with.overflow intrinsics so that the compare
+; instruction is eliminated.
+
+define i32 @uaddo_32(i32 %arg)  {
+; CHECK-LABEL: uaddo_32:
+; CHECK: alhsik	 %r0, %r2, -1
+; CHECK: locrnle %r2, %r0
+; CHECK: br      %r14
+
+bb:
+  %tmp10 = icmp ne i32 %arg, 0
+  %tmp11 = add nsw i32 %arg, -1
+  %tmp12 = select i1 %tmp10, i32 %tmp11, i32 %arg
+  ret i32 %tmp12
+}
+
+define i64 @uaddo_64(i64 %arg)  {
+; CHECK-LABEL: uaddo_64:
+; CHECK: alghsik  %r0, %r2, -1
+; CHECK: locgrnle %r2, %r0
+; CHECK: br       %r14
+bb:
+  %tmp10 = icmp ne i64 %arg, 0
+  %tmp11 = add nsw i64 %arg, -1
+  %tmp12 = select i1 %tmp10, i64 %tmp11, i64 %arg
+  ret i64 %tmp12
+}
+
+define i32 @usubo_32(i32 %arg)  {
+; CHECK-LABEL: usubo_32:
+; CHECK: alhsik %r0, %r2, -1
+; CHECK: locrle %r2, %r0
+; CHECK: br     %r14
+bb:
+  %tmp10 = icmp eq i32 %arg, 0
+  %tmp11 = sub nsw i32 %arg, 1
+  %tmp12 = select i1 %tmp10, i32 %tmp11, i32 %arg
+  ret i32 %tmp12
+}
+
+define i64 @usubo_64(i64 %arg)  {
+; CHECK-LABEL: usubo_64:
+; CHECK: alghsik %r0, %r2, -1
+; CHECK: locgrle %r2, %r0
+; CHECK: br      %r14
+bb:
+  %tmp10 = icmp eq i64 %arg, 0
+  %tmp11 = sub nsw i64 %arg, 1
+  %tmp12 = select i1 %tmp10, i64 %tmp11, i64 %arg
+  ret i64 %tmp12
+}

diff  --git a/llvm/test/CodeGen/SystemZ/dag-combine-05.ll b/llvm/test/CodeGen/SystemZ/dag-combine-05.ll
index 78b129fc2f73..eb9fcc296921 100644
--- a/llvm/test/CodeGen/SystemZ/dag-combine-05.ll
+++ b/llvm/test/CodeGen/SystemZ/dag-combine-05.ll
@@ -26,10 +26,13 @@ bb:
   %tmp = icmp ult i16 %arg0, 9616
   %tmp1 = zext i1 %tmp to i32
   %tmp2 = load i16, i16* %src
-  %tmp3 = add i16 %tmp2, -1
-  %tmp4 = icmp ne i16 %tmp2, 0
-  %tmp5 = zext i1 %tmp4 to i32
+  %0 = call { i16, i1 } @llvm.uadd.with.overflow.i16(i16 %tmp2, i16 -1)
+  %math = extractvalue { i16, i1 } %0, 0
+  %ov = extractvalue { i16, i1 } %0, 1
+  %tmp5 = zext i1 %ov to i32
   %tmp6 = add nuw nsw i32 %tmp5, %tmp1
   store i32 %tmp6, i32* %dst
   ret void
 }
+
+declare { i16, i1 } @llvm.uadd.with.overflow.i16(i16, i16) #1