[llvm] ec864a5 - [AArch64][PeepholeOpt]Optimize ALU + compare to flag-setting ALU

Mingming Liu via llvm-commits llvm-commits at lists.llvm.org
Mon Mar 27 10:56:02 PDT 2023


Author: Mingming Liu
Date: 2023-03-27T10:55:45-07:00
New Revision: ec864a537160288a9cf7aea965cf33b0851d6d55

URL: https://github.com/llvm/llvm-project/commit/ec864a537160288a9cf7aea965cf33b0851d6d55
DIFF: https://github.com/llvm/llvm-project/commit/ec864a537160288a9cf7aea965cf33b0851d6d55.diff

LOG: [AArch64][PeepholeOpt]Optimize ALU + compare to flag-setting ALU

The motivating example is in https://godbolt.org/z/45nbdYMK9
- For this example, `subs` is generated for the good case; `sub` followed by `cmp` is generated for the bad case. Since signed overflow is undefined behavior in C/C++ (indicated by the `nsw` flag in LLVM IR), `subs` should be generated for the bad case as well.

This patch relaxes one restriction from "quit optimization when V is used" to "continue if MI produces poison value when signed overflow occurs". This is not meant to be C/C++ specific, since it looks at the 'NoSWrap' MachineInstr flag.

Reviewed By: dmgreen

Differential Revision: https://reviews.llvm.org/D146820

Added: 
    

Modified: 
    llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
    llvm/test/CodeGen/AArch64/aarch64-icmp-opt.ll
    llvm/test/CodeGen/AArch64/arm64-csel.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index b1cfd684e18b8..69ada004e91ff 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -1692,17 +1692,34 @@ static bool isSUBSRegImm(unsigned Opcode) {
 ///        MI and CmpInstr
 ///        or if MI opcode is not the S form there must be neither defs of flags
 ///        nor uses of flags between MI and CmpInstr.
-/// - and  C/V flags are not used after CmpInstr
+/// - and, if C/V flags are not used after CmpInstr
+///        or if N flag is used but MI produces poison value if signed overflow
+///        occurs.
 static bool canInstrSubstituteCmpInstr(MachineInstr &MI, MachineInstr &CmpInstr,
                                        const TargetRegisterInfo &TRI) {
+  // NOTE this assertion guarantees that MI.getOpcode() is add or subtraction
+  // that may or may not set flags.
   assert(sForm(MI) != AArch64::INSTRUCTION_LIST_END);
 
   const unsigned CmpOpcode = CmpInstr.getOpcode();
   if (!isADDSRegImm(CmpOpcode) && !isSUBSRegImm(CmpOpcode))
     return false;
 
+  assert((CmpInstr.getOperand(2).isImm() &&
+          CmpInstr.getOperand(2).getImm() == 0) &&
+         "Caller guarantees that CmpInstr compares with constant 0");
+
   std::optional<UsedNZCV> NZVCUsed = examineCFlagsUse(MI, CmpInstr, TRI);
-  if (!NZVCUsed || NZVCUsed->C || NZVCUsed->V)
+  if (!NZVCUsed || NZVCUsed->C)
+    return false;
+
+  // CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0', and MI is either
+  // '%vreg = add ...' or '%vreg = sub ...'.
+  // Condition flag V is used to indicate signed overflow.
+  // 1) MI and CmpInstr set N and V to the same value.
+  // 2) If MI is add/sub with no-signed-wrap, it produces a poison value when
+  //    signed overflow occurs, so CmpInstr could still be simplified away.
+  if (NZVCUsed->V && !MI.getFlag(MachineInstr::NoSWrap))
     return false;
 
   AccessKind AccessToCheck = AK_Write;

diff  --git a/llvm/test/CodeGen/AArch64/aarch64-icmp-opt.ll b/llvm/test/CodeGen/AArch64/aarch64-icmp-opt.ll
index bceec8d77db80..c24ef372a5907 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-icmp-opt.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-icmp-opt.ll
@@ -7,8 +7,7 @@ target triple = "aarch64-unknown-linux-gnu"
 define i32 @sub_icmp_i32(i32 %0, i32 %1) {
 ; CHECK-LABEL: sub_icmp_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sub w0, w0, w1
-; CHECK-NEXT:    cmp w0, #0
+; CHECK-NEXT:    subs w0, w0, w1
 ; CHECK-NEXT:    b.le .LBB0_2
 ; CHECK-NEXT:  // %bb.1:
 ; CHECK-NEXT:    b _Z2f2i
@@ -36,8 +35,7 @@ define i32 @sub_icmp_i32(i32 %0, i32 %1) {
 define i64 @sub_icmp_i64(i64 %0, i64 %1) {
 ; CHECK-LABEL: sub_icmp_i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sub x0, x0, x1
-; CHECK-NEXT:    cmp x0, #0
+; CHECK-NEXT:    subs x0, x0, x1
 ; CHECK-NEXT:    b.le .LBB1_2
 ; CHECK-NEXT:  // %bb.1:
 ; CHECK-NEXT:    b _Z2f4l
@@ -63,8 +61,7 @@ define i64 @sub_icmp_i64(i64 %0, i64 %1) {
 define i64 @add_i64(i64 %0, i64 %1) {
 ; CHECK-LABEL: add_i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    add x0, x1, x0
-; CHECK-NEXT:    cmp x0, #0
+; CHECK-NEXT:    adds x0, x1, x0
 ; CHECK-NEXT:    b.le .LBB2_2
 ; CHECK-NEXT:  // %bb.1:
 ; CHECK-NEXT:    b _Z2f4l
@@ -90,8 +87,7 @@ define i64 @add_i64(i64 %0, i64 %1) {
 define i32 @add_i32(i32 %0, i32 %1) {
 ; CHECK-LABEL: add_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    add w0, w1, w0
-; CHECK-NEXT:    cmp w0, #0
+; CHECK-NEXT:    adds w0, w1, w0
 ; CHECK-NEXT:    b.le .LBB3_2
 ; CHECK-NEXT:  // %bb.1:
 ; CHECK-NEXT:    b _Z2f4l

diff  --git a/llvm/test/CodeGen/AArch64/arm64-csel.ll b/llvm/test/CodeGen/AArch64/arm64-csel.ll
index 246d96f488de3..dd721e736f844 100644
--- a/llvm/test/CodeGen/AArch64/arm64-csel.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-csel.ll
@@ -79,8 +79,7 @@ entry:
 define i32 @foo6(i32 %a, i32 %b) nounwind ssp {
 ; CHECK-LABEL: foo6:
 ; CHECK:       // %bb.0: // %common.ret
-; CHECK-NEXT:    sub w8, w0, w1
-; CHECK-NEXT:    cmp w8, #0
+; CHECK-NEXT:    subs w8, w0, w1
 ; CHECK-NEXT:    csinc w0, w8, wzr, le
 ; CHECK-NEXT:    ret
   %sub = sub nsw i32 %a, %b


        


More information about the llvm-commits mailing list