[llvm-branch-commits] [llvm] b643ee1 - [AArch64] Fix comparison peephole opt with non-0/1 immediate (PR51476)
Tom Stellard via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Wed Aug 18 20:07:40 PDT 2021
Author: Nikita Popov
Date: 2021-08-18T20:07:23-07:00
New Revision: b643ee1b9c1a8e0b81e31908a066c71851292890
URL: https://github.com/llvm/llvm-project/commit/b643ee1b9c1a8e0b81e31908a066c71851292890
DIFF: https://github.com/llvm/llvm-project/commit/b643ee1b9c1a8e0b81e31908a066c71851292890.diff
LOG: [AArch64] Fix comparison peephole opt with non-0/1 immediate (PR51476)
This is a non-intrusive fix for
https://bugs.llvm.org/show_bug.cgi?id=51476 intended for backport
to the 13.x release branch. It expands on the current hack by
distinguishing between CmpValue of 0, 1 and 2, where 0 and 1 have
the obvious meaning and 2 means "anything else". The new optimization
from D98564 should only be performed for CmpValue of 0 or 1.
For main, I think we should switch the analyzeCompare() and
optimizeCompare() APIs to use int64_t instead of int, which is in
line with MachineOperand's notion of an immediate, and avoids this
problem altogether.
Differential Revision: https://reviews.llvm.org/D108076
(cherry picked from commit 81b106584f2baf33e09be2362c35c1bf2f6bfe94)
Added:
llvm/test/CodeGen/AArch64/pr51476.ll
Modified:
llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
llvm/test/CodeGen/AArch64/csinc-cmp-removal.mir
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index b03d421d3e6de..091a62aa4ada7 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -1120,6 +1120,16 @@ bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
if (!MI.getOperand(1).isReg())
return false;
+ auto NormalizeCmpValue = [](int64_t Value) -> int {
+ // Comparison immediates may be 64-bit, but CmpValue is only an int.
+ // Normalize to 0/1/2 return value, where 2 indicates any value apart from
+ // 0 or 1.
+ // TODO: Switch CmpValue to int64_t in the API to avoid this.
+ if (Value == 0 || Value == 1)
+ return Value;
+ return 2;
+ };
+
switch (MI.getOpcode()) {
default:
break;
@@ -1155,8 +1165,7 @@ bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
SrcReg = MI.getOperand(1).getReg();
SrcReg2 = 0;
CmpMask = ~0;
- // FIXME: In order to convert CmpValue to 0 or 1
- CmpValue = MI.getOperand(2).getImm() != 0;
+ CmpValue = NormalizeCmpValue(MI.getOperand(2).getImm());
return true;
case AArch64::ANDSWri:
case AArch64::ANDSXri:
@@ -1165,14 +1174,9 @@ bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
SrcReg = MI.getOperand(1).getReg();
SrcReg2 = 0;
CmpMask = ~0;
- // FIXME:The return val type of decodeLogicalImmediate is uint64_t,
- // while the type of CmpValue is int. When converting uint64_t to int,
- // the high 32 bits of uint64_t will be lost.
- // In fact it causes a bug in spec2006-483.xalancbmk
- // CmpValue is only used to compare with zero in OptimizeCompareInstr
- CmpValue = AArch64_AM::decodeLogicalImmediate(
+ CmpValue = NormalizeCmpValue(AArch64_AM::decodeLogicalImmediate(
MI.getOperand(2).getImm(),
- MI.getOpcode() == AArch64::ANDSWri ? 32 : 64) != 0;
+ MI.getOpcode() == AArch64::ANDSWri ? 32 : 64));
return true;
}
@@ -1462,10 +1466,9 @@ bool AArch64InstrInfo::optimizeCompareInstr(
if (CmpInstr.getOpcode() == AArch64::PTEST_PP)
return optimizePTestInstr(&CmpInstr, SrcReg, SrcReg2, MRI);
- // Continue only if we have a "ri" where immediate is zero.
- // FIXME:CmpValue has already been converted to 0 or 1 in analyzeCompare
- // function.
- assert((CmpValue == 0 || CmpValue == 1) && "CmpValue must be 0 or 1!");
+ // Warning: CmpValue == 2 indicates *any* value apart from 0 or 1.
+ assert((CmpValue == 0 || CmpValue == 1 || CmpValue == 2) &&
+ "CmpValue must be 0, 1, or 2!");
if (SrcReg2 != 0)
return false;
@@ -1473,9 +1476,10 @@ bool AArch64InstrInfo::optimizeCompareInstr(
if (!MRI->use_nodbg_empty(CmpInstr.getOperand(0).getReg()))
return false;
- if (!CmpValue && substituteCmpToZero(CmpInstr, SrcReg, *MRI))
+ if (CmpValue == 0 && substituteCmpToZero(CmpInstr, SrcReg, *MRI))
return true;
- return removeCmpToZeroOrOne(CmpInstr, SrcReg, CmpValue, *MRI);
+ return (CmpValue == 0 || CmpValue == 1) &&
+ removeCmpToZeroOrOne(CmpInstr, SrcReg, CmpValue, *MRI);
}
/// Get opcode of S version of Instr.
diff --git a/llvm/test/CodeGen/AArch64/csinc-cmp-removal.mir b/llvm/test/CodeGen/AArch64/csinc-cmp-removal.mir
index 4222e84b113cc..2098218d23f33 100644
--- a/llvm/test/CodeGen/AArch64/csinc-cmp-removal.mir
+++ b/llvm/test/CodeGen/AArch64/csinc-cmp-removal.mir
@@ -307,3 +307,42 @@ body: |
RET_ReallyLR
...
+---
+name: subswr_wrong_cmp_value
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: subswr_wrong_cmp_value
+ ; CHECK: bb.0:
+ ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK: liveins: $x1
+ ; CHECK: [[COPY:%[0-9]+]]:gpr64common = COPY $x1
+ ; CHECK: [[DEF:%[0-9]+]]:gpr64 = IMPLICIT_DEF
+ ; CHECK: [[SUBSXrr:%[0-9]+]]:gpr64 = SUBSXrr killed [[DEF]], [[COPY]], implicit-def $nzcv
+ ; CHECK: [[CSINCWr:%[0-9]+]]:gpr32common = CSINCWr $wzr, $wzr, 1, implicit $nzcv
+ ; CHECK: [[SUBSWri:%[0-9]+]]:gpr32 = SUBSWri killed [[CSINCWr]], 3, 0, implicit-def $nzcv
+ ; CHECK: Bcc 1, %bb.2, implicit $nzcv
+ ; CHECK: B %bb.1
+ ; CHECK: bb.1:
+ ; CHECK: successors: %bb.2(0x80000000)
+ ; CHECK: B %bb.2
+ ; CHECK: bb.2:
+ ; CHECK: RET_ReallyLR
+ bb.0:
+ liveins: $x1
+ successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ %1:gpr64common = COPY $x1
+ %2:gpr64 = IMPLICIT_DEF
+ %3:gpr64 = SUBSXrr killed %2:gpr64, %1:gpr64common, implicit-def $nzcv
+ %4:gpr32common = CSINCWr $wzr, $wzr, 1, implicit $nzcv
+ %5:gpr32 = SUBSWri killed %4:gpr32common, 3, 0, implicit-def $nzcv
+ Bcc 1, %bb.2, implicit $nzcv
+ B %bb.1
+
+ bb.1:
+ successors: %bb.2(0x80000000)
+ B %bb.2
+
+ bb.2:
+ RET_ReallyLR
+
+...
diff --git a/llvm/test/CodeGen/AArch64/pr51476.ll b/llvm/test/CodeGen/AArch64/pr51476.ll
new file mode 100644
index 0000000000000..6abd41a121546
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/pr51476.ll
@@ -0,0 +1,35 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s
+
+define void @test(i8 %arg) nounwind {
+; CHECK-LABEL: test:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: and w8, w0, #0xff
+; CHECK-NEXT: cmp w8, #1
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: cmp w0, #3
+; CHECK-NEXT: strb w0, [sp, #12]
+; CHECK-NEXT: b.eq .LBB0_2
+; CHECK-NEXT: // %bb.1: // %do_call
+; CHECK-NEXT: bl unknown
+; CHECK-NEXT: .LBB0_2: // %common.ret
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+ %tmp = alloca i8
+ %cmp1 = icmp ne i8 %arg, 1
+ %zext = zext i1 %cmp1 to i8
+ store i8 %zext, i8* %tmp
+ %zext2 = load i8, i8* %tmp
+ %cmp2 = icmp eq i8 %zext2, 3
+ br i1 %cmp2, label %exit, label %do_call
+
+do_call:
+ call void @unknown(i8 %zext2)
+ ret void
+
+exit:
+ ret void
+}
+
+declare void @unknown(i8)
More information about the llvm-branch-commits mailing list