[llvm] 2476e2a - [M68k] Optimize for overflow arithmetics that will never overflow
Min-Yih Hsu via llvm-commits
llvm-commits at lists.llvm.org
Tue Dec 26 20:59:05 PST 2023
Author: Min-Yih Hsu
Date: 2023-12-26T20:55:23-08:00
New Revision: 2476e2a91140b57ca3ad0792597be4f4d20ddb1a
URL: https://github.com/llvm/llvm-project/commit/2476e2a91140b57ca3ad0792597be4f4d20ddb1a
DIFF: https://github.com/llvm/llvm-project/commit/2476e2a91140b57ca3ad0792597be4f4d20ddb1a.diff
LOG: [M68k] Optimize for overflow arithmetics that will never overflow
We lower overflow arithmetic operations to their M68kISD counterparts, which
produce results of {i16/i32, i8} in which the second result represents CCR. In
the event where we're certain there won't be an overflow, for instance
8-bit and 16-bit multiplications, we simply use zero in place of the
second result.
This patch replaces M68kISD::CMOV that takes this kind of zero or
all-ones CCR as condition value with its corresponding operand value.
Added:
Modified:
llvm/lib/Target/M68k/M68kISelLowering.cpp
llvm/test/CodeGen/M68k/Arith/smul-with-overflow.ll
llvm/test/CodeGen/M68k/Arith/umul-with-overflow.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/M68k/M68kISelLowering.cpp b/llvm/lib/Target/M68k/M68kISelLowering.cpp
index 6ca5962965bdea..f42882dafa0956 100644
--- a/llvm/lib/Target/M68k/M68kISelLowering.cpp
+++ b/llvm/lib/Target/M68k/M68kISelLowering.cpp
@@ -1637,7 +1637,7 @@ SDValue M68kTargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const {
if (isa<ConstantSDNode>(CCR)) {
// It's likely a result of operations that will not overflow
// hence no setcc is needed.
- Overflow = DAG.getZExtOrTrunc(CCR, DL, N->getValueType(1));
+ Overflow = CCR;
} else {
// Generate a M68kISD::SETCC.
Overflow = DAG.getNode(M68kISD::SETCC, DL, N->getValueType(1),
@@ -2406,6 +2406,17 @@ SDValue M68kTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
}
}
+ // Simple optimization when Cond is a constant to avoid generating
+ // M68kISD::CMOV if possible.
+ // TODO: Generalize this to use SelectionDAG::computeKnownBits.
+ if (auto *Const = dyn_cast<ConstantSDNode>(Cond.getNode())) {
+ const APInt &C = Const->getAPIntValue();
+ if (C.countr_zero() >= 5)
+ return Op2;
+ else if (C.countr_one() >= 5)
+ return Op1;
+ }
+
// M68kISD::CMOV means set the result (which is operand 1) to the RHS if
// condition is true.
SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue);
diff --git a/llvm/test/CodeGen/M68k/Arith/smul-with-overflow.ll b/llvm/test/CodeGen/M68k/Arith/smul-with-overflow.ll
index 485cc50fbee2b2..b649b2ba161478 100644
--- a/llvm/test/CodeGen/M68k/Arith/smul-with-overflow.ll
+++ b/llvm/test/CodeGen/M68k/Arith/smul-with-overflow.ll
@@ -4,19 +4,28 @@
define zeroext i8 @smul_i8(i8 signext %a, i8 signext %b) nounwind ssp {
; CHECK-LABEL: smul_i8:
; CHECK: ; %bb.0: ; %entry
-; CHECK-NEXT: move.b (11,%sp), %d1
-; CHECK-NEXT: and.l #255, %d1
-; CHECK-NEXT: move.b (7,%sp), %d0
+; CHECK-NEXT: move.b (11,%sp), %d0
; CHECK-NEXT: and.l #255, %d0
-; CHECK-NEXT: muls %d1, %d0
-; CHECK-NEXT: move.b #0, %d1
-; CHECK-NEXT: move.w %d1, %ccr
-; CHECK-NEXT: bvs .LBB0_2
-; CHECK-NEXT: ; %bb.1: ; %entry
-; CHECK-NEXT: move.b #42, %d0
-; CHECK-NEXT: .LBB0_2: ; %entry
+; CHECK-NEXT: move.b (7,%sp), %d1
+; CHECK-NEXT: and.l #255, %d1
+; CHECK-NEXT: muls %d0, %d1
+; CHECK-NEXT: move.l %d1, %d0
+; CHECK-NEXT: and.l #65535, %d0
; CHECK-NEXT: and.l #255, %d0
; CHECK-NEXT: rts
+entry:
+ %smul = tail call { i8, i1 } @llvm.smul.with.overflow.i8(i8 %a, i8 %b)
+ %cmp = extractvalue { i8, i1 } %smul, 1
+ %smul.result = extractvalue { i8, i1 } %smul, 0
+ %X = select i1 %cmp, i8 42, i8 %smul.result
+ ret i8 %X
+}
+
+define zeroext i8 @smul_i8_no_ovf(i8 signext %a, i8 signext %b) nounwind ssp {
+; CHECK-LABEL: smul_i8_no_ovf:
+; CHECK: ; %bb.0: ; %entry
+; CHECK-NEXT: move.l #42, %d0
+; CHECK-NEXT: rts
entry:
%smul = tail call { i8, i1 } @llvm.smul.with.overflow.i8(i8 %a, i8 %b)
%cmp = extractvalue { i8, i1 } %smul, 1
@@ -33,19 +42,13 @@ define zeroext i16 @smul_i16(i16 signext %a, i16 signext %b) nounwind ssp {
; CHECK-NEXT: move.w (6,%sp), %d0
; CHECK-NEXT: move.w (10,%sp), %d1
; CHECK-NEXT: muls %d1, %d0
-; CHECK-NEXT: move.b #0, %d1
-; CHECK-NEXT: move.w %d1, %ccr
-; CHECK-NEXT: bvs .LBB1_2
-; CHECK-NEXT: ; %bb.1: ; %entry
-; CHECK-NEXT: move.w #42, %d0
-; CHECK-NEXT: .LBB1_2: ; %entry
; CHECK-NEXT: and.l #65535, %d0
; CHECK-NEXT: rts
entry:
%smul = tail call { i16, i1 } @llvm.smul.with.overflow.i16(i16 %a, i16 %b)
%cmp = extractvalue { i16, i1 } %smul, 1
%smul.result = extractvalue { i16, i1 } %smul, 0
- %X = select i1 %cmp, i16 %smul.result, i16 42
+ %X = select i1 %cmp, i16 42, i16 %smul.result
ret i16 %X
}
@@ -62,7 +65,7 @@ define fastcc i1 @test1(i32 %v1, i32 %v2) nounwind {
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: suba.l #12, %sp
; CHECK-NEXT: muls.l %d1, %d0
-; CHECK-NEXT: bvc .LBB2_1
+; CHECK-NEXT: bvc .LBB3_1
; CHECK-NEXT: ; %bb.2: ; %overflow
; CHECK-NEXT: lea (no,%pc), %a0
; CHECK-NEXT: move.l %a0, (%sp)
@@ -70,7 +73,7 @@ define fastcc i1 @test1(i32 %v1, i32 %v2) nounwind {
; CHECK-NEXT: move.b #0, %d0
; CHECK-NEXT: adda.l #12, %sp
; CHECK-NEXT: rts
-; CHECK-NEXT: .LBB2_1: ; %normal
+; CHECK-NEXT: .LBB3_1: ; %normal
; CHECK-NEXT: move.l %d0, (4,%sp)
; CHECK-NEXT: lea (ok,%pc), %a0
; CHECK-NEXT: move.l %a0, (%sp)
@@ -100,7 +103,7 @@ define fastcc i1 @test2(i32 %v1, i32 %v2) nounwind {
; CHECK-NEXT: muls.l %d1, %d0
; CHECK-NEXT: svs %d1
; CHECK-NEXT: sub.b #1, %d1
-; CHECK-NEXT: bne .LBB3_2
+; CHECK-NEXT: bne .LBB4_2
; CHECK-NEXT: ; %bb.1: ; %overflow
; CHECK-NEXT: lea (no,%pc), %a0
; CHECK-NEXT: move.l %a0, (%sp)
@@ -108,7 +111,7 @@ define fastcc i1 @test2(i32 %v1, i32 %v2) nounwind {
; CHECK-NEXT: move.b #0, %d0
; CHECK-NEXT: adda.l #12, %sp
; CHECK-NEXT: rts
-; CHECK-NEXT: .LBB3_2: ; %normal
+; CHECK-NEXT: .LBB4_2: ; %normal
; CHECK-NEXT: move.l %d0, (4,%sp)
; CHECK-NEXT: lea (ok,%pc), %a0
; CHECK-NEXT: move.l %a0, (%sp)
diff --git a/llvm/test/CodeGen/M68k/Arith/umul-with-overflow.ll b/llvm/test/CodeGen/M68k/Arith/umul-with-overflow.ll
index 1dfb959e468ce8..fd128a3e52bd3e 100644
--- a/llvm/test/CodeGen/M68k/Arith/umul-with-overflow.ll
+++ b/llvm/test/CodeGen/M68k/Arith/umul-with-overflow.ll
@@ -4,19 +4,28 @@
define zeroext i8 @umul_i8(i8 signext %a, i8 signext %b) nounwind ssp {
; CHECK-LABEL: umul_i8:
; CHECK: ; %bb.0: ; %entry
-; CHECK-NEXT: move.b (11,%sp), %d1
-; CHECK-NEXT: and.l #255, %d1
-; CHECK-NEXT: move.b (7,%sp), %d0
+; CHECK-NEXT: move.b (11,%sp), %d0
; CHECK-NEXT: and.l #255, %d0
-; CHECK-NEXT: muls %d1, %d0
-; CHECK-NEXT: move.b #0, %d1
-; CHECK-NEXT: move.w %d1, %ccr
-; CHECK-NEXT: bvs .LBB0_2
-; CHECK-NEXT: ; %bb.1: ; %entry
-; CHECK-NEXT: move.b #42, %d0
-; CHECK-NEXT: .LBB0_2: ; %entry
+; CHECK-NEXT: move.b (7,%sp), %d1
+; CHECK-NEXT: and.l #255, %d1
+; CHECK-NEXT: muls %d0, %d1
+; CHECK-NEXT: move.l %d1, %d0
+; CHECK-NEXT: and.l #65535, %d0
; CHECK-NEXT: and.l #255, %d0
; CHECK-NEXT: rts
+entry:
+ %umul = tail call { i8, i1 } @llvm.umul.with.overflow.i8(i8 %a, i8 %b)
+ %cmp = extractvalue { i8, i1 } %umul, 1
+ %umul.result = extractvalue { i8, i1 } %umul, 0
+ %X = select i1 %cmp, i8 42, i8 %umul.result
+ ret i8 %X
+}
+
+define zeroext i8 @umul_i8_no_ovf(i8 signext %a, i8 signext %b) nounwind ssp {
+; CHECK-LABEL: umul_i8_no_ovf:
+; CHECK: ; %bb.0: ; %entry
+; CHECK-NEXT: move.l #42, %d0
+; CHECK-NEXT: rts
entry:
%umul = tail call { i8, i1 } @llvm.umul.with.overflow.i8(i8 %a, i8 %b)
%cmp = extractvalue { i8, i1 } %umul, 1
@@ -33,19 +42,13 @@ define zeroext i16 @umul_i16(i16 signext %a, i16 signext %b) nounwind ssp {
; CHECK-NEXT: move.w (6,%sp), %d0
; CHECK-NEXT: move.w (10,%sp), %d1
; CHECK-NEXT: muls %d1, %d0
-; CHECK-NEXT: move.b #0, %d1
-; CHECK-NEXT: move.w %d1, %ccr
-; CHECK-NEXT: bvs .LBB1_2
-; CHECK-NEXT: ; %bb.1: ; %entry
-; CHECK-NEXT: move.w #42, %d0
-; CHECK-NEXT: .LBB1_2: ; %entry
; CHECK-NEXT: and.l #65535, %d0
; CHECK-NEXT: rts
entry:
%umul = tail call { i16, i1 } @llvm.umul.with.overflow.i16(i16 %a, i16 %b)
%cmp = extractvalue { i16, i1 } %umul, 1
%umul.result = extractvalue { i16, i1 } %umul, 0
- %X = select i1 %cmp, i16 %umul.result, i16 42
+ %X = select i1 %cmp, i16 42, i16 %umul.result
ret i16 %X
}
More information about the llvm-commits
mailing list