[llvm] 1d54e75 - [GlobalISel] Fix multiply with overflow intrinsics legalization generating invalid MIR.
Amara Emerson via llvm-commits
llvm-commits at lists.llvm.org
Tue Sep 29 18:41:07 PDT 2020
Author: Amara Emerson
Date: 2020-09-29T18:40:58-07:00
New Revision: 1d54e75cf26a4c60b66659d5d9c62f4bb9452b03
URL: https://github.com/llvm/llvm-project/commit/1d54e75cf26a4c60b66659d5d9c62f4bb9452b03
DIFF: https://github.com/llvm/llvm-project/commit/1d54e75cf26a4c60b66659d5d9c62f4bb9452b03.diff
LOG: [GlobalISel] Fix multiply with overflow intrinsics legalization generating invalid MIR.
During lowering of G_UMULO and friends, the previous code moved the builder's
insertion point to be after the legalizing instruction. When that happened, if
there happened to be a "G_CONSTANT i32 0" immediately after, the CSEMIRBuilder
would try to find that constant during the buildConstant(zero) call, and since
it dominates itself would return the iterator unchanged, even though the def
of the constant was *after* the current insertion point. This resulted in the
compare being generated *before* the constant which it was using.
There's no need to modify the insertion point before building the mul-hi or
constant. Delaying moving the insert point ensures those are built/CSEd before
the G_ICMP is built.
Fixes PR47679
Differential Revision: https://reviews.llvm.org/D88514
Added:
Modified:
llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
llvm/test/CodeGen/AArch64/GlobalISel/legalize-mul.mir
llvm/test/CodeGen/Mips/GlobalISel/legalizer/mul.mir
llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/mul.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index e8bc4067c127..45ac2b7b6711 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -2892,11 +2892,12 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
MI.RemoveOperand(1);
Observer.changedInstr(MI);
- MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
-
auto HiPart = MIRBuilder.buildInstr(Opcode, {Ty}, {LHS, RHS});
auto Zero = MIRBuilder.buildConstant(Ty, 0);
+ // Move insert point forward so we can use the Res register if needed.
+ MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
+
// For *signed* multiply, overflow is detected by checking:
// (hi != (lo >> bitwidth-1))
if (Opcode == TargetOpcode::G_SMULH) {
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-mul.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-mul.mir
index 84c839f7b341..20af216aaeb5 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-mul.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-mul.mir
@@ -28,8 +28,8 @@ body: |
; CHECK-LABEL: name: test_smul_overflow
; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
- ; CHECK: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[COPY]], [[COPY1]]
; CHECK: [[SMULH:%[0-9]+]]:_(s64) = G_SMULH [[COPY]], [[COPY1]]
+ ; CHECK: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[COPY]], [[COPY1]]
; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 63
; CHECK: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[MUL]], [[C]](s64)
; CHECK: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[SMULH]](s64), [[ASHR]]
@@ -51,9 +51,9 @@ body: |
; CHECK-LABEL: name: test_umul_overflow
; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
- ; CHECK: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[COPY]], [[COPY1]]
; CHECK: [[UMULH:%[0-9]+]]:_(s64) = G_UMULH [[COPY]], [[COPY1]]
; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; CHECK: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[COPY]], [[COPY1]]
; CHECK: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[UMULH]](s64), [[C]]
; CHECK: $x0 = COPY [[MUL]](s64)
; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[ICMP]](s32)
@@ -91,3 +91,67 @@ body: |
$q0 = COPY %2(<2 x s64>)
RET_ReallyLR implicit $q0
...
+---
+name: test_umulo_overflow_no_invalid_mir
+alignment: 4
+tracksRegLiveness: true
+liveins:
+ - { reg: '$x0' }
+ - { reg: '$x1' }
+ - { reg: '$x2' }
+frameInfo:
+ maxAlignment: 16
+stack:
+ - { id: 0, size: 8, alignment: 8 }
+ - { id: 1, size: 8, alignment: 8 }
+ - { id: 2, size: 16, alignment: 16 }
+ - { id: 3, size: 16, alignment: 8 }
+machineFunctionInfo: {}
+body: |
+ bb.1:
+ liveins: $x0, $x1, $x2
+ ; Check that the overflow result doesn't generate incorrect MIR by using a G_CONSTANT 0
+ ; before it's been defined.
+ ; CHECK-LABEL: name: test_umulo_overflow_no_invalid_mir
+ ; CHECK: liveins: $x0, $x1, $x2
+ ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+ ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
+ ; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2
+ ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
+ ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1
+ ; CHECK: [[FRAME_INDEX2:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.3
+ ; CHECK: G_STORE [[COPY2]](s64), [[FRAME_INDEX]](p0) :: (store 8)
+ ; CHECK: G_STORE [[COPY1]](s64), [[FRAME_INDEX1]](p0) :: (store 8)
+ ; CHECK: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX]](p0) :: (dereferenceable load 8)
+ ; CHECK: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX1]](p0) :: (dereferenceable load 8)
+ ; CHECK: [[UMULH:%[0-9]+]]:_(s64) = G_UMULH [[LOAD]], [[LOAD1]]
+ ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; CHECK: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[LOAD]], [[LOAD1]]
+ ; CHECK: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[UMULH]](s64), [[C]]
+ ; CHECK: G_STORE [[C]](s64), [[FRAME_INDEX2]](p0) :: (store 8, align 1)
+ ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+ ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[ICMP]](s32)
+ ; CHECK: [[AND:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C1]]
+ ; CHECK: $x0 = COPY [[MUL]](s64)
+ ; CHECK: $x1 = COPY [[AND]](s64)
+ ; CHECK: RET_ReallyLR implicit $x0
+ %0:_(p0) = COPY $x0
+ %1:_(s64) = COPY $x1
+ %2:_(s64) = COPY $x2
+ %25:_(s32) = G_CONSTANT i32 0
+ %3:_(p0) = G_FRAME_INDEX %stack.0
+ %4:_(p0) = G_FRAME_INDEX %stack.1
+ %6:_(p0) = G_FRAME_INDEX %stack.3
+ G_STORE %2(s64), %3(p0) :: (store 8)
+ G_STORE %1(s64), %4(p0) :: (store 8)
+ %7:_(s64) = G_LOAD %3(p0) :: (dereferenceable load 8)
+ %8:_(s64) = G_LOAD %4(p0) :: (dereferenceable load 8)
+ %9:_(s64), %10:_(s1) = G_UMULO %7, %8
+ %31:_(s64) = G_CONSTANT i64 0
+ G_STORE %31(s64), %6(p0) :: (store 8, align 1)
+ %16:_(s64) = G_ZEXT %10(s1)
+ $x0 = COPY %9(s64)
+ $x1 = COPY %16(s64)
+ RET_ReallyLR implicit $x0
+
+...
diff --git a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/mul.mir b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/mul.mir
index c92a55d0af32..b146aa5ff13d 100644
--- a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/mul.mir
+++ b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/mul.mir
@@ -439,9 +439,9 @@ body: |
; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1
; MIPS32: [[COPY2:%[0-9]+]]:_(p0) = COPY $a2
; MIPS32: [[COPY3:%[0-9]+]]:_(p0) = COPY $a3
- ; MIPS32: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[COPY]], [[COPY1]]
; MIPS32: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[COPY]], [[COPY1]]
; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; MIPS32: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[COPY]], [[COPY1]]
; MIPS32: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[UMULH]](s32), [[C]]
; MIPS32: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; MIPS32: [[COPY4:%[0-9]+]]:_(s32) = COPY [[ICMP]](s32)
diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/mul.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/mul.ll
index 659eadf181c0..f7250ccde898 100644
--- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/mul.ll
+++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/mul.ll
@@ -180,13 +180,13 @@ declare { i32, i1 } @llvm.umul.with.overflow.i32(i32, i32)
define void @umul_with_overflow(i32 %lhs, i32 %rhs, i32* %pmul, i1* %pcarry_flag) {
; MIPS32-LABEL: umul_with_overflow:
; MIPS32: # %bb.0:
-; MIPS32-NEXT: mul $1, $4, $5
; MIPS32-NEXT: multu $4, $5
-; MIPS32-NEXT: mfhi $2
-; MIPS32-NEXT: sltu $2, $zero, $2
-; MIPS32-NEXT: andi $2, $2, 1
-; MIPS32-NEXT: sb $2, 0($7)
-; MIPS32-NEXT: sw $1, 0($6)
+; MIPS32-NEXT: mfhi $1
+; MIPS32-NEXT: mul $2, $4, $5
+; MIPS32-NEXT: sltu $1, $zero, $1
+; MIPS32-NEXT: andi $1, $1, 1
+; MIPS32-NEXT: sb $1, 0($7)
+; MIPS32-NEXT: sw $2, 0($6)
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
%res = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %lhs, i32 %rhs)
More information about the llvm-commits
mailing list