[llvm-branch-commits] [llvm] a3aee26 - [GlobalISel] Fix multiply with overflow intrinsics legalization generating invalid MIR.

Hans Wennborg via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Wed Sep 30 04:13:26 PDT 2020


Author: Amara Emerson
Date: 2020-09-30T13:05:48+02:00
New Revision: a3aee2678d07e249dca18493d2acd898eefd48dd

URL: https://github.com/llvm/llvm-project/commit/a3aee2678d07e249dca18493d2acd898eefd48dd
DIFF: https://github.com/llvm/llvm-project/commit/a3aee2678d07e249dca18493d2acd898eefd48dd.diff

LOG: [GlobalISel] Fix multiply with overflow intrinsics legalization generating invalid MIR.

During lowering of G_UMULO and friends, the previous code moved the builder's
insertion point to be after the instruction being legalized. With the insertion
point there, if a "G_CONSTANT i32 0" happened to follow immediately, the
CSEMIRBuilder would find that existing constant during the buildConstant(zero)
call and, since it dominates itself, return it, even though the def of the
constant was *after* the current insertion point. This resulted in the compare
being generated *before* the constant it was using.

There's no need to modify the insertion point before building the mul-hi or
the constant. Delaying the insertion-point move ensures both are built/CSEd
before the G_ICMP is built.
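
For reference, the corrected ordering in the G_UMULO/G_SMULO lowering looks
roughly like the sketch below. It is condensed and reconstructed around the
hunk further down, not a verbatim excerpt of LegalizerHelper::lower(); the
surrounding values (Res, Overflow, LHS, RHS, Ty, Opcode) are assumed to be set
up earlier in the same case.

    // The instruction being legalized has already been rewritten in place to
    // a plain G_MUL producing Res; Opcode is G_UMULH or G_SMULH.

    // Build the high-half multiply and the zero constant at the *current*
    // insertion point, i.e. before the G_MUL, so the CSEMIRBuilder cannot
    // hand back a G_CONSTANT that is only defined later in the block.
    auto HiPart = MIRBuilder.buildInstr(Opcode, {Ty}, {LHS, RHS});
    auto Zero = MIRBuilder.buildConstant(Ty, 0);

    // Only now move the insertion point past the legalized instruction, so
    // the overflow compare can refer to its result (Res) if needed.
    MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());

    if (Opcode == TargetOpcode::G_SMULH) {
      // Signed overflow: hi != (lo >> (bitwidth - 1)).
      auto ShiftAmt = MIRBuilder.buildConstant(Ty, Ty.getScalarSizeInBits() - 1);
      auto Shifted = MIRBuilder.buildAShr(Ty, Res, ShiftAmt);
      MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Shifted);
    } else {
      // Unsigned overflow: hi != 0.
      MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Zero);
    }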

Fixes PR47679

Differential Revision: https://reviews.llvm.org/D88514

(cherry picked from commit 1d54e75cf26a4c60b66659d5d9c62f4bb9452b03)

Added: 
    

Modified: 
    llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
    llvm/test/CodeGen/AArch64/GlobalISel/legalize-mul.mir
    llvm/test/CodeGen/Mips/GlobalISel/legalizer/mul.mir
    llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/mul.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index da519f99ad7e..244e7a9583d6 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -2368,11 +2368,12 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
     MI.RemoveOperand(1);
     Observer.changedInstr(MI);
 
-    MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
-
     auto HiPart = MIRBuilder.buildInstr(Opcode, {Ty}, {LHS, RHS});
     auto Zero = MIRBuilder.buildConstant(Ty, 0);
 
+    // Move insert point forward so we can use the Res register if needed.
+    MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
+
     // For *signed* multiply, overflow is detected by checking:
     // (hi != (lo >> bitwidth-1))
     if (Opcode == TargetOpcode::G_SMULH) {

diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-mul.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-mul.mir
index 3260eb6ca6fd..187ddebd9804 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-mul.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-mul.mir
@@ -28,8 +28,8 @@ body:             |
     ; CHECK-LABEL: name: test_smul_overflow
     ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
     ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
-    ; CHECK: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[COPY]], [[COPY1]]
     ; CHECK: [[SMULH:%[0-9]+]]:_(s64) = G_SMULH [[COPY]], [[COPY1]]
+    ; CHECK: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[COPY]], [[COPY1]]
     ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 63
     ; CHECK: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[MUL]], [[C]]
     ; CHECK: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[SMULH]](s64), [[ASHR]]
@@ -51,9 +51,9 @@ body:             |
     ; CHECK-LABEL: name: test_umul_overflow
     ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
     ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
-    ; CHECK: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[COPY]], [[COPY1]]
     ; CHECK: [[UMULH:%[0-9]+]]:_(s64) = G_UMULH [[COPY]], [[COPY1]]
     ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[COPY]], [[COPY1]]
     ; CHECK: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[UMULH]](s64), [[C]]
     ; CHECK: $x0 = COPY [[MUL]](s64)
     ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[ICMP]](s32)
@@ -66,3 +66,67 @@ body:             |
     $w0 = COPY %4(s32)
 
 ...
+---
+name:            test_umulo_overflow_no_invalid_mir
+alignment:       4
+tracksRegLiveness: true
+liveins:
+  - { reg: '$x0' }
+  - { reg: '$x1' }
+  - { reg: '$x2' }
+frameInfo:
+  maxAlignment:    16
+stack:
+  - { id: 0, size: 8, alignment: 8 }
+  - { id: 1, size: 8, alignment: 8 }
+  - { id: 2, size: 16, alignment: 16 }
+  - { id: 3, size: 16, alignment: 8 }
+machineFunctionInfo: {}
+body:             |
+  bb.1:
+    liveins: $x0, $x1, $x2
+    ; Check that the overflow result doesn't generate incorrect MIR by using a G_CONSTANT 0
+    ; before it's been defined.
+    ; CHECK-LABEL: name: test_umulo_overflow_no_invalid_mir
+    ; CHECK: liveins: $x0, $x1, $x2
+    ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
+    ; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2
+    ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
+    ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1
+    ; CHECK: [[FRAME_INDEX2:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.3
+    ; CHECK: G_STORE [[COPY2]](s64), [[FRAME_INDEX]](p0) :: (store 8)
+    ; CHECK: G_STORE [[COPY1]](s64), [[FRAME_INDEX1]](p0) :: (store 8)
+    ; CHECK: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX]](p0) :: (dereferenceable load 8)
+    ; CHECK: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX1]](p0) :: (dereferenceable load 8)
+    ; CHECK: [[UMULH:%[0-9]+]]:_(s64) = G_UMULH [[LOAD]], [[LOAD1]]
+    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[LOAD]], [[LOAD1]]
+    ; CHECK: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[UMULH]](s64), [[C]]
+    ; CHECK: G_STORE [[C]](s64), [[FRAME_INDEX2]](p0) :: (store 8, align 1)
+    ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[ICMP]](s32)
+    ; CHECK: [[AND:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C1]]
+    ; CHECK: $x0 = COPY [[MUL]](s64)
+    ; CHECK: $x1 = COPY [[AND]](s64)
+    ; CHECK: RET_ReallyLR implicit $x0
+    %0:_(p0) = COPY $x0
+    %1:_(s64) = COPY $x1
+    %2:_(s64) = COPY $x2
+    %25:_(s32) = G_CONSTANT i32 0
+    %3:_(p0) = G_FRAME_INDEX %stack.0
+    %4:_(p0) = G_FRAME_INDEX %stack.1
+    %6:_(p0) = G_FRAME_INDEX %stack.3
+    G_STORE %2(s64), %3(p0) :: (store 8)
+    G_STORE %1(s64), %4(p0) :: (store 8)
+    %7:_(s64) = G_LOAD %3(p0) :: (dereferenceable load 8)
+    %8:_(s64) = G_LOAD %4(p0) :: (dereferenceable load 8)
+    %9:_(s64), %10:_(s1) = G_UMULO %7, %8
+    %31:_(s64) = G_CONSTANT i64 0
+    G_STORE %31(s64), %6(p0) :: (store 8, align 1)
+    %16:_(s64) = G_ZEXT %10(s1)
+    $x0 = COPY %9(s64)
+    $x1 = COPY %16(s64)
+    RET_ReallyLR implicit $x0
+
+...

diff --git a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/mul.mir b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/mul.mir
index c92a55d0af32..b146aa5ff13d 100644
--- a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/mul.mir
+++ b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/mul.mir
@@ -439,9 +439,9 @@ body:             |
     ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1
     ; MIPS32: [[COPY2:%[0-9]+]]:_(p0) = COPY $a2
     ; MIPS32: [[COPY3:%[0-9]+]]:_(p0) = COPY $a3
-    ; MIPS32: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[COPY]], [[COPY1]]
     ; MIPS32: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[COPY]], [[COPY1]]
     ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; MIPS32: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[COPY]], [[COPY1]]
     ; MIPS32: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[UMULH]](s32), [[C]]
     ; MIPS32: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
     ; MIPS32: [[COPY4:%[0-9]+]]:_(s32) = COPY [[ICMP]](s32)

diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/mul.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/mul.ll
index 659eadf181c0..f7250ccde898 100644
--- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/mul.ll
+++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/mul.ll
@@ -180,13 +180,13 @@ declare { i32, i1 } @llvm.umul.with.overflow.i32(i32, i32)
 define void @umul_with_overflow(i32 %lhs, i32 %rhs, i32* %pmul, i1* %pcarry_flag) {
 ; MIPS32-LABEL: umul_with_overflow:
 ; MIPS32:       # %bb.0:
-; MIPS32-NEXT:    mul $1, $4, $5
 ; MIPS32-NEXT:    multu $4, $5
-; MIPS32-NEXT:    mfhi $2
-; MIPS32-NEXT:    sltu $2, $zero, $2
-; MIPS32-NEXT:    andi $2, $2, 1
-; MIPS32-NEXT:    sb $2, 0($7)
-; MIPS32-NEXT:    sw $1, 0($6)
+; MIPS32-NEXT:    mfhi $1
+; MIPS32-NEXT:    mul $2, $4, $5
+; MIPS32-NEXT:    sltu $1, $zero, $1
+; MIPS32-NEXT:    andi $1, $1, 1
+; MIPS32-NEXT:    sb $1, 0($7)
+; MIPS32-NEXT:    sw $2, 0($6)
 ; MIPS32-NEXT:    jr $ra
 ; MIPS32-NEXT:    nop
   %res = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %lhs, i32 %rhs)


        

