[llvm] d624b92 - [RISCV] Add precommit tests for D143646

Fri Feb 10 03:39:03 PST 2023

Author: LiaoChunyu
Date: 2023-02-10T19:37:28+08:00
New Revision: d624b9217d35740051e91066fd1d59bff201ee6a

URL: https://github.com/llvm/llvm-project/commit/d624b9217d35740051e91066fd1d59bff201ee6a
DIFF: https://github.com/llvm/llvm-project/commit/d624b9217d35740051e91066fd1d59bff201ee6a.diff

LOG: [RISCV] Add precommit tests for D143646

Added: 
    llvm/test/CodeGen/RISCV/overflow-intrinsics.ll

Modified: 
    

Removed: 
    


################################################################################
diff --git a/llvm/test/CodeGen/RISCV/overflow-intrinsics.ll b/llvm/test/CodeGen/RISCV/overflow-intrinsics.ll
new file mode 100644
index 0000000000000..a9124e4118923

--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/overflow-intrinsics.ll
@@ -0,0 +1,1286 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefixes=RV32
+; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefixes=RV64
+
+; Tests copied from llvm/test/Transforms/CodeGenPrepare/X86/overflow-intrinsics.ll
+; to exercise shouldFormOverflowOp on RISC-V.
+
+define i64 @uaddo1_overflow_used(i64 %a, i64 %b) nounwind ssp {
+; RV32-LABEL: uaddo1_overflow_used:
+; RV32:       # %bb.0:
+; RV32-NEXT:    add a5, a3, a1
+; RV32-NEXT:    add a4, a2, a0
+; RV32-NEXT:    sltu a6, a4, a2
+; RV32-NEXT:    add a5, a5, a6
+; RV32-NEXT:    beq a5, a1, .LBB0_2
+; RV32-NEXT:  # %bb.1:
+; RV32-NEXT:    sltu a0, a5, a1
+; RV32-NEXT:    beqz a0, .LBB0_3
+; RV32-NEXT:    j .LBB0_4
+; RV32-NEXT:  .LBB0_2:
+; RV32-NEXT:    sltu a0, a4, a0
+; RV32-NEXT:    bnez a0, .LBB0_4
+; RV32-NEXT:  .LBB0_3:
+; RV32-NEXT:    li a2, 42
+; RV32-NEXT:  .LBB0_4:
+; RV32-NEXT:    neg a1, a0
+; RV32-NEXT:    and a1, a1, a3
+; RV32-NEXT:    mv a0, a2
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: uaddo1_overflow_used:
+; RV64:       # %bb.0:
+; RV64-NEXT:    add a2, a1, a0
+; RV64-NEXT:    bltu a2, a0, .LBB0_2
+; RV64-NEXT:  # %bb.1:
+; RV64-NEXT:    li a1, 42
+; RV64-NEXT:  .LBB0_2:
+; RV64-NEXT:    mv a0, a1
+; RV64-NEXT:    ret
+  %add = add i64 %b, %a
+  %cmp = icmp ult i64 %add, %a
+  %Q = select i1 %cmp, i64 %b, i64 42
+  ret i64 %Q
+}
+
+define i64 @uaddo1_math_overflow_used(i64 %a, i64 %b, ptr %res) nounwind ssp {
+; RV32-LABEL: uaddo1_math_overflow_used:
+; RV32:       # %bb.0:
+; RV32-NEXT:    add a5, a3, a1
+; RV32-NEXT:    add a0, a2, a0
+; RV32-NEXT:    sltu a1, a0, a2
+; RV32-NEXT:    add a5, a5, a1
+; RV32-NEXT:    beq a5, a3, .LBB1_2
+; RV32-NEXT:  # %bb.1:
+; RV32-NEXT:    sltu a1, a5, a3
+; RV32-NEXT:  .LBB1_2:
+; RV32-NEXT:    bnez a1, .LBB1_4
+; RV32-NEXT:  # %bb.3:
+; RV32-NEXT:    li a2, 42
+; RV32-NEXT:  .LBB1_4:
+; RV32-NEXT:    neg a1, a1
+; RV32-NEXT:    and a1, a1, a3
+; RV32-NEXT:    sw a0, 0(a4)
+; RV32-NEXT:    sw a5, 4(a4)
+; RV32-NEXT:    mv a0, a2
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: uaddo1_math_overflow_used:
+; RV64:       # %bb.0:
+; RV64-NEXT:    add a0, a1, a0
+; RV64-NEXT:    bltu a0, a1, .LBB1_2
+; RV64-NEXT:  # %bb.1:
+; RV64-NEXT:    li a1, 42
+; RV64-NEXT:  .LBB1_2:
+; RV64-NEXT:    sd a0, 0(a2)
+; RV64-NEXT:    mv a0, a1
+; RV64-NEXT:    ret
+  %add = add i64 %b, %a
+  %cmp = icmp ult i64 %add, %a
+  %Q = select i1 %cmp, i64 %b, i64 42
+  store i64 %add, ptr %res
+  ret i64 %Q
+}
+
+define i64 @uaddo2_overflow_used(i64 %a, i64 %b) nounwind ssp {
+; RV32-LABEL: uaddo2_overflow_used:
+; RV32:       # %bb.0:
+; RV32-NEXT:    add a1, a3, a1
+; RV32-NEXT:    add a0, a2, a0
+; RV32-NEXT:    sltu a0, a0, a2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    beq a1, a3, .LBB2_2
+; RV32-NEXT:  # %bb.1:
+; RV32-NEXT:    sltu a0, a1, a3
+; RV32-NEXT:  .LBB2_2:
+; RV32-NEXT:    bnez a0, .LBB2_4
+; RV32-NEXT:  # %bb.3:
+; RV32-NEXT:    li a2, 42
+; RV32-NEXT:  .LBB2_4:
+; RV32-NEXT:    neg a1, a0
+; RV32-NEXT:    and a1, a1, a3
+; RV32-NEXT:    mv a0, a2
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: uaddo2_overflow_used:
+; RV64:       # %bb.0:
+; RV64-NEXT:    add a0, a1, a0
+; RV64-NEXT:    bltu a0, a1, .LBB2_2
+; RV64-NEXT:  # %bb.1:
+; RV64-NEXT:    li a1, 42
+; RV64-NEXT:  .LBB2_2:
+; RV64-NEXT:    mv a0, a1
+; RV64-NEXT:    ret
+  %add = add i64 %b, %a
+  %cmp = icmp ult i64 %add, %b
+  %Q = select i1 %cmp, i64 %b, i64 42
+  ret i64 %Q
+}
+
+define i64 @uaddo2_math_overflow_used(i64 %a, i64 %b, ptr %res) nounwind ssp {
+; RV32-LABEL: uaddo2_math_overflow_used:
+; RV32:       # %bb.0:
+; RV32-NEXT:    add a5, a3, a1
+; RV32-NEXT:    add a0, a2, a0
+; RV32-NEXT:    sltu a1, a0, a2
+; RV32-NEXT:    add a5, a5, a1
+; RV32-NEXT:    beq a5, a3, .LBB3_2
+; RV32-NEXT:  # %bb.1:
+; RV32-NEXT:    sltu a1, a5, a3
+; RV32-NEXT:  .LBB3_2:
+; RV32-NEXT:    bnez a1, .LBB3_4
+; RV32-NEXT:  # %bb.3:
+; RV32-NEXT:    li a2, 42
+; RV32-NEXT:  .LBB3_4:
+; RV32-NEXT:    neg a1, a1
+; RV32-NEXT:    and a1, a1, a3
+; RV32-NEXT:    sw a0, 0(a4)
+; RV32-NEXT:    sw a5, 4(a4)
+; RV32-NEXT:    mv a0, a2
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: uaddo2_math_overflow_used:
+; RV64:       # %bb.0:
+; RV64-NEXT:    add a0, a1, a0
+; RV64-NEXT:    bltu a0, a1, .LBB3_2
+; RV64-NEXT:  # %bb.1:
+; RV64-NEXT:    li a1, 42
+; RV64-NEXT:  .LBB3_2:
+; RV64-NEXT:    sd a0, 0(a2)
+; RV64-NEXT:    mv a0, a1
+; RV64-NEXT:    ret
+  %add = add i64 %b, %a
+  %cmp = icmp ult i64 %add, %b
+  %Q = select i1 %cmp, i64 %b, i64 42
+  store i64 %add, ptr %res
+  ret i64 %Q
+}
+
+define i64 @uaddo3_overflow_used(i64 %a, i64 %b) nounwind ssp {
+; RV32-LABEL: uaddo3_overflow_used:
+; RV32:       # %bb.0:
+; RV32-NEXT:    add a1, a3, a1
+; RV32-NEXT:    add a0, a2, a0
+; RV32-NEXT:    sltu a0, a0, a2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    beq a3, a1, .LBB4_2
+; RV32-NEXT:  # %bb.1:
+; RV32-NEXT:    sltu a0, a1, a3
+; RV32-NEXT:  .LBB4_2:
+; RV32-NEXT:    bnez a0, .LBB4_4
+; RV32-NEXT:  # %bb.3:
+; RV32-NEXT:    li a2, 42
+; RV32-NEXT:  .LBB4_4:
+; RV32-NEXT:    neg a1, a0
+; RV32-NEXT:    and a1, a1, a3
+; RV32-NEXT:    mv a0, a2
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: uaddo3_overflow_used:
+; RV64:       # %bb.0:
+; RV64-NEXT:    add a0, a1, a0
+; RV64-NEXT:    bltu a0, a1, .LBB4_2
+; RV64-NEXT:  # %bb.1:
+; RV64-NEXT:    li a1, 42
+; RV64-NEXT:  .LBB4_2:
+; RV64-NEXT:    mv a0, a1
+; RV64-NEXT:    ret
+  %add = add i64 %b, %a
+  %cmp = icmp ugt i64 %b, %add
+  %Q = select i1 %cmp, i64 %b, i64 42
+  ret i64 %Q
+}
+
+define i64 @uaddo3_math_overflow_used(i64 %a, i64 %b, ptr %res) nounwind ssp {
+; RV32-LABEL: uaddo3_math_overflow_used:
+; RV32:       # %bb.0:
+; RV32-NEXT:    add a5, a3, a1
+; RV32-NEXT:    add a0, a2, a0
+; RV32-NEXT:    sltu a1, a0, a2
+; RV32-NEXT:    add a5, a5, a1
+; RV32-NEXT:    beq a5, a3, .LBB5_2
+; RV32-NEXT:  # %bb.1:
+; RV32-NEXT:    sltu a1, a5, a3
+; RV32-NEXT:  .LBB5_2:
+; RV32-NEXT:    bnez a1, .LBB5_4
+; RV32-NEXT:  # %bb.3:
+; RV32-NEXT:    li a2, 42
+; RV32-NEXT:  .LBB5_4:
+; RV32-NEXT:    neg a1, a1
+; RV32-NEXT:    and a1, a1, a3
+; RV32-NEXT:    sw a0, 0(a4)
+; RV32-NEXT:    sw a5, 4(a4)
+; RV32-NEXT:    mv a0, a2
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: uaddo3_math_overflow_used:
+; RV64:       # %bb.0:
+; RV64-NEXT:    add a0, a1, a0
+; RV64-NEXT:    bltu a0, a1, .LBB5_2
+; RV64-NEXT:  # %bb.1:
+; RV64-NEXT:    li a1, 42
+; RV64-NEXT:  .LBB5_2:
+; RV64-NEXT:    sd a0, 0(a2)
+; RV64-NEXT:    mv a0, a1
+; RV64-NEXT:    ret
+  %add = add i64 %b, %a
+  %cmp = icmp ugt i64 %b, %add
+  %Q = select i1 %cmp, i64 %b, i64 42
+  store i64 %add, ptr %res
+  ret i64 %Q
+}
+
+; TODO? CGP sinks the compare before we have a chance to form the overflow intrinsic.
+
+define i64 @uaddo4(i64 %a, i64 %b, i1 %c) nounwind ssp {
+; RV32-LABEL: uaddo4:
+; RV32:       # %bb.0: # %entry
+; RV32-NEXT:    andi a4, a4, 1
+; RV32-NEXT:    beqz a4, .LBB6_6
+; RV32-NEXT:  # %bb.1: # %next
+; RV32-NEXT:    add a1, a3, a1
+; RV32-NEXT:    add a0, a2, a0
+; RV32-NEXT:    sltu a0, a0, a2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    beq a3, a1, .LBB6_3
+; RV32-NEXT:  # %bb.2: # %next
+; RV32-NEXT:    sltu a0, a1, a3
+; RV32-NEXT:  .LBB6_3: # %next
+; RV32-NEXT:    bnez a0, .LBB6_5
+; RV32-NEXT:  # %bb.4: # %next
+; RV32-NEXT:    li a2, 42
+; RV32-NEXT:  .LBB6_5: # %next
+; RV32-NEXT:    neg a1, a0
+; RV32-NEXT:    and a1, a1, a3
+; RV32-NEXT:    mv a0, a2
+; RV32-NEXT:    ret
+; RV32-NEXT:  .LBB6_6: # %exit
+; RV32-NEXT:    li a0, 0
+; RV32-NEXT:    li a1, 0
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: uaddo4:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    andi a2, a2, 1
+; RV64-NEXT:    beqz a2, .LBB6_4
+; RV64-NEXT:  # %bb.1: # %next
+; RV64-NEXT:    add a0, a1, a0
+; RV64-NEXT:    bltu a0, a1, .LBB6_3
+; RV64-NEXT:  # %bb.2: # %next
+; RV64-NEXT:    li a1, 42
+; RV64-NEXT:  .LBB6_3: # %next
+; RV64-NEXT:    mv a0, a1
+; RV64-NEXT:    ret
+; RV64-NEXT:  .LBB6_4: # %exit
+; RV64-NEXT:    li a0, 0
+; RV64-NEXT:    ret
+entry:
+  %add = add i64 %b, %a
+  %cmp = icmp ugt i64 %b, %add
+  br i1 %c, label %next, label %exit
+
+next:
+  %Q = select i1 %cmp, i64 %b, i64 42
+  ret i64 %Q
+
+exit:
+  ret i64 0
+}
+
+define i64 @uaddo5(i64 %a, i64 %b, ptr %ptr, i1 %c) nounwind ssp {
+; RV32-LABEL: uaddo5:
+; RV32:       # %bb.0: # %entry
+; RV32-NEXT:    andi a5, a5, 1
+; RV32-NEXT:    add a1, a3, a1
+; RV32-NEXT:    add a6, a2, a0
+; RV32-NEXT:    sltu a0, a6, a2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    sw a6, 0(a4)
+; RV32-NEXT:    sw a1, 4(a4)
+; RV32-NEXT:    beqz a5, .LBB7_6
+; RV32-NEXT:  # %bb.1: # %next
+; RV32-NEXT:    beq a3, a1, .LBB7_3
+; RV32-NEXT:  # %bb.2: # %next
+; RV32-NEXT:    sltu a0, a1, a3
+; RV32-NEXT:  .LBB7_3: # %next
+; RV32-NEXT:    bnez a0, .LBB7_5
+; RV32-NEXT:  # %bb.4: # %next
+; RV32-NEXT:    li a2, 42
+; RV32-NEXT:  .LBB7_5: # %next
+; RV32-NEXT:    neg a1, a0
+; RV32-NEXT:    and a1, a1, a3
+; RV32-NEXT:    mv a0, a2
+; RV32-NEXT:    ret
+; RV32-NEXT:  .LBB7_6: # %exit
+; RV32-NEXT:    li a0, 0
+; RV32-NEXT:    li a1, 0
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: uaddo5:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    andi a3, a3, 1
+; RV64-NEXT:    add a0, a1, a0
+; RV64-NEXT:    sd a0, 0(a2)
+; RV64-NEXT:    beqz a3, .LBB7_4
+; RV64-NEXT:  # %bb.1: # %next
+; RV64-NEXT:    bltu a0, a1, .LBB7_3
+; RV64-NEXT:  # %bb.2: # %next
+; RV64-NEXT:    li a1, 42
+; RV64-NEXT:  .LBB7_3: # %next
+; RV64-NEXT:    mv a0, a1
+; RV64-NEXT:    ret
+; RV64-NEXT:  .LBB7_4: # %exit
+; RV64-NEXT:    li a0, 0
+; RV64-NEXT:    ret
+entry:
+  %add = add i64 %b, %a
+  store i64 %add, ptr %ptr
+  %cmp = icmp ugt i64 %b, %add
+  br i1 %c, label %next, label %exit
+
+next:
+  %Q = select i1 %cmp, i64 %b, i64 42
+  ret i64 %Q
+
+exit:
+  ret i64 0
+}
+
+; Instcombine folds (a + b <u a)  to (a ^ -1 <u b). Make sure we match this
+; pattern as well.
+define i64 @uaddo6_xor(i64 %a, i64 %b) {
+; RV32-LABEL: uaddo6_xor:
+; RV32:       # %bb.0:
+; RV32-NEXT:    not a1, a1
+; RV32-NEXT:    beq a1, a3, .LBB8_2
+; RV32-NEXT:  # %bb.1:
+; RV32-NEXT:    sltu a0, a1, a3
+; RV32-NEXT:    beqz a0, .LBB8_3
+; RV32-NEXT:    j .LBB8_4
+; RV32-NEXT:  .LBB8_2:
+; RV32-NEXT:    not a0, a0
+; RV32-NEXT:    sltu a0, a0, a2
+; RV32-NEXT:    bnez a0, .LBB8_4
+; RV32-NEXT:  .LBB8_3:
+; RV32-NEXT:    li a2, 42
+; RV32-NEXT:  .LBB8_4:
+; RV32-NEXT:    neg a1, a0
+; RV32-NEXT:    and a1, a1, a3
+; RV32-NEXT:    mv a0, a2
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: uaddo6_xor:
+; RV64:       # %bb.0:
+; RV64-NEXT:    not a2, a0
+; RV64-NEXT:    mv a0, a1
+; RV64-NEXT:    bltu a2, a1, .LBB8_2
+; RV64-NEXT:  # %bb.1:
+; RV64-NEXT:    li a0, 42
+; RV64-NEXT:  .LBB8_2:
+; RV64-NEXT:    ret
+  %x = xor i64 %a, -1
+  %cmp = icmp ult i64 %x, %b
+  %Q = select i1 %cmp, i64 %b, i64 42
+  ret i64 %Q
+}
+
+define i64 @uaddo6_xor_commuted(i64 %a, i64 %b) {
+; RV32-LABEL: uaddo6_xor_commuted:
+; RV32:       # %bb.0:
+; RV32-NEXT:    not a1, a1
+; RV32-NEXT:    beq a1, a3, .LBB9_2
+; RV32-NEXT:  # %bb.1:
+; RV32-NEXT:    sltu a0, a1, a3
+; RV32-NEXT:    beqz a0, .LBB9_3
+; RV32-NEXT:    j .LBB9_4
+; RV32-NEXT:  .LBB9_2:
+; RV32-NEXT:    not a0, a0
+; RV32-NEXT:    sltu a0, a0, a2
+; RV32-NEXT:    bnez a0, .LBB9_4
+; RV32-NEXT:  .LBB9_3:
+; RV32-NEXT:    li a2, 42
+; RV32-NEXT:  .LBB9_4:
+; RV32-NEXT:    neg a1, a0
+; RV32-NEXT:    and a1, a1, a3
+; RV32-NEXT:    mv a0, a2
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: uaddo6_xor_commuted:
+; RV64:       # %bb.0:
+; RV64-NEXT:    not a2, a0
+; RV64-NEXT:    mv a0, a1
+; RV64-NEXT:    bltu a2, a1, .LBB9_2
+; RV64-NEXT:  # %bb.1:
+; RV64-NEXT:    li a0, 42
+; RV64-NEXT:  .LBB9_2:
+; RV64-NEXT:    ret
+  %x = xor i64 %a, -1
+  %cmp = icmp ult i64 %x, %b
+  %Q = select i1 %cmp, i64 %b, i64 42
+  ret i64 %Q
+}
+
+declare void @use(i64)
+
+define i64 @uaddo6_xor_multi_use(i64 %a, i64 %b) {
+; RV32-LABEL: uaddo6_xor_multi_use:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s1, 4(sp) # 4-byte Folded Spill
+; RV32-NEXT:    .cfi_offset ra, -4
+; RV32-NEXT:    .cfi_offset s0, -8
+; RV32-NEXT:    .cfi_offset s1, -12
+; RV32-NEXT:    mv s0, a2
+; RV32-NEXT:    not a1, a1
+; RV32-NEXT:    not a0, a0
+; RV32-NEXT:    beq a1, a3, .LBB10_2
+; RV32-NEXT:  # %bb.1:
+; RV32-NEXT:    sltu a2, a1, a3
+; RV32-NEXT:    beqz a2, .LBB10_3
+; RV32-NEXT:    j .LBB10_4
+; RV32-NEXT:  .LBB10_2:
+; RV32-NEXT:    sltu a2, a0, s0
+; RV32-NEXT:    bnez a2, .LBB10_4
+; RV32-NEXT:  .LBB10_3:
+; RV32-NEXT:    li s0, 42
+; RV32-NEXT:  .LBB10_4:
+; RV32-NEXT:    neg s1, a2
+; RV32-NEXT:    and s1, s1, a3
+; RV32-NEXT:    call use@plt
+; RV32-NEXT:    mv a0, s0
+; RV32-NEXT:    mv a1, s1
+; RV32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: uaddo6_xor_multi_use:
+; RV64:       # %bb.0:
+; RV64-NEXT:    addi sp, sp, -16
+; RV64-NEXT:    .cfi_def_cfa_offset 16
+; RV64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s0, 0(sp) # 8-byte Folded Spill
+; RV64-NEXT:    .cfi_offset ra, -8
+; RV64-NEXT:    .cfi_offset s0, -16
+; RV64-NEXT:    not a0, a0
+; RV64-NEXT:    mv s0, a1
+; RV64-NEXT:    bltu a0, a1, .LBB10_2
+; RV64-NEXT:  # %bb.1:
+; RV64-NEXT:    li s0, 42
+; RV64-NEXT:  .LBB10_2:
+; RV64-NEXT:    call use@plt
+; RV64-NEXT:    mv a0, s0
+; RV64-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
+; RV64-NEXT:    addi sp, sp, 16
+; RV64-NEXT:    ret
+  %x = xor i64 -1, %a
+  %cmp = icmp ult i64 %x, %b
+  %Q = select i1 %cmp, i64 %b, i64 42
+  call void @use(i64 %x)
+  ret i64 %Q
+}
+
+; Make sure we do not use the XOR binary operator as insert point, as it may
+; come before the second operand of the overflow intrinsic.
+define i1 @uaddo6_xor_op_after_XOR(i32 %a, ptr %b.ptr) {
+; RV32-LABEL: uaddo6_xor_op_after_XOR:
+; RV32:       # %bb.0:
+; RV32-NEXT:    lw a1, 0(a1)
+; RV32-NEXT:    not a0, a0
+; RV32-NEXT:    sltu a0, a0, a1
+; RV32-NEXT:    xori a0, a0, 1
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: uaddo6_xor_op_after_XOR:
+; RV64:       # %bb.0:
+; RV64-NEXT:    lw a1, 0(a1)
+; RV64-NEXT:    not a0, a0
+; RV64-NEXT:    sext.w a0, a0
+; RV64-NEXT:    sltu a0, a0, a1
+; RV64-NEXT:    xori a0, a0, 1
+; RV64-NEXT:    ret
+  %x = xor i32 %a, -1
+  %b = load i32, ptr %b.ptr, align 8
+  %cmp14 = icmp ugt i32 %b, %x
+  %ov = xor i1 %cmp14, true
+  ret i1 %ov
+}
+
+; When adding 1, the general pattern for add-overflow may be different due to icmp canonicalization.
+; PR31754: https://bugs.llvm.org/show_bug.cgi?id=31754
+
+define i1 @uaddo_i64_increment(i64 %x, ptr %p) {
+; RV32-LABEL: uaddo_i64_increment:
+; RV32:       # %bb.0:
+; RV32-NEXT:    mv a3, a0
+; RV32-NEXT:    addi a4, a0, 1
+; RV32-NEXT:    sltu a0, a4, a0
+; RV32-NEXT:    add a5, a1, a0
+; RV32-NEXT:    bgeu a4, a3, .LBB12_2
+; RV32-NEXT:  # %bb.1:
+; RV32-NEXT:    sltu a0, a5, a1
+; RV32-NEXT:  .LBB12_2:
+; RV32-NEXT:    sw a4, 0(a2)
+; RV32-NEXT:    sw a5, 4(a2)
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: uaddo_i64_increment:
+; RV64:       # %bb.0:
+; RV64-NEXT:    addi a2, a0, 1
+; RV64-NEXT:    seqz a0, a2
+; RV64-NEXT:    sd a2, 0(a1)
+; RV64-NEXT:    ret
+  %a = add i64 %x, 1
+  %ov = icmp eq i64 %a, 0
+  store i64 %a, ptr %p
+  ret i1 %ov
+}
+
+define i1 @uaddo_i8_increment_noncanonical_1(i8 %x, ptr %p) {
+; RV32-LABEL: uaddo_i8_increment_noncanonical_1:
+; RV32:       # %bb.0:
+; RV32-NEXT:    andi a0, a0, 255
+; RV32-NEXT:    addi a2, a0, 1
+; RV32-NEXT:    andi a0, a2, 255
+; RV32-NEXT:    xor a0, a0, a2
+; RV32-NEXT:    snez a0, a0
+; RV32-NEXT:    sb a2, 0(a1)
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: uaddo_i8_increment_noncanonical_1:
+; RV64:       # %bb.0:
+; RV64-NEXT:    andi a0, a0, 255
+; RV64-NEXT:    addi a2, a0, 1
+; RV64-NEXT:    andi a0, a2, 255
+; RV64-NEXT:    xor a0, a0, a2
+; RV64-NEXT:    snez a0, a0
+; RV64-NEXT:    sb a2, 0(a1)
+; RV64-NEXT:    ret
+  %a = add i8 1, %x        ; commute
+  %ov = icmp eq i8 %a, 0
+  store i8 %a, ptr %p
+  ret i1 %ov
+}
+
+define i1 @uaddo_i32_increment_noncanonical_2(i32 %x, ptr %p) {
+; RV32-LABEL: uaddo_i32_increment_noncanonical_2:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi a2, a0, 1
+; RV32-NEXT:    seqz a0, a2
+; RV32-NEXT:    sw a2, 0(a1)
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: uaddo_i32_increment_noncanonical_2:
+; RV64:       # %bb.0:
+; RV64-NEXT:    addiw a2, a0, 1
+; RV64-NEXT:    seqz a0, a2
+; RV64-NEXT:    sw a2, 0(a1)
+; RV64-NEXT:    ret
+  %a = add i32 %x, 1
+  %ov = icmp eq i32 0, %a   ; commute
+  store i32 %a, ptr %p
+  ret i1 %ov
+}
+
+define i1 @uaddo_i16_increment_noncanonical_3(i16 %x, ptr %p) {
+; RV32-LABEL: uaddo_i16_increment_noncanonical_3:
+; RV32:       # %bb.0:
+; RV32-NEXT:    lui a2, 16
+; RV32-NEXT:    addi a2, a2, -1
+; RV32-NEXT:    and a0, a0, a2
+; RV32-NEXT:    addi a3, a0, 1
+; RV32-NEXT:    and a2, a3, a2
+; RV32-NEXT:    xor a2, a2, a3
+; RV32-NEXT:    snez a0, a2
+; RV32-NEXT:    sh a3, 0(a1)
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: uaddo_i16_increment_noncanonical_3:
+; RV64:       # %bb.0:
+; RV64-NEXT:    lui a2, 16
+; RV64-NEXT:    addiw a2, a2, -1
+; RV64-NEXT:    and a0, a0, a2
+; RV64-NEXT:    addi a3, a0, 1
+; RV64-NEXT:    and a2, a3, a2
+; RV64-NEXT:    xor a2, a2, a3
+; RV64-NEXT:    snez a0, a2
+; RV64-NEXT:    sh a3, 0(a1)
+; RV64-NEXT:    ret
+  %a = add i16 1, %x        ; commute
+  %ov = icmp eq i16 0, %a   ; commute
+  store i16 %a, ptr %p
+  ret i1 %ov
+}
+
+; The overflow check may be against the input rather than the sum.
+
+define i1 @uaddo_i64_increment_alt(i64 %x, ptr %p) {
+; RV32-LABEL: uaddo_i64_increment_alt:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi a3, a0, 1
+; RV32-NEXT:    sltu a4, a3, a0
+; RV32-NEXT:    add a4, a1, a4
+; RV32-NEXT:    sw a3, 0(a2)
+; RV32-NEXT:    and a0, a0, a1
+; RV32-NEXT:    addi a0, a0, 1
+; RV32-NEXT:    seqz a0, a0
+; RV32-NEXT:    sw a4, 4(a2)
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: uaddo_i64_increment_alt:
+; RV64:       # %bb.0:
+; RV64-NEXT:    addi a2, a0, 1
+; RV64-NEXT:    seqz a0, a2
+; RV64-NEXT:    sd a2, 0(a1)
+; RV64-NEXT:    ret
+  %a = add i64 %x, 1
+  store i64 %a, ptr %p
+  %ov = icmp eq i64 %x, -1
+  ret i1 %ov
+}
+
+; Make sure insertion is done correctly based on dominance.
+
+define i1 @uaddo_i64_increment_alt_dom(i64 %x, ptr %p) {
+; RV32-LABEL: uaddo_i64_increment_alt_dom:
+; RV32:       # %bb.0:
+; RV32-NEXT:    and a3, a0, a1
+; RV32-NEXT:    addi a3, a3, 1
+; RV32-NEXT:    seqz a3, a3
+; RV32-NEXT:    addi a4, a0, 1
+; RV32-NEXT:    sltu a0, a4, a0
+; RV32-NEXT:    add a0, a1, a0
+; RV32-NEXT:    sw a4, 0(a2)
+; RV32-NEXT:    sw a0, 4(a2)
+; RV32-NEXT:    mv a0, a3
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: uaddo_i64_increment_alt_dom:
+; RV64:       # %bb.0:
+; RV64-NEXT:    addi a2, a0, 1
+; RV64-NEXT:    seqz a0, a2
+; RV64-NEXT:    sd a2, 0(a1)
+; RV64-NEXT:    ret
+  %ov = icmp eq i64 %x, -1
+  %a = add i64 %x, 1
+  store i64 %a, ptr %p
+  ret i1 %ov
+}
+
+; The overflow check may be against the input rather than the sum.
+
+define i1 @uaddo_i64_decrement_alt(i64 %x, ptr %p) {
+; RV32-LABEL: uaddo_i64_decrement_alt:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi a3, a0, -1
+; RV32-NEXT:    sltu a4, a3, a0
+; RV32-NEXT:    add a4, a1, a4
+; RV32-NEXT:    addi a4, a4, -1
+; RV32-NEXT:    sw a3, 0(a2)
+; RV32-NEXT:    or a0, a0, a1
+; RV32-NEXT:    snez a0, a0
+; RV32-NEXT:    sw a4, 4(a2)
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: uaddo_i64_decrement_alt:
+; RV64:       # %bb.0:
+; RV64-NEXT:    addi a2, a0, -1
+; RV64-NEXT:    snez a0, a0
+; RV64-NEXT:    sd a2, 0(a1)
+; RV64-NEXT:    ret
+  %a = add i64 %x, -1
+  store i64 %a, ptr %p
+  %ov = icmp ne i64 %x, 0
+  ret i1 %ov
+}
+
+; Make sure insertion is done correctly based on dominance.
+
+define i1 @uaddo_i64_decrement_alt_dom(i64 %x, ptr %p) {
+; RV32-LABEL: uaddo_i64_decrement_alt_dom:
+; RV32:       # %bb.0:
+; RV32-NEXT:    or a3, a0, a1
+; RV32-NEXT:    snez a3, a3
+; RV32-NEXT:    addi a4, a0, -1
+; RV32-NEXT:    sltu a0, a4, a0
+; RV32-NEXT:    add a0, a1, a0
+; RV32-NEXT:    addi a0, a0, -1
+; RV32-NEXT:    sw a4, 0(a2)
+; RV32-NEXT:    sw a0, 4(a2)
+; RV32-NEXT:    mv a0, a3
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: uaddo_i64_decrement_alt_dom:
+; RV64:       # %bb.0:
+; RV64-NEXT:    snez a2, a0
+; RV64-NEXT:    addi a0, a0, -1
+; RV64-NEXT:    sd a0, 0(a1)
+; RV64-NEXT:    mv a0, a2
+; RV64-NEXT:    ret
+  %ov = icmp ne i64 %x, 0
+  %a = add i64 %x, -1
+  store i64 %a, ptr %p
+  ret i1 %ov
+}
+
+; No transform for illegal types.
+
+define i1 @uaddo_i42_increment_illegal_type(i42 %x, ptr %p) {
+; RV32-LABEL: uaddo_i42_increment_illegal_type:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi a3, a0, 1
+; RV32-NEXT:    sltu a0, a3, a0
+; RV32-NEXT:    add a0, a1, a0
+; RV32-NEXT:    andi a1, a0, 1023
+; RV32-NEXT:    or a0, a3, a1
+; RV32-NEXT:    seqz a0, a0
+; RV32-NEXT:    sw a3, 0(a2)
+; RV32-NEXT:    sh a1, 4(a2)
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: uaddo_i42_increment_illegal_type:
+; RV64:       # %bb.0:
+; RV64-NEXT:    addi a2, a0, 1
+; RV64-NEXT:    slli a0, a2, 22
+; RV64-NEXT:    srli a3, a0, 22
+; RV64-NEXT:    seqz a0, a3
+; RV64-NEXT:    sw a2, 0(a1)
+; RV64-NEXT:    srli a3, a3, 32
+; RV64-NEXT:    sh a3, 4(a1)
+; RV64-NEXT:    ret
+  %a = add i42 %x, 1
+  %ov = icmp eq i42 %a, 0
+  store i42 %a, ptr %p
+  ret i1 %ov
+}
+
+define i1 @usubo_ult_i64_overflow_used(i64 %x, i64 %y, ptr %p) {
+; RV32-LABEL: usubo_ult_i64_overflow_used:
+; RV32:       # %bb.0:
+; RV32-NEXT:    beq a1, a3, .LBB21_2
+; RV32-NEXT:  # %bb.1:
+; RV32-NEXT:    sltu a0, a1, a3
+; RV32-NEXT:    ret
+; RV32-NEXT:  .LBB21_2:
+; RV32-NEXT:    sltu a0, a0, a2
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: usubo_ult_i64_overflow_used:
+; RV64:       # %bb.0:
+; RV64-NEXT:    sltu a0, a0, a1
+; RV64-NEXT:    ret
+  %s = sub i64 %x, %y
+  %ov = icmp ult i64 %x, %y
+  ret i1 %ov
+}
+
+define i1 @usubo_ult_i64_math_overflow_used(i64 %x, i64 %y, ptr %p) {
+; RV32-LABEL: usubo_ult_i64_math_overflow_used:
+; RV32:       # %bb.0:
+; RV32-NEXT:    mv a5, a0
+; RV32-NEXT:    sltu a0, a0, a2
+; RV32-NEXT:    sub a6, a1, a3
+; RV32-NEXT:    sub a6, a6, a0
+; RV32-NEXT:    sub a5, a5, a2
+; RV32-NEXT:    sw a5, 0(a4)
+; RV32-NEXT:    sw a6, 4(a4)
+; RV32-NEXT:    beq a1, a3, .LBB22_2
+; RV32-NEXT:  # %bb.1:
+; RV32-NEXT:    sltu a0, a1, a3
+; RV32-NEXT:  .LBB22_2:
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: usubo_ult_i64_math_overflow_used:
+; RV64:       # %bb.0:
+; RV64-NEXT:    sub a3, a0, a1
+; RV64-NEXT:    sltu a0, a0, a1
+; RV64-NEXT:    sd a3, 0(a2)
+; RV64-NEXT:    ret
+  %s = sub i64 %x, %y
+  store i64 %s, ptr %p
+  %ov = icmp ult i64 %x, %y
+  ret i1 %ov
+}
+
+; Verify insertion point for single-BB. Toggle predicate.
+
+define i1 @usubo_ugt_i32(i32 %x, i32 %y, ptr %p) {
+; RV32-LABEL: usubo_ugt_i32:
+; RV32:       # %bb.0:
+; RV32-NEXT:    sltu a3, a0, a1
+; RV32-NEXT:    sub a0, a0, a1
+; RV32-NEXT:    sw a0, 0(a2)
+; RV32-NEXT:    mv a0, a3
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: usubo_ugt_i32:
+; RV64:       # %bb.0:
+; RV64-NEXT:    sext.w a3, a1
+; RV64-NEXT:    sext.w a4, a0
+; RV64-NEXT:    sltu a3, a4, a3
+; RV64-NEXT:    subw a0, a0, a1
+; RV64-NEXT:    sw a0, 0(a2)
+; RV64-NEXT:    mv a0, a3
+; RV64-NEXT:    ret
+  %ov = icmp ugt i32 %y, %x
+  %s = sub i32 %x, %y
+  store i32 %s, ptr %p
+  ret i1 %ov
+}
+
+; Constant operand should match.
+
+define i1 @usubo_ugt_constant_op0_i8(i8 %x, ptr %p) {
+; RV32-LABEL: usubo_ugt_constant_op0_i8:
+; RV32:       # %bb.0:
+; RV32-NEXT:    andi a2, a0, 255
+; RV32-NEXT:    li a3, 42
+; RV32-NEXT:    sub a3, a3, a0
+; RV32-NEXT:    sltiu a0, a2, 43
+; RV32-NEXT:    xori a0, a0, 1
+; RV32-NEXT:    sb a3, 0(a1)
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: usubo_ugt_constant_op0_i8:
+; RV64:       # %bb.0:
+; RV64-NEXT:    andi a2, a0, 255
+; RV64-NEXT:    li a3, 42
+; RV64-NEXT:    subw a3, a3, a0
+; RV64-NEXT:    sltiu a0, a2, 43
+; RV64-NEXT:    xori a0, a0, 1
+; RV64-NEXT:    sb a3, 0(a1)
+; RV64-NEXT:    ret
+  %s = sub i8 42, %x
+  %ov = icmp ugt i8 %x, 42
+  store i8 %s, ptr %p
+  ret i1 %ov
+}
+
+; Compare with constant operand 0 is canonicalized by commuting, but verify match for non-canonical form.
+
+define i1 @usubo_ult_constant_op0_i16(i16 %x, ptr %p) {
+; RV32-LABEL: usubo_ult_constant_op0_i16:
+; RV32:       # %bb.0:
+; RV32-NEXT:    slli a2, a0, 16
+; RV32-NEXT:    srli a2, a2, 16
+; RV32-NEXT:    li a3, 43
+; RV32-NEXT:    sub a3, a3, a0
+; RV32-NEXT:    sltiu a0, a2, 44
+; RV32-NEXT:    xori a0, a0, 1
+; RV32-NEXT:    sh a3, 0(a1)
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: usubo_ult_constant_op0_i16:
+; RV64:       # %bb.0:
+; RV64-NEXT:    slli a2, a0, 48
+; RV64-NEXT:    srli a2, a2, 48
+; RV64-NEXT:    li a3, 43
+; RV64-NEXT:    subw a3, a3, a0
+; RV64-NEXT:    sltiu a0, a2, 44
+; RV64-NEXT:    xori a0, a0, 1
+; RV64-NEXT:    sh a3, 0(a1)
+; RV64-NEXT:    ret
+  %s = sub i16 43, %x
+  %ov = icmp ult i16 43, %x
+  store i16 %s, ptr %p
+  ret i1 %ov
+}
+
+; Subtract with constant operand 1 is canonicalized to add.
+
+define i1 @usubo_ult_constant_op1_i16(i16 %x, ptr %p) {
+; RV32-LABEL: usubo_ult_constant_op1_i16:
+; RV32:       # %bb.0:
+; RV32-NEXT:    slli a2, a0, 16
+; RV32-NEXT:    srli a2, a2, 16
+; RV32-NEXT:    addi a3, a0, -44
+; RV32-NEXT:    sltiu a0, a2, 44
+; RV32-NEXT:    sh a3, 0(a1)
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: usubo_ult_constant_op1_i16:
+; RV64:       # %bb.0:
+; RV64-NEXT:    slli a2, a0, 48
+; RV64-NEXT:    srli a2, a2, 48
+; RV64-NEXT:    addiw a3, a0, -44
+; RV64-NEXT:    sltiu a0, a2, 44
+; RV64-NEXT:    sh a3, 0(a1)
+; RV64-NEXT:    ret
+  %s = add i16 %x, -44
+  %ov = icmp ult i16 %x, 44
+  store i16 %s, ptr %p
+  ret i1 %ov
+}
+
+define i1 @usubo_ugt_constant_op1_i8(i8 %x, ptr %p) {
+; RV32-LABEL: usubo_ugt_constant_op1_i8:
+; RV32:       # %bb.0:
+; RV32-NEXT:    andi a2, a0, 255
+; RV32-NEXT:    sltiu a2, a2, 45
+; RV32-NEXT:    addi a0, a0, -45
+; RV32-NEXT:    sb a0, 0(a1)
+; RV32-NEXT:    mv a0, a2
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: usubo_ugt_constant_op1_i8:
+; RV64:       # %bb.0:
+; RV64-NEXT:    andi a2, a0, 255
+; RV64-NEXT:    sltiu a2, a2, 45
+; RV64-NEXT:    addiw a0, a0, -45
+; RV64-NEXT:    sb a0, 0(a1)
+; RV64-NEXT:    mv a0, a2
+; RV64-NEXT:    ret
+  %ov = icmp ugt i8 45, %x
+  %s = add i8 %x, -45
+  store i8 %s, ptr %p
+  ret i1 %ov
+}
+
+; Special-case: subtract 1 changes the compare predicate and constant.
+
+define i1 @usubo_eq_constant1_op1_i32(i32 %x, ptr %p) {
+; RV32-LABEL: usubo_eq_constant1_op1_i32:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi a2, a0, -1
+; RV32-NEXT:    seqz a0, a0
+; RV32-NEXT:    sw a2, 0(a1)
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: usubo_eq_constant1_op1_i32:
+; RV64:       # %bb.0:
+; RV64-NEXT:    sext.w a2, a0
+; RV64-NEXT:    addiw a3, a0, -1
+; RV64-NEXT:    seqz a0, a2
+; RV64-NEXT:    sw a3, 0(a1)
+; RV64-NEXT:    ret
+  %s = add i32 %x, -1
+  %ov = icmp eq i32 %x, 0
+  store i32 %s, ptr %p
+  ret i1 %ov
+}
+
+; Special-case: subtract from 0 (negate) changes the compare predicate.
+
+define i1 @usubo_ne_constant0_op1_i32(i32 %x, ptr %p) {
+; RV32-LABEL: usubo_ne_constant0_op1_i32:
+; RV32:       # %bb.0:
+; RV32-NEXT:    neg a2, a0
+; RV32-NEXT:    snez a0, a0
+; RV32-NEXT:    sw a2, 0(a1)
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: usubo_ne_constant0_op1_i32:
+; RV64:       # %bb.0:
+; RV64-NEXT:    sext.w a2, a0
+; RV64-NEXT:    negw a3, a0
+; RV64-NEXT:    snez a0, a2
+; RV64-NEXT:    sw a3, 0(a1)
+; RV64-NEXT:    ret
+  %s = sub i32 0, %x
+  %ov = icmp ne i32 %x, 0
+  store i32 %s, ptr %p
+  ret i1 %ov
+}
+
+; This used to verify insertion point for multi-BB, but now we just bail out.
+
+declare void @call(i1)
+
+define i1 @usubo_ult_sub_dominates_i64(i64 %x, i64 %y, ptr %p, i1 %cond) {
+; RV32-LABEL: usubo_ult_sub_dominates_i64:
+; RV32:       # %bb.0: # %entry
+; RV32-NEXT:    andi a7, a5, 1
+; RV32-NEXT:    beqz a7, .LBB30_5
+; RV32-NEXT:  # %bb.1: # %t
+; RV32-NEXT:    mv a6, a0
+; RV32-NEXT:    sltu a0, a0, a2
+; RV32-NEXT:    sub t0, a1, a3
+; RV32-NEXT:    sub t0, t0, a0
+; RV32-NEXT:    sub a2, a6, a2
+; RV32-NEXT:    sw a2, 0(a4)
+; RV32-NEXT:    sw t0, 4(a4)
+; RV32-NEXT:    beqz a7, .LBB30_5
+; RV32-NEXT:  # %bb.2: # %end
+; RV32-NEXT:    beq a1, a3, .LBB30_4
+; RV32-NEXT:  # %bb.3: # %end
+; RV32-NEXT:    sltu a0, a1, a3
+; RV32-NEXT:  .LBB30_4: # %end
+; RV32-NEXT:    ret
+; RV32-NEXT:  .LBB30_5: # %f
+; RV32-NEXT:    mv a0, a5
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: usubo_ult_sub_dominates_i64:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    andi a4, a3, 1
+; RV64-NEXT:    beqz a4, .LBB30_3
+; RV64-NEXT:  # %bb.1: # %t
+; RV64-NEXT:    sub a5, a0, a1
+; RV64-NEXT:    sd a5, 0(a2)
+; RV64-NEXT:    beqz a4, .LBB30_3
+; RV64-NEXT:  # %bb.2: # %end
+; RV64-NEXT:    sltu a0, a0, a1
+; RV64-NEXT:    ret
+; RV64-NEXT:  .LBB30_3: # %f
+; RV64-NEXT:    mv a0, a3
+; RV64-NEXT:    ret
+entry:
+  br i1 %cond, label %t, label %f
+
+t:
+  %s = sub i64 %x, %y
+  store i64 %s, ptr %p
+  br i1 %cond, label %end, label %f
+
+f:
+  ret i1 %cond
+
+end:
+  %ov = icmp ult i64 %x, %y
+  ret i1 %ov
+}
+
+define i1 @usubo_ult_cmp_dominates_i64(i64 %x, i64 %y, ptr %p, i1 %cond) {
+; RV32-LABEL: usubo_ult_cmp_dominates_i64:
+; RV32:       # %bb.0: # %entry
+; RV32-NEXT:    addi sp, sp, -32
+; RV32-NEXT:    .cfi_def_cfa_offset 32
+; RV32-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s0, 24(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s4, 8(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s5, 4(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s6, 0(sp) # 4-byte Folded Spill
+; RV32-NEXT:    .cfi_offset ra, -4
+; RV32-NEXT:    .cfi_offset s0, -8
+; RV32-NEXT:    .cfi_offset s1, -12
+; RV32-NEXT:    .cfi_offset s2, -16
+; RV32-NEXT:    .cfi_offset s3, -20
+; RV32-NEXT:    .cfi_offset s4, -24
+; RV32-NEXT:    .cfi_offset s5, -28
+; RV32-NEXT:    .cfi_offset s6, -32
+; RV32-NEXT:    mv s4, a5
+; RV32-NEXT:    andi a5, a5, 1
+; RV32-NEXT:    beqz a5, .LBB31_8
+; RV32-NEXT:  # %bb.1: # %t
+; RV32-NEXT:    mv s0, a4
+; RV32-NEXT:    mv s3, a3
+; RV32-NEXT:    mv s1, a2
+; RV32-NEXT:    mv s5, a1
+; RV32-NEXT:    mv s2, a0
+; RV32-NEXT:    beq a1, a3, .LBB31_3
+; RV32-NEXT:  # %bb.2: # %t
+; RV32-NEXT:    sltu s6, s5, s3
+; RV32-NEXT:    j .LBB31_4
+; RV32-NEXT:  .LBB31_3:
+; RV32-NEXT:    sltu s6, s2, s1
+; RV32-NEXT:  .LBB31_4: # %t
+; RV32-NEXT:    mv a0, s6
+; RV32-NEXT:    call call@plt
+; RV32-NEXT:    beqz s6, .LBB31_8
+; RV32-NEXT:  # %bb.5: # %end
+; RV32-NEXT:    sltu a1, s2, s1
+; RV32-NEXT:    mv a0, a1
+; RV32-NEXT:    beq s5, s3, .LBB31_7
+; RV32-NEXT:  # %bb.6: # %end
+; RV32-NEXT:    sltu a0, s5, s3
+; RV32-NEXT:  .LBB31_7: # %end
+; RV32-NEXT:    sub a2, s5, s3
+; RV32-NEXT:    sub a2, a2, a1
+; RV32-NEXT:    sub a1, s2, s1
+; RV32-NEXT:    sw a1, 0(s0)
+; RV32-NEXT:    sw a2, 4(s0)
+; RV32-NEXT:    j .LBB31_9
+; RV32-NEXT:  .LBB31_8: # %f
+; RV32-NEXT:    mv a0, s4
+; RV32-NEXT:  .LBB31_9: # %f
+; RV32-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s4, 8(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s5, 4(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s6, 0(sp) # 4-byte Folded Reload
+; RV32-NEXT:    addi sp, sp, 32
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: usubo_ult_cmp_dominates_i64:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    addi sp, sp, -48
+; RV64-NEXT:    .cfi_def_cfa_offset 48
+; RV64-NEXT:    sd ra, 40(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s0, 32(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s1, 24(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s3, 8(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s4, 0(sp) # 8-byte Folded Spill
+; RV64-NEXT:    .cfi_offset ra, -8
+; RV64-NEXT:    .cfi_offset s0, -16
+; RV64-NEXT:    .cfi_offset s1, -24
+; RV64-NEXT:    .cfi_offset s2, -32
+; RV64-NEXT:    .cfi_offset s3, -40
+; RV64-NEXT:    .cfi_offset s4, -48
+; RV64-NEXT:    mv s0, a3
+; RV64-NEXT:    andi a3, a3, 1
+; RV64-NEXT:    beqz a3, .LBB31_3
+; RV64-NEXT:  # %bb.1: # %t
+; RV64-NEXT:    mv s1, a2
+; RV64-NEXT:    mv s2, a1
+; RV64-NEXT:    mv s3, a0
+; RV64-NEXT:    sltu s4, a0, a1
+; RV64-NEXT:    mv a0, s4
+; RV64-NEXT:    call call at plt
+; RV64-NEXT:    bgeu s3, s2, .LBB31_3
+; RV64-NEXT:  # %bb.2: # %end
+; RV64-NEXT:    sub a0, s3, s2
+; RV64-NEXT:    sd a0, 0(s1)
+; RV64-NEXT:    mv a0, s4
+; RV64-NEXT:    j .LBB31_4
+; RV64-NEXT:  .LBB31_3: # %f
+; RV64-NEXT:    mv a0, s0
+; RV64-NEXT:  .LBB31_4: # %f
+; RV64-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s4, 0(sp) # 8-byte Folded Reload
+; RV64-NEXT:    addi sp, sp, 48
+; RV64-NEXT:    ret
+entry:
+  br i1 %cond, label %t, label %f
+
+t:
+  %ov = icmp ult i64 %x, %y
+  call void @call(i1 %ov)
+  br i1 %ov, label %end, label %f
+
+f:
+  ret i1 %cond
+
+end:
+  %s = sub i64 %x, %y
+  store i64 %s, ptr %p
+  ret i1 %ov
+}
+
+; Verify that crazy/non-canonical code does not crash.
+
+; Compares two i64 constants (1 and -1) and zero-extends the result, but
+; never uses either value; the body ends in unreachable, so the checks
+; below only match the function label and the entry-block marker.
+define void @bar() {
+; RV32-LABEL: bar:
+; RV32:       # %bb.0:
+;
+; RV64-LABEL: bar:
+; RV64:       # %bb.0:
+  %cmp = icmp eq i64 1, -1
+  %frombool = zext i1 %cmp to i8
+  unreachable
+}
+
+; Adds two i64 constants (1 + 1) and truncates the sum to i32 without
+; using it; the body ends in unreachable, so the checks below only match
+; the function label and the entry-block marker.
+define void @foo() {
+; RV32-LABEL: foo:
+; RV32:       # %bb.0:
+;
+; RV64-LABEL: foo:
+; RV64:       # %bb.0:
+  %sub = add nsw i64 1, 1
+  %conv = trunc i64 %sub to i32
+  unreachable
+}
+
+; Similarly for usubo.
+
+; Returns the result of comparing the i64 constants 1 and 0 for equality;
+; both targets are expected to materialize the constant 0 in a0.
+define i1 @bar2() {
+; RV32-LABEL: bar2:
+; RV32:       # %bb.0:
+; RV32-NEXT:    li a0, 0
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: bar2:
+; RV64:       # %bb.0:
+; RV64-NEXT:    li a0, 0
+; RV64-NEXT:    ret
+  %cmp = icmp eq i64 1, 0
+  ret i1 %cmp
+}
+
+; Returns the i64 sum of the constants 1 and -1; the %p argument is not
+; used. The riscv32 checks expect zero in both a0 and a1, the riscv64
+; checks expect zero in a0 only.
+define i64 @foo2(ptr %p) {
+; RV32-LABEL: foo2:
+; RV32:       # %bb.0:
+; RV32-NEXT:    li a0, 0
+; RV32-NEXT:    li a1, 0
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: foo2:
+; RV64:       # %bb.0:
+; RV64-NEXT:    li a0, 0
+; RV64-NEXT:    ret
+  %sub = add nsw i64 1, -1
+  ret i64 %sub
+}
+
+; Avoid hoisting a math op into a dominating block which would
+; increase the critical path.
+
+; Loads an i64 key from %p64 and branches on key == 0. The %true block
+; (key is zero) stores key + (-1) back to %p64; the %false block stores
+; key & 7 instead. Both blocks branch to %exit, which returns void.
+define void @PR41129(ptr %p64) {
+; RV32-LABEL: PR41129:
+; RV32:       # %bb.0: # %entry
+; RV32-NEXT:    lw a1, 4(a0)
+; RV32-NEXT:    lw a2, 0(a0)
+; RV32-NEXT:    or a3, a2, a1
+; RV32-NEXT:    beqz a3, .LBB36_2
+; RV32-NEXT:  # %bb.1: # %false
+; RV32-NEXT:    andi a2, a2, 7
+; RV32-NEXT:    sw zero, 4(a0)
+; RV32-NEXT:    sw a2, 0(a0)
+; RV32-NEXT:    ret
+; RV32-NEXT:  .LBB36_2: # %true
+; RV32-NEXT:    addi a3, a2, -1
+; RV32-NEXT:    sltu a2, a3, a2
+; RV32-NEXT:    add a1, a1, a2
+; RV32-NEXT:    addi a1, a1, -1
+; RV32-NEXT:    sw a3, 0(a0)
+; RV32-NEXT:    sw a1, 4(a0)
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: PR41129:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    ld a1, 0(a0)
+; RV64-NEXT:    beqz a1, .LBB36_2
+; RV64-NEXT:  # %bb.1: # %false
+; RV64-NEXT:    andi a1, a1, 7
+; RV64-NEXT:    sd a1, 0(a0)
+; RV64-NEXT:    ret
+; RV64-NEXT:  .LBB36_2: # %true
+; RV64-NEXT:    addi a1, a1, -1
+; RV64-NEXT:    sd a1, 0(a0)
+; RV64-NEXT:    ret
+entry:
+  %key = load i64, ptr %p64, align 8
+  %cond17 = icmp eq i64 %key, 0
+  br i1 %cond17, label %true, label %false
+
+false:
+  %andval = and i64 %key, 7
+  store i64 %andval, ptr %p64
+  br label %exit
+
+true:
+  %svalue = add i64 %key, -1
+  store i64 %svalue, ptr %p64
+  br label %exit
+
+exit:
+  ret void
+}
+