[llvm] 119c34e - [InstCombine][test] Add tests for mul combinations

Paweł Bylica via llvm-commits llvm-commits at lists.llvm.org
Sat Oct 22 07:26:30 PDT 2022


Author: Paweł Bylica
Date: 2022-10-22T16:25:50+02:00
New Revision: 119c34e7f9c66dbdb77f69d67bb50507c91dc2ef

URL: https://github.com/llvm/llvm-project/commit/119c34e7f9c66dbdb77f69d67bb50507c91dc2ef
DIFF: https://github.com/llvm/llvm-project/commit/119c34e7f9c66dbdb77f69d67bb50507c91dc2ef.diff

LOG: [InstCombine][test] Add tests for mul combinations

Tests taken from https://reviews.llvm.org/D56214 and ported to
InstCombine for https://reviews.llvm.org/D136015.

Added: 
    llvm/test/Transforms/InstCombine/mul_full_32.ll
    llvm/test/Transforms/InstCombine/mul_full_64.ll

Modified: 
    

Removed: 
    


################################################################################
diff  --git a/llvm/test/Transforms/InstCombine/mul_full_32.ll b/llvm/test/Transforms/InstCombine/mul_full_32.ll
new file mode 100644
index 000000000000..6a8d73525399
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/mul_full_32.ll
@@ -0,0 +1,117 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128"
+target triple = "i386-unknown-linux-gnu"
+
+define { i64, i64 } @mul_full_64(i64 %x, i64 %y) {
+; CHECK-LABEL: @mul_full_64(
+; CHECK-NEXT:    [[XL:%.*]] = and i64 [[X:%.*]], 4294967295
+; CHECK-NEXT:    [[XH:%.*]] = lshr i64 [[X]], 32
+; CHECK-NEXT:    [[YL:%.*]] = and i64 [[Y:%.*]], 4294967295
+; CHECK-NEXT:    [[YH:%.*]] = lshr i64 [[Y]], 32
+; CHECK-NEXT:    [[T0:%.*]] = mul nuw i64 [[YL]], [[XL]]
+; CHECK-NEXT:    [[T1:%.*]] = mul nuw i64 [[YL]], [[XH]]
+; CHECK-NEXT:    [[T2:%.*]] = mul nuw i64 [[YH]], [[XL]]
+; CHECK-NEXT:    [[T3:%.*]] = mul nuw i64 [[YH]], [[XH]]
+; CHECK-NEXT:    [[T0L:%.*]] = and i64 [[T0]], 4294967295
+; CHECK-NEXT:    [[T0H:%.*]] = lshr i64 [[T0]], 32
+; CHECK-NEXT:    [[U0:%.*]] = add i64 [[T0H]], [[T1]]
+; CHECK-NEXT:    [[U0L:%.*]] = and i64 [[U0]], 4294967295
+; CHECK-NEXT:    [[U0H:%.*]] = lshr i64 [[U0]], 32
+; CHECK-NEXT:    [[U1:%.*]] = add i64 [[U0L]], [[T2]]
+; CHECK-NEXT:    [[U1LS:%.*]] = shl i64 [[U1]], 32
+; CHECK-NEXT:    [[U1H:%.*]] = lshr i64 [[U1]], 32
+; CHECK-NEXT:    [[U2:%.*]] = add i64 [[U0H]], [[T3]]
+; CHECK-NEXT:    [[LO:%.*]] = or i64 [[U1LS]], [[T0L]]
+; CHECK-NEXT:    [[HI:%.*]] = add i64 [[U2]], [[U1H]]
+; CHECK-NEXT:    [[RES_LO:%.*]] = insertvalue { i64, i64 } undef, i64 [[LO]], 0
+; CHECK-NEXT:    [[RES:%.*]] = insertvalue { i64, i64 } [[RES_LO]], i64 [[HI]], 1
+; CHECK-NEXT:    ret { i64, i64 } [[RES]]
+; Input IR: full 64x64-bit multiply assembled from 32-bit halves, returned as { low, high }. The expected output above is this same instruction sequence, unchanged.
+  %xl = and i64 %x, 4294967295  ; low 32 bits of x
+  %xh = lshr i64 %x, 32  ; high 32 bits of x
+  %yl = and i64 %y, 4294967295  ; low 32 bits of y
+  %yh = lshr i64 %y, 32  ; high 32 bits of y
+
+  %t0 = mul nuw i64 %yl, %xl  ; four partial products: low*low,
+  %t1 = mul nuw i64 %yl, %xh  ; two cross terms,
+  %t2 = mul nuw i64 %yh, %xl
+  %t3 = mul nuw i64 %yh, %xh  ; and high*high
+
+  %t0l = and i64 %t0, 4294967295
+  %t0h = lshr i64 %t0, 32
+
+  %u0 = add i64 %t0h, %t1  ; first cross term plus the upper half of t0
+  %u0l = and i64 %u0, 4294967295
+  %u0h = lshr i64 %u0, 32
+
+  %u1 = add i64 %u0l, %t2  ; second cross term added to the low half of u0
+  %u1ls = shl i64 %u1, 32
+  %u1h = lshr i64 %u1, 32
+
+  %u2 = add i64 %u0h, %t3
+
+  %lo = or i64 %u1ls, %t0l  ; low 64 bits of the product
+  %hi = add i64 %u2, %u1h  ; high 64 bits of the product
+
+  %res_lo = insertvalue { i64, i64 } undef, i64 %lo, 0
+  %res = insertvalue { i64, i64 } %res_lo, i64 %hi, 1
+  ret { i64, i64 } %res
+}
+
+define { i32, i32 } @mul_full_32(i32 %x, i32 %y) {
+; CHECK-LABEL: @mul_full_32(
+; CHECK-NEXT:    [[XL:%.*]] = and i32 [[X:%.*]], 65535
+; CHECK-NEXT:    [[XH:%.*]] = lshr i32 [[X]], 16
+; CHECK-NEXT:    [[YL:%.*]] = and i32 [[Y:%.*]], 65535
+; CHECK-NEXT:    [[YH:%.*]] = lshr i32 [[Y]], 16
+; CHECK-NEXT:    [[T0:%.*]] = mul nuw i32 [[YL]], [[XL]]
+; CHECK-NEXT:    [[T1:%.*]] = mul nuw i32 [[YL]], [[XH]]
+; CHECK-NEXT:    [[T2:%.*]] = mul nuw i32 [[YH]], [[XL]]
+; CHECK-NEXT:    [[T3:%.*]] = mul nuw i32 [[YH]], [[XH]]
+; CHECK-NEXT:    [[T0L:%.*]] = and i32 [[T0]], 65535
+; CHECK-NEXT:    [[T0H:%.*]] = lshr i32 [[T0]], 16
+; CHECK-NEXT:    [[U0:%.*]] = add i32 [[T0H]], [[T1]]
+; CHECK-NEXT:    [[U0L:%.*]] = and i32 [[U0]], 65535
+; CHECK-NEXT:    [[U0H:%.*]] = lshr i32 [[U0]], 16
+; CHECK-NEXT:    [[U1:%.*]] = add i32 [[U0L]], [[T2]]
+; CHECK-NEXT:    [[U1LS:%.*]] = shl i32 [[U1]], 16
+; CHECK-NEXT:    [[U1H:%.*]] = lshr i32 [[U1]], 16
+; CHECK-NEXT:    [[U2:%.*]] = add i32 [[U0H]], [[T3]]
+; CHECK-NEXT:    [[LO:%.*]] = or i32 [[U1LS]], [[T0L]]
+; CHECK-NEXT:    [[HI:%.*]] = add i32 [[U2]], [[U1H]]
+; CHECK-NEXT:    [[RES_LO:%.*]] = insertvalue { i32, i32 } undef, i32 [[LO]], 0
+; CHECK-NEXT:    [[RES:%.*]] = insertvalue { i32, i32 } [[RES_LO]], i32 [[HI]], 1
+; CHECK-NEXT:    ret { i32, i32 } [[RES]]
+; Input IR: full 32x32-bit multiply assembled from 16-bit halves, returned as { low, high }. The expected output above is this same instruction sequence, unchanged.
+  %xl = and i32 %x, 65535  ; low 16 bits of x
+  %xh = lshr i32 %x, 16  ; high 16 bits of x
+  %yl = and i32 %y, 65535  ; low 16 bits of y
+  %yh = lshr i32 %y, 16  ; high 16 bits of y
+
+  %t0 = mul nuw i32 %yl, %xl  ; four partial products: low*low,
+  %t1 = mul nuw i32 %yl, %xh  ; two cross terms,
+  %t2 = mul nuw i32 %yh, %xl
+  %t3 = mul nuw i32 %yh, %xh  ; and high*high
+
+  %t0l = and i32 %t0, 65535
+  %t0h = lshr i32 %t0, 16
+
+  %u0 = add i32 %t0h, %t1  ; first cross term plus the upper half of t0
+  %u0l = and i32 %u0, 65535
+  %u0h = lshr i32 %u0, 16
+
+  %u1 = add i32 %u0l, %t2  ; second cross term added to the low half of u0
+  %u1ls = shl i32 %u1, 16
+  %u1h = lshr i32 %u1, 16
+
+  %u2 = add i32 %u0h, %t3
+
+  %lo = or i32 %u1ls, %t0l  ; low 32 bits of the product
+  %hi = add i32 %u2, %u1h  ; high 32 bits of the product
+
+  %res_lo = insertvalue { i32, i32 } undef, i32 %lo, 0
+  %res = insertvalue { i32, i32 } %res_lo, i32 %hi, 1
+  ret { i32, i32 } %res
+}

diff  --git a/llvm/test/Transforms/InstCombine/mul_full_64.ll b/llvm/test/Transforms/InstCombine/mul_full_64.ll
new file mode 100644
index 000000000000..fa58271bae10
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/mul_full_64.ll
@@ -0,0 +1,731 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define { i64, i64 } @mul_full_64_variant0(i64 %x, i64 %y) {
+; CHECK-LABEL: @mul_full_64_variant0(
+; CHECK-NEXT:    [[XL:%.*]] = and i64 [[X:%.*]], 4294967295
+; CHECK-NEXT:    [[XH:%.*]] = lshr i64 [[X]], 32
+; CHECK-NEXT:    [[YL:%.*]] = and i64 [[Y:%.*]], 4294967295
+; CHECK-NEXT:    [[YH:%.*]] = lshr i64 [[Y]], 32
+; CHECK-NEXT:    [[T0:%.*]] = mul nuw i64 [[YL]], [[XL]]
+; CHECK-NEXT:    [[T1:%.*]] = mul nuw i64 [[YL]], [[XH]]
+; CHECK-NEXT:    [[T2:%.*]] = mul nuw i64 [[YH]], [[XL]]
+; CHECK-NEXT:    [[T3:%.*]] = mul nuw i64 [[YH]], [[XH]]
+; CHECK-NEXT:    [[T0L:%.*]] = and i64 [[T0]], 4294967295
+; CHECK-NEXT:    [[T0H:%.*]] = lshr i64 [[T0]], 32
+; CHECK-NEXT:    [[U0:%.*]] = add i64 [[T0H]], [[T1]]
+; CHECK-NEXT:    [[U0L:%.*]] = and i64 [[U0]], 4294967295
+; CHECK-NEXT:    [[U0H:%.*]] = lshr i64 [[U0]], 32
+; CHECK-NEXT:    [[U1:%.*]] = add i64 [[U0L]], [[T2]]
+; CHECK-NEXT:    [[U1LS:%.*]] = shl i64 [[U1]], 32
+; CHECK-NEXT:    [[U1H:%.*]] = lshr i64 [[U1]], 32
+; CHECK-NEXT:    [[U2:%.*]] = add i64 [[U0H]], [[T3]]
+; CHECK-NEXT:    [[LO:%.*]] = or i64 [[U1LS]], [[T0L]]
+; CHECK-NEXT:    [[HI:%.*]] = add i64 [[U2]], [[U1H]]
+; CHECK-NEXT:    [[RES_LO:%.*]] = insertvalue { i64, i64 } undef, i64 [[LO]], 0
+; CHECK-NEXT:    [[RES:%.*]] = insertvalue { i64, i64 } [[RES_LO]], i64 [[HI]], 1
+; CHECK-NEXT:    ret { i64, i64 } [[RES]]
+; Input IR: baseline full 64x64-bit multiply from 32-bit halves with function arguments as operands; returns { low, high }. The expected output above is the input unchanged.
+  %xl = and i64 %x, 4294967295  ; low 32 bits of x
+  %xh = lshr i64 %x, 32  ; high 32 bits of x
+  %yl = and i64 %y, 4294967295  ; low 32 bits of y
+  %yh = lshr i64 %y, 32  ; high 32 bits of y
+
+  %t0 = mul nuw i64 %yl, %xl  ; four partial products: low*low,
+  %t1 = mul nuw i64 %yl, %xh  ; two cross terms,
+  %t2 = mul nuw i64 %yh, %xl
+  %t3 = mul nuw i64 %yh, %xh  ; and high*high
+
+  %t0l = and i64 %t0, 4294967295
+  %t0h = lshr i64 %t0, 32
+
+  %u0 = add i64 %t0h, %t1  ; first cross term plus the upper half of t0
+  %u0l = and i64 %u0, 4294967295
+  %u0h = lshr i64 %u0, 32
+
+  %u1 = add i64 %u0l, %t2  ; second cross term added to the low half of u0
+  %u1ls = shl i64 %u1, 32
+  %u1h = lshr i64 %u1, 32
+
+  %u2 = add i64 %u0h, %t3
+
+  %lo = or i64 %u1ls, %t0l  ; low 64 bits of the product
+  %hi = add i64 %u2, %u1h  ; high 64 bits of the product
+
+  %res_lo = insertvalue { i64, i64 } undef, i64 %lo, 0
+  %res = insertvalue { i64, i64 } %res_lo, i64 %hi, 1
+  ret { i64, i64 } %res
+}
+
+; Variants 1 - 3 below were generated from this C++ program, one per #if branch (ONE, TWO, THREE respectively):
+;
+; #include <stdint.h>
+;
+; uint64_t mulxu(uint64_t a, uint64_t b, uint64_t *rhi) {
+;     auto hi = [](uint64_t x) { return x >> 32; };
+;     auto lo = [](uint64_t x) { return uint32_t(x); };
+;     uint64_t xl = lo(a);
+;     uint64_t xh = hi(a);
+;     uint64_t yl = lo(b);
+;     uint64_t yh = hi(b);
+;
+;     uint64_t rhh = xh * yh;
+;     uint64_t rhl = xh * yl;
+;     uint64_t rlh = xl * yh;
+;     uint64_t rll = xl * yl;
+;
+;     *rhi = rhh + hi(rhl + hi(rll)) + hi((rlh + lo(rhl + hi(rll))));
+; #if ONE
+;     return a*b;
+; #elif TWO
+;     return (uint64_t(lo(rlh + lo(rhl + hi(rll)))) << 32) + lo(rll);
+; #elif THREE
+;     return ((rlh + rhl) << 32) + rll;
+; #endif
+; }
+
+define i64 @mul_full_64_variant1(i64 %a, i64 %b, i64* nocapture %rhi) {
+; CHECK-LABEL: @mul_full_64_variant1(
+; CHECK-NEXT:    [[CONV:%.*]] = and i64 [[A:%.*]], 4294967295
+; CHECK-NEXT:    [[SHR_I43:%.*]] = lshr i64 [[A]], 32
+; CHECK-NEXT:    [[CONV3:%.*]] = and i64 [[B:%.*]], 4294967295
+; CHECK-NEXT:    [[SHR_I41:%.*]] = lshr i64 [[B]], 32
+; CHECK-NEXT:    [[MUL:%.*]] = mul nuw i64 [[SHR_I41]], [[SHR_I43]]
+; CHECK-NEXT:    [[MUL5:%.*]] = mul nuw i64 [[CONV3]], [[SHR_I43]]
+; CHECK-NEXT:    [[MUL6:%.*]] = mul nuw i64 [[SHR_I41]], [[CONV]]
+; CHECK-NEXT:    [[MUL7:%.*]] = mul nuw i64 [[CONV3]], [[CONV]]
+; CHECK-NEXT:    [[SHR_I40:%.*]] = lshr i64 [[MUL7]], 32
+; CHECK-NEXT:    [[ADD:%.*]] = add i64 [[SHR_I40]], [[MUL5]]
+; CHECK-NEXT:    [[SHR_I39:%.*]] = lshr i64 [[ADD]], 32
+; CHECK-NEXT:    [[ADD10:%.*]] = add i64 [[SHR_I39]], [[MUL]]
+; CHECK-NEXT:    [[CONV14:%.*]] = and i64 [[ADD]], 4294967295
+; CHECK-NEXT:    [[ADD15:%.*]] = add i64 [[CONV14]], [[MUL6]]
+; CHECK-NEXT:    [[SHR_I:%.*]] = lshr i64 [[ADD15]], 32
+; CHECK-NEXT:    [[ADD17:%.*]] = add i64 [[ADD10]], [[SHR_I]]
+; CHECK-NEXT:    store i64 [[ADD17]], i64* [[RHI:%.*]], align 8
+; CHECK-NEXT:    [[MULLO:%.*]] = mul i64 [[B]], [[A]]
+; CHECK-NEXT:    ret i64 [[MULLO]]
+; Input IR: stores the high 64 bits of a*b through %rhi and returns the low 64 bits as a plain mul (the "#if ONE" return path). The expected output above is the input unchanged.
+  %conv = and i64 %a, 4294967295  ; xl: low 32 bits of a
+  %shr.i43 = lshr i64 %a, 32  ; xh: high 32 bits of a
+  %conv3 = and i64 %b, 4294967295  ; yl: low 32 bits of b
+  %shr.i41 = lshr i64 %b, 32  ; yh: high 32 bits of b
+  %mul = mul nuw i64 %shr.i41, %shr.i43  ; rhh = high * high
+  %mul5 = mul nuw i64 %conv3, %shr.i43  ; rhl = xh * yl
+  %mul6 = mul nuw i64 %shr.i41, %conv  ; rlh = xl * yh
+  %mul7 = mul nuw i64 %conv3, %conv  ; rll = low * low
+  %shr.i40 = lshr i64 %mul7, 32
+  %add = add i64 %shr.i40, %mul5  ; rhl + hi(rll)
+  %shr.i39 = lshr i64 %add, 32
+  %add10 = add i64 %shr.i39, %mul
+  %conv14 = and i64 %add, 4294967295
+  %add15 = add i64 %conv14, %mul6  ; rlh + lo(rhl + hi(rll))
+  %shr.i = lshr i64 %add15, 32
+  %add17 = add i64 %add10, %shr.i  ; high 64 bits of the product
+  store i64 %add17, i64* %rhi, align 8
+  %mullo = mul i64 %b, %a  ; low 64 bits computed directly
+  ret i64 %mullo
+}
+
+define i64 @mul_full_64_variant2(i64 %a, i64 %b, i64* nocapture %rhi) {
+; CHECK-LABEL: @mul_full_64_variant2(
+; CHECK-NEXT:    [[CONV:%.*]] = and i64 [[A:%.*]], 4294967295
+; CHECK-NEXT:    [[SHR_I58:%.*]] = lshr i64 [[A]], 32
+; CHECK-NEXT:    [[CONV3:%.*]] = and i64 [[B:%.*]], 4294967295
+; CHECK-NEXT:    [[SHR_I56:%.*]] = lshr i64 [[B]], 32
+; CHECK-NEXT:    [[MUL:%.*]] = mul nuw i64 [[SHR_I56]], [[SHR_I58]]
+; CHECK-NEXT:    [[MUL5:%.*]] = mul nuw i64 [[CONV3]], [[SHR_I58]]
+; CHECK-NEXT:    [[MUL6:%.*]] = mul nuw i64 [[SHR_I56]], [[CONV]]
+; CHECK-NEXT:    [[MUL7:%.*]] = mul nuw i64 [[CONV3]], [[CONV]]
+; CHECK-NEXT:    [[SHR_I55:%.*]] = lshr i64 [[MUL7]], 32
+; CHECK-NEXT:    [[ADD:%.*]] = add i64 [[SHR_I55]], [[MUL5]]
+; CHECK-NEXT:    [[SHR_I54:%.*]] = lshr i64 [[ADD]], 32
+; CHECK-NEXT:    [[ADD10:%.*]] = add i64 [[SHR_I54]], [[MUL]]
+; CHECK-NEXT:    [[CONV14:%.*]] = and i64 [[ADD]], 4294967295
+; CHECK-NEXT:    [[ADD15:%.*]] = add i64 [[CONV14]], [[MUL6]]
+; CHECK-NEXT:    [[SHR_I51:%.*]] = lshr i64 [[ADD15]], 32
+; CHECK-NEXT:    [[ADD17:%.*]] = add i64 [[ADD10]], [[SHR_I51]]
+; CHECK-NEXT:    store i64 [[ADD17]], i64* [[RHI:%.*]], align 8
+; CHECK-NEXT:    [[CONV24:%.*]] = shl i64 [[ADD15]], 32
+; CHECK-NEXT:    [[CONV26:%.*]] = and i64 [[MUL7]], 4294967295
+; CHECK-NEXT:    [[ADD27:%.*]] = or i64 [[CONV24]], [[CONV26]]
+; CHECK-NEXT:    ret i64 [[ADD27]]
+; Input IR: stores the high 64 bits of a*b through %rhi and rebuilds the low 64 bits from the carry chain (the "#elif TWO" return path). The expected output above is the input unchanged.
+  %conv = and i64 %a, 4294967295  ; xl: low 32 bits of a
+  %shr.i58 = lshr i64 %a, 32  ; xh: high 32 bits of a
+  %conv3 = and i64 %b, 4294967295  ; yl: low 32 bits of b
+  %shr.i56 = lshr i64 %b, 32  ; yh: high 32 bits of b
+  %mul = mul nuw i64 %shr.i56, %shr.i58  ; rhh = high * high
+  %mul5 = mul nuw i64 %conv3, %shr.i58  ; rhl = xh * yl
+  %mul6 = mul nuw i64 %shr.i56, %conv  ; rlh = xl * yh
+  %mul7 = mul nuw i64 %conv3, %conv  ; rll = low * low
+  %shr.i55 = lshr i64 %mul7, 32
+  %add = add i64 %shr.i55, %mul5  ; rhl + hi(rll)
+  %shr.i54 = lshr i64 %add, 32
+  %add10 = add i64 %shr.i54, %mul
+  %conv14 = and i64 %add, 4294967295
+  %add15 = add i64 %conv14, %mul6  ; rlh + lo(rhl + hi(rll))
+  %shr.i51 = lshr i64 %add15, 32
+  %add17 = add i64 %add10, %shr.i51  ; high 64 bits of the product
+  store i64 %add17, i64* %rhi, align 8
+  %conv24 = shl i64 %add15, 32  ; upper 32 bits of the low result
+  %conv26 = and i64 %mul7, 4294967295  ; lower 32 bits of the low result
+  %add27 = or i64 %conv24, %conv26
+  ret i64 %add27
+}
+
+define i64 @mul_full_64_variant3(i64 %a, i64 %b, i64* nocapture %rhi) {
+; CHECK-LABEL: @mul_full_64_variant3(
+; CHECK-NEXT:    [[CONV:%.*]] = and i64 [[A:%.*]], 4294967295
+; CHECK-NEXT:    [[SHR_I45:%.*]] = lshr i64 [[A]], 32
+; CHECK-NEXT:    [[CONV3:%.*]] = and i64 [[B:%.*]], 4294967295
+; CHECK-NEXT:    [[SHR_I43:%.*]] = lshr i64 [[B]], 32
+; CHECK-NEXT:    [[MUL:%.*]] = mul nuw i64 [[SHR_I43]], [[SHR_I45]]
+; CHECK-NEXT:    [[MUL5:%.*]] = mul nuw i64 [[CONV3]], [[SHR_I45]]
+; CHECK-NEXT:    [[MUL6:%.*]] = mul nuw i64 [[SHR_I43]], [[CONV]]
+; CHECK-NEXT:    [[MUL7:%.*]] = mul nuw i64 [[CONV3]], [[CONV]]
+; CHECK-NEXT:    [[SHR_I42:%.*]] = lshr i64 [[MUL7]], 32
+; CHECK-NEXT:    [[ADD:%.*]] = add i64 [[SHR_I42]], [[MUL5]]
+; CHECK-NEXT:    [[SHR_I41:%.*]] = lshr i64 [[ADD]], 32
+; CHECK-NEXT:    [[ADD10:%.*]] = add i64 [[SHR_I41]], [[MUL]]
+; CHECK-NEXT:    [[CONV14:%.*]] = and i64 [[ADD]], 4294967295
+; CHECK-NEXT:    [[ADD15:%.*]] = add i64 [[CONV14]], [[MUL6]]
+; CHECK-NEXT:    [[SHR_I:%.*]] = lshr i64 [[ADD15]], 32
+; CHECK-NEXT:    [[ADD17:%.*]] = add i64 [[ADD10]], [[SHR_I]]
+; CHECK-NEXT:    store i64 [[ADD17]], i64* [[RHI:%.*]], align 8
+; CHECK-NEXT:    [[ADD18:%.*]] = add i64 [[MUL6]], [[MUL5]]
+; CHECK-NEXT:    [[SHL:%.*]] = shl i64 [[ADD18]], 32
+; CHECK-NEXT:    [[ADD19:%.*]] = add i64 [[SHL]], [[MUL7]]
+; CHECK-NEXT:    ret i64 [[ADD19]]
+; Input IR: stores the high 64 bits of a*b through %rhi and returns ((rlh + rhl) << 32) + rll as the low 64 bits (the "#elif THREE" return path). The expected output above is the input unchanged.
+  %conv = and i64 %a, 4294967295  ; xl: low 32 bits of a
+  %shr.i45 = lshr i64 %a, 32  ; xh: high 32 bits of a
+  %conv3 = and i64 %b, 4294967295  ; yl: low 32 bits of b
+  %shr.i43 = lshr i64 %b, 32  ; yh: high 32 bits of b
+  %mul = mul nuw i64 %shr.i43, %shr.i45  ; rhh = high * high
+  %mul5 = mul nuw i64 %conv3, %shr.i45  ; rhl = xh * yl
+  %mul6 = mul nuw i64 %shr.i43, %conv  ; rlh = xl * yh
+  %mul7 = mul nuw i64 %conv3, %conv  ; rll = low * low
+  %shr.i42 = lshr i64 %mul7, 32
+  %add = add i64 %shr.i42, %mul5  ; rhl + hi(rll)
+  %shr.i41 = lshr i64 %add, 32
+  %add10 = add i64 %shr.i41, %mul
+  %conv14 = and i64 %add, 4294967295
+  %add15 = add i64 %conv14, %mul6  ; rlh + lo(rhl + hi(rll))
+  %shr.i = lshr i64 %add15, 32
+  %add17 = add i64 %add10, %shr.i  ; high 64 bits of the product
+  store i64 %add17, i64* %rhi, align 8
+  %add18 = add i64 %mul6, %mul5  ; rlh + rhl
+  %shl = shl i64 %add18, 32
+  %add19 = add i64 %shl, %mul7  ; low 64 bits of the product
+  ret i64 %add19
+}
+
+
+define { i32, i32 } @mul_full_32(i32 %x, i32 %y) {
+; CHECK-LABEL: @mul_full_32(
+; CHECK-NEXT:    [[XL:%.*]] = and i32 [[X:%.*]], 65535
+; CHECK-NEXT:    [[XH:%.*]] = lshr i32 [[X]], 16
+; CHECK-NEXT:    [[YL:%.*]] = and i32 [[Y:%.*]], 65535
+; CHECK-NEXT:    [[YH:%.*]] = lshr i32 [[Y]], 16
+; CHECK-NEXT:    [[T0:%.*]] = mul nuw i32 [[YL]], [[XL]]
+; CHECK-NEXT:    [[T1:%.*]] = mul nuw i32 [[YL]], [[XH]]
+; CHECK-NEXT:    [[T2:%.*]] = mul nuw i32 [[YH]], [[XL]]
+; CHECK-NEXT:    [[T3:%.*]] = mul nuw i32 [[YH]], [[XH]]
+; CHECK-NEXT:    [[T0L:%.*]] = and i32 [[T0]], 65535
+; CHECK-NEXT:    [[T0H:%.*]] = lshr i32 [[T0]], 16
+; CHECK-NEXT:    [[U0:%.*]] = add i32 [[T0H]], [[T1]]
+; CHECK-NEXT:    [[U0L:%.*]] = and i32 [[U0]], 65535
+; CHECK-NEXT:    [[U0H:%.*]] = lshr i32 [[U0]], 16
+; CHECK-NEXT:    [[U1:%.*]] = add i32 [[U0L]], [[T2]]
+; CHECK-NEXT:    [[U1LS:%.*]] = shl i32 [[U1]], 16
+; CHECK-NEXT:    [[U1H:%.*]] = lshr i32 [[U1]], 16
+; CHECK-NEXT:    [[U2:%.*]] = add i32 [[U0H]], [[T3]]
+; CHECK-NEXT:    [[LO:%.*]] = or i32 [[U1LS]], [[T0L]]
+; CHECK-NEXT:    [[HI:%.*]] = add i32 [[U2]], [[U1H]]
+; CHECK-NEXT:    [[RES_LO:%.*]] = insertvalue { i32, i32 } undef, i32 [[LO]], 0
+; CHECK-NEXT:    [[RES:%.*]] = insertvalue { i32, i32 } [[RES_LO]], i32 [[HI]], 1
+; CHECK-NEXT:    ret { i32, i32 } [[RES]]
+; Input IR: full 32x32-bit multiply assembled from 16-bit halves on i32 values, returned as { low, high }. The expected output above is the input unchanged.
+  %xl = and i32 %x, 65535  ; low 16 bits of x
+  %xh = lshr i32 %x, 16  ; high 16 bits of x
+  %yl = and i32 %y, 65535  ; low 16 bits of y
+  %yh = lshr i32 %y, 16  ; high 16 bits of y
+
+  %t0 = mul nuw i32 %yl, %xl  ; four partial products: low*low,
+  %t1 = mul nuw i32 %yl, %xh  ; two cross terms,
+  %t2 = mul nuw i32 %yh, %xl
+  %t3 = mul nuw i32 %yh, %xh  ; and high*high
+
+  %t0l = and i32 %t0, 65535
+  %t0h = lshr i32 %t0, 16
+
+  %u0 = add i32 %t0h, %t1  ; first cross term plus the upper half of t0
+  %u0l = and i32 %u0, 65535
+  %u0h = lshr i32 %u0, 16
+
+  %u1 = add i32 %u0l, %t2  ; second cross term added to the low half of u0
+  %u1ls = shl i32 %u1, 16
+  %u1h = lshr i32 %u1, 16
+
+  %u2 = add i32 %u0h, %t3
+
+  %lo = or i32 %u1ls, %t0l  ; low 32 bits of the product
+  %hi = add i32 %u2, %u1h  ; high 32 bits of the product
+
+  %res_lo = insertvalue { i32, i32 } undef, i32 %lo, 0
+  %res = insertvalue { i32, i32 } %res_lo, i32 %hi, 1
+  ret { i32, i32 } %res
+}
+
+
+declare i64 @get_number()
+
+; In the following test cases %x and %y are instructions, not arguments.
+; This tests the placement of mul i128 and zexts.
+; Instructions are also shuffled.
+
+define { i64, i64 } @mul_full_64_variant0_1() {
+; CHECK-LABEL: @mul_full_64_variant0_1(
+; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @get_number()
+; CHECK-NEXT:    [[YL:%.*]] = and i64 [[TMP1]], 4294967295
+; CHECK-NEXT:    [[YH:%.*]] = lshr i64 [[TMP1]], 32
+; CHECK-NEXT:    [[TMP2:%.*]] = call i64 @get_number()
+; CHECK-NEXT:    [[XH:%.*]] = lshr i64 [[TMP2]], 32
+; CHECK-NEXT:    [[XL:%.*]] = and i64 [[TMP2]], 4294967295
+; CHECK-NEXT:    [[T1:%.*]] = mul nuw i64 [[YL]], [[XH]]
+; CHECK-NEXT:    [[T3:%.*]] = mul nuw i64 [[YH]], [[XH]]
+; CHECK-NEXT:    [[T2:%.*]] = mul nuw i64 [[YH]], [[XL]]
+; CHECK-NEXT:    [[T0:%.*]] = mul nuw i64 [[YL]], [[XL]]
+; CHECK-NEXT:    [[T0H:%.*]] = lshr i64 [[T0]], 32
+; CHECK-NEXT:    [[U0:%.*]] = add i64 [[T0H]], [[T1]]
+; CHECK-NEXT:    [[U0L:%.*]] = and i64 [[U0]], 4294967295
+; CHECK-NEXT:    [[U1:%.*]] = add i64 [[U0L]], [[T2]]
+; CHECK-NEXT:    [[U0H:%.*]] = lshr i64 [[U0]], 32
+; CHECK-NEXT:    [[U2:%.*]] = add i64 [[U0H]], [[T3]]
+; CHECK-NEXT:    [[U1H:%.*]] = lshr i64 [[U1]], 32
+; CHECK-NEXT:    [[HI:%.*]] = add i64 [[U2]], [[U1H]]
+; CHECK-NEXT:    [[U1LS:%.*]] = shl i64 [[U1]], 32
+; CHECK-NEXT:    [[T0L:%.*]] = and i64 [[T0]], 4294967295
+; CHECK-NEXT:    [[LO:%.*]] = or i64 [[U1LS]], [[T0L]]
+; CHECK-NEXT:    [[RES_LO:%.*]] = insertvalue { i64, i64 } undef, i64 [[LO]], 0
+; CHECK-NEXT:    [[RES:%.*]] = insertvalue { i64, i64 } [[RES_LO]], i64 [[HI]], 1
+; CHECK-NEXT:    ret { i64, i64 } [[RES]]
+; Input IR: full 64x64-bit multiply whose operands are call results, with the second call placed between the two operand splits. The expected output above is the input unchanged.
+  %1 = call i64 @get_number()  ; plays the role of y
+  %yl = and i64 %1, 4294967295
+  %yh = lshr i64 %1, 32
+
+  %2 = call i64 @get_number()  ; plays the role of x
+  %xh = lshr i64 %2, 32
+  %xl = and i64 %2, 4294967295
+
+  %t1 = mul nuw i64 %yl, %xh  ; partial products, listed out of numeric order
+  %t3 = mul nuw i64 %yh, %xh
+  %t2 = mul nuw i64 %yh, %xl
+  %t0 = mul nuw i64 %yl, %xl
+
+  %t0h = lshr i64 %t0, 32
+  %u0 = add i64 %t0h, %t1
+  %u0l = and i64 %u0, 4294967295
+  %u1 = add i64 %u0l, %t2
+  %u0h = lshr i64 %u0, 32
+  %u2 = add i64 %u0h, %t3
+  %u1h = lshr i64 %u1, 32
+  %hi = add i64 %u2, %u1h  ; high 64 bits, computed before the low half here
+
+  %u1ls = shl i64 %u1, 32
+  %t0l = and i64 %t0, 4294967295
+  %lo = or i64 %u1ls, %t0l  ; low 64 bits of the product
+
+  %res_lo = insertvalue { i64, i64 } undef, i64 %lo, 0
+  %res = insertvalue { i64, i64 } %res_lo, i64 %hi, 1
+  ret { i64, i64 } %res
+}
+
+define { i64, i64 } @mul_full_64_variant0_2() {
+; CHECK-LABEL: @mul_full_64_variant0_2(
+; CHECK-NEXT:    [[X:%.*]] = call i64 @get_number()
+; CHECK-NEXT:    [[Y:%.*]] = call i64 @get_number()
+; CHECK-NEXT:    [[YL:%.*]] = and i64 [[Y]], 4294967295
+; CHECK-NEXT:    [[YH:%.*]] = lshr i64 [[Y]], 32
+; CHECK-NEXT:    [[XH:%.*]] = lshr i64 [[X]], 32
+; CHECK-NEXT:    [[XL:%.*]] = and i64 [[X]], 4294967295
+; CHECK-NEXT:    [[T3:%.*]] = mul nuw i64 [[XH]], [[YH]]
+; CHECK-NEXT:    [[T2:%.*]] = mul nuw i64 [[XL]], [[YH]]
+; CHECK-NEXT:    [[T1:%.*]] = mul nuw i64 [[XH]], [[YL]]
+; CHECK-NEXT:    [[T0:%.*]] = mul nuw i64 [[XL]], [[YL]]
+; CHECK-NEXT:    [[T0H:%.*]] = lshr i64 [[T0]], 32
+; CHECK-NEXT:    [[U0:%.*]] = add i64 [[T1]], [[T0H]]
+; CHECK-NEXT:    [[U0L:%.*]] = and i64 [[U0]], 4294967295
+; CHECK-NEXT:    [[U1:%.*]] = add i64 [[T2]], [[U0L]]
+; CHECK-NEXT:    [[U0H:%.*]] = lshr i64 [[U0]], 32
+; CHECK-NEXT:    [[U2:%.*]] = add i64 [[U0H]], [[T3]]
+; CHECK-NEXT:    [[U1H:%.*]] = lshr i64 [[U1]], 32
+; CHECK-NEXT:    [[HI:%.*]] = add i64 [[U1H]], [[U2]]
+; CHECK-NEXT:    [[U1LS:%.*]] = shl i64 [[U1]], 32
+; CHECK-NEXT:    [[T0L:%.*]] = and i64 [[T0]], 4294967295
+; CHECK-NEXT:    [[LO:%.*]] = or i64 [[T0L]], [[U1LS]]
+; CHECK-NEXT:    [[RES_LO:%.*]] = insertvalue { i64, i64 } undef, i64 [[LO]], 0
+; CHECK-NEXT:    [[RES:%.*]] = insertvalue { i64, i64 } [[RES_LO]], i64 [[HI]], 1
+; CHECK-NEXT:    ret { i64, i64 } [[RES]]
+; Input IR: full 64x64-bit multiply with both operand calls issued first and several operands written in swapped order. The expected output above is the input unchanged.
+  %x = call i64 @get_number()
+  %y = call i64 @get_number()
+
+  %yl = and i64 %y, 4294967295
+  %yh = lshr i64 %y, 32
+  %xh = lshr i64 %x, 32
+  %xl = and i64 %x, 4294967295
+
+  %t3 = mul nuw i64 %xh, %yh  ; partial products with x as the first operand
+  %t2 = mul nuw i64 %xl, %yh
+  %t1 = mul nuw i64 %xh, %yl
+  %t0 = mul nuw i64 %xl, %yl
+
+  %t0h = lshr i64 %t0, 32
+  %u0 = add i64 %t1, %t0h  ; product term written first
+  %u0l = and i64 %u0, 4294967295
+  %u1 = add i64 %t2, %u0l  ; product term written first
+  %u0h = lshr i64 %u0, 32
+  %u2 = add i64 %u0h, %t3
+  %u1h = lshr i64 %u1, 32
+  %hi = add i64 %u1h, %u2  ; high 64 bits of the product
+
+  %u1ls = shl i64 %u1, 32
+  %t0l = and i64 %t0, 4294967295
+  %lo = or i64 %t0l, %u1ls  ; low 64 bits of the product
+
+  %res_lo = insertvalue { i64, i64 } undef, i64 %lo, 0
+  %res = insertvalue { i64, i64 } %res_lo, i64 %hi, 1
+  ret { i64, i64 } %res
+}
+
+
+define i64 @umulh_64(i64 %x, i64 %y) {
+; CHECK-LABEL: @umulh_64(
+; CHECK-NEXT:    [[XL:%.*]] = and i64 [[X:%.*]], 4294967295
+; CHECK-NEXT:    [[XH:%.*]] = lshr i64 [[X]], 32
+; CHECK-NEXT:    [[YL:%.*]] = and i64 [[Y:%.*]], 4294967295
+; CHECK-NEXT:    [[YH:%.*]] = lshr i64 [[Y]], 32
+; CHECK-NEXT:    [[T0:%.*]] = mul nuw i64 [[YL]], [[XL]]
+; CHECK-NEXT:    [[T1:%.*]] = mul nuw i64 [[YL]], [[XH]]
+; CHECK-NEXT:    [[T2:%.*]] = mul nuw i64 [[YH]], [[XL]]
+; CHECK-NEXT:    [[T3:%.*]] = mul nuw i64 [[YH]], [[XH]]
+; CHECK-NEXT:    [[T0H:%.*]] = lshr i64 [[T0]], 32
+; CHECK-NEXT:    [[U0:%.*]] = add i64 [[T0H]], [[T1]]
+; CHECK-NEXT:    [[U0L:%.*]] = and i64 [[U0]], 4294967295
+; CHECK-NEXT:    [[U0H:%.*]] = lshr i64 [[U0]], 32
+; CHECK-NEXT:    [[U1:%.*]] = add i64 [[U0L]], [[T2]]
+; CHECK-NEXT:    [[U1H:%.*]] = lshr i64 [[U1]], 32
+; CHECK-NEXT:    [[U2:%.*]] = add i64 [[U0H]], [[T3]]
+; CHECK-NEXT:    [[HI:%.*]] = add i64 [[U2]], [[U1H]]
+; CHECK-NEXT:    ret i64 [[HI]]
+; Input IR: computes only the high 64 bits of the 64x64-bit product from 32-bit halves. The expected output above is the input unchanged.
+  %xl = and i64 %x, 4294967295  ; low 32 bits of x
+  %xh = lshr i64 %x, 32  ; high 32 bits of x
+  %yl = and i64 %y, 4294967295  ; low 32 bits of y
+  %yh = lshr i64 %y, 32  ; high 32 bits of y
+
+  %t0 = mul nuw i64 %yl, %xl  ; four partial products
+  %t1 = mul nuw i64 %yl, %xh
+  %t2 = mul nuw i64 %yh, %xl
+  %t3 = mul nuw i64 %yh, %xh
+
+  %t0h = lshr i64 %t0, 32  ; only the upper half of t0 is used here
+
+  %u0 = add i64 %t0h, %t1
+  %u0l = and i64 %u0, 4294967295
+  %u0h = lshr i64 %u0, 32
+
+  %u1 = add i64 %u0l, %t2
+  %u1h = lshr i64 %u1, 32
+
+  %u2 = add i64 %u0h, %t3
+
+  %hi = add i64 %u2, %u1h  ; high 64 bits of the product
+  ret i64 %hi
+}
+
+
+define i64 @mullo(i64 %x, i64 %y) {
+; CHECK-LABEL: @mullo(
+; CHECK-NEXT:    [[XL:%.*]] = and i64 [[X:%.*]], 4294967295
+; CHECK-NEXT:    [[XH:%.*]] = lshr i64 [[X]], 32
+; CHECK-NEXT:    [[YL:%.*]] = and i64 [[Y:%.*]], 4294967295
+; CHECK-NEXT:    [[YH:%.*]] = lshr i64 [[Y]], 32
+; CHECK-NEXT:    [[T0:%.*]] = mul nuw i64 [[YL]], [[XL]]
+; CHECK-NEXT:    [[T1:%.*]] = mul i64 [[XH]], [[Y]]
+; CHECK-NEXT:    [[T2:%.*]] = mul i64 [[YH]], [[X]]
+; CHECK-NEXT:    [[T0L:%.*]] = and i64 [[T0]], 4294967295
+; CHECK-NEXT:    [[T0H:%.*]] = lshr i64 [[T0]], 32
+; CHECK-NEXT:    [[U0:%.*]] = add i64 [[T0H]], [[T1]]
+; CHECK-NEXT:    [[U1:%.*]] = add i64 [[U0]], [[T2]]
+; CHECK-NEXT:    [[U1LS:%.*]] = shl i64 [[U1]], 32
+; CHECK-NEXT:    [[LO:%.*]] = or i64 [[U1LS]], [[T0L]]
+; CHECK-NEXT:    ret i64 [[LO]]
+; Input IR: computes only the low 64 bits of the product. The expected output above differs from the input: %t1 and %t2 use the unmasked %y and %x (without nuw) and the mask on %u0 is dropped.
+  %xl = and i64 %x, 4294967295  ; low 32 bits of x
+  %xh = lshr i64 %x, 32  ; high 32 bits of x
+  %yl = and i64 %y, 4294967295  ; low 32 bits of y
+  %yh = lshr i64 %y, 32  ; high 32 bits of y
+
+  %t0 = mul nuw i64 %yl, %xl
+  %t1 = mul nuw i64 %yl, %xh  ; expected output multiplies %xh by %y instead
+  %t2 = mul nuw i64 %yh, %xl  ; expected output multiplies %yh by %x instead
+
+  %t0l = and i64 %t0, 4294967295
+  %t0h = lshr i64 %t0, 32
+
+  %u0 = add i64 %t0h, %t1
+  %u0l = and i64 %u0, 4294967295  ; absent from the expected output
+
+  %u1 = add i64 %u0l, %t2
+  %u1ls = shl i64 %u1, 32  ; only the low 32 bits of %u1 survive this shift
+
+  %lo = or i64 %u1ls, %t0l  ; low 64 bits of the product
+  ret i64 %lo
+}
+
+
+define i64 @mullo_variant3(i64 %a, i64 %b) {
+; CHECK-LABEL: @mullo_variant3(
+; CHECK-NEXT:    [[AL:%.*]] = and i64 [[A:%.*]], 4294967295
+; CHECK-NEXT:    [[AH:%.*]] = lshr i64 [[A]], 32
+; CHECK-NEXT:    [[BL:%.*]] = and i64 [[B:%.*]], 4294967295
+; CHECK-NEXT:    [[BH:%.*]] = lshr i64 [[B]], 32
+; CHECK-NEXT:    [[T0:%.*]] = mul nuw i64 [[BL]], [[AL]]
+; CHECK-NEXT:    [[T1:%.*]] = mul i64 [[AH]], [[B]]
+; CHECK-NEXT:    [[T2:%.*]] = mul i64 [[BH]], [[A]]
+; CHECK-NEXT:    [[U1:%.*]] = add i64 [[T2]], [[T1]]
+; CHECK-NEXT:    [[U1LS:%.*]] = shl i64 [[U1]], 32
+; CHECK-NEXT:    [[LO:%.*]] = add i64 [[U1LS]], [[T0]]
+; CHECK-NEXT:    ret i64 [[LO]]
+; Input IR: low 64 bits of the product as ((t2 + t1) << 32) + t0. The expected output above differs from the input only in %t1 and %t2, which use the unmasked %b and %a without nuw.
+  %al = and i64 %a, 4294967295  ; low 32 bits of a
+  %ah = lshr i64 %a, 32  ; high 32 bits of a
+  %bl = and i64 %b, 4294967295  ; low 32 bits of b
+  %bh = lshr i64 %b, 32  ; high 32 bits of b
+
+  %t0 = mul nuw i64 %bl, %al
+  %t1 = mul nuw i64 %bl, %ah  ; expected output multiplies %ah by %b instead
+  %t2 = mul nuw i64 %bh, %al  ; expected output multiplies %bh by %a instead
+
+  %u1 = add i64 %t2, %t1  ; sum of the two cross terms
+  %u1ls = shl i64 %u1, 32
+
+  %lo = add i64 %u1ls, %t0  ; low 64 bits of the product
+  ret i64 %lo
+}
+
+
+declare void @eat_i64(i64)
+declare void @eat_i128(i128)
+
+define i64 @mullo_duplicate(i64 %x, i64 %y) {
+; CHECK-LABEL: @mullo_duplicate(
+; CHECK-NEXT:    [[DUPLICATED_MUL:%.*]] = mul i64 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    call void @eat_i64(i64 [[DUPLICATED_MUL]])
+; CHECK-NEXT:    [[XL:%.*]] = and i64 [[X]], 4294967295
+; CHECK-NEXT:    [[XH:%.*]] = lshr i64 [[X]], 32
+; CHECK-NEXT:    [[YL:%.*]] = and i64 [[Y]], 4294967295
+; CHECK-NEXT:    [[YH:%.*]] = lshr i64 [[Y]], 32
+; CHECK-NEXT:    [[T0:%.*]] = mul nuw i64 [[YL]], [[XL]]
+; CHECK-NEXT:    [[T1:%.*]] = mul i64 [[XH]], [[Y]]
+; CHECK-NEXT:    [[T2:%.*]] = mul i64 [[YH]], [[X]]
+; CHECK-NEXT:    [[T0L:%.*]] = and i64 [[T0]], 4294967295
+; CHECK-NEXT:    [[T0H:%.*]] = lshr i64 [[T0]], 32
+; CHECK-NEXT:    [[U0:%.*]] = add i64 [[T0H]], [[T1]]
+; CHECK-NEXT:    [[U1:%.*]] = add i64 [[U0]], [[T2]]
+; CHECK-NEXT:    [[U1LS:%.*]] = shl i64 [[U1]], 32
+; CHECK-NEXT:    [[LO:%.*]] = or i64 [[U1LS]], [[T0L]]
+; CHECK-NEXT:    ret i64 [[LO]]
+; Input IR: the low-half sequence alongside a plain "mul i64 %x, %y" consumed by @eat_i64. The expected output above keeps both: the plain mul stays, %t1/%t2 use the unmasked %y/%x without nuw, and the mask on %u0 is dropped.
+  %duplicated_mul = mul i64 %x, %y  ; plain multiply of the same operands
+  call void @eat_i64(i64 %duplicated_mul)
+
+  %xl = and i64 %x, 4294967295  ; low 32 bits of x
+  %xh = lshr i64 %x, 32  ; high 32 bits of x
+  %yl = and i64 %y, 4294967295  ; low 32 bits of y
+  %yh = lshr i64 %y, 32  ; high 32 bits of y
+
+  %t0 = mul nuw i64 %yl, %xl
+  %t1 = mul nuw i64 %yl, %xh  ; expected output multiplies %xh by %y instead
+  %t2 = mul nuw i64 %yh, %xl  ; expected output multiplies %yh by %x instead
+
+  %t0l = and i64 %t0, 4294967295
+  %t0h = lshr i64 %t0, 32
+
+  %u0 = add i64 %t0h, %t1
+  %u0l = and i64 %u0, 4294967295  ; absent from the expected output
+
+  %u1 = add i64 %u0l, %t2
+  %u1ls = shl i64 %u1, 32
+
+  %lo = or i64 %u1ls, %t0l  ; low 64 bits of the product
+  ret i64 %lo
+}
+
+define { i64, i64 } @mul_full_64_duplicate(i64 %x, i64 %y) {
+; CHECK-LABEL: @mul_full_64_duplicate(
+; CHECK-NEXT:    [[XX:%.*]] = zext i64 [[X:%.*]] to i128
+; CHECK-NEXT:    [[YY:%.*]] = zext i64 [[Y:%.*]] to i128
+; CHECK-NEXT:    [[DUPLICATED_MUL:%.*]] = mul nuw i128 [[XX]], [[YY]]
+; CHECK-NEXT:    call void @eat_i128(i128 [[DUPLICATED_MUL]])
+; CHECK-NEXT:    [[XL:%.*]] = and i64 [[X]], 4294967295
+; CHECK-NEXT:    [[XH:%.*]] = lshr i64 [[X]], 32
+; CHECK-NEXT:    [[YL:%.*]] = and i64 [[Y]], 4294967295
+; CHECK-NEXT:    [[YH:%.*]] = lshr i64 [[Y]], 32
+; CHECK-NEXT:    [[T0:%.*]] = mul nuw i64 [[YL]], [[XL]]
+; CHECK-NEXT:    [[T1:%.*]] = mul nuw i64 [[YL]], [[XH]]
+; CHECK-NEXT:    [[T2:%.*]] = mul nuw i64 [[YH]], [[XL]]
+; CHECK-NEXT:    [[T3:%.*]] = mul nuw i64 [[YH]], [[XH]]
+; CHECK-NEXT:    [[T0L:%.*]] = and i64 [[T0]], 4294967295
+; CHECK-NEXT:    [[T0H:%.*]] = lshr i64 [[T0]], 32
+; CHECK-NEXT:    [[U0:%.*]] = add i64 [[T0H]], [[T1]]
+; CHECK-NEXT:    [[U0L:%.*]] = and i64 [[U0]], 4294967295
+; CHECK-NEXT:    [[U0H:%.*]] = lshr i64 [[U0]], 32
+; CHECK-NEXT:    [[U1:%.*]] = add i64 [[U0L]], [[T2]]
+; CHECK-NEXT:    [[U1LS:%.*]] = shl i64 [[U1]], 32
+; CHECK-NEXT:    [[U1H:%.*]] = lshr i64 [[U1]], 32
+; CHECK-NEXT:    [[U2:%.*]] = add i64 [[U0H]], [[T3]]
+; CHECK-NEXT:    [[LO:%.*]] = or i64 [[U1LS]], [[T0L]]
+; CHECK-NEXT:    [[HI:%.*]] = add i64 [[U2]], [[U1H]]
+; CHECK-NEXT:    [[RES_LO:%.*]] = insertvalue { i64, i64 } undef, i64 [[LO]], 0
+; CHECK-NEXT:    [[RES:%.*]] = insertvalue { i64, i64 } [[RES_LO]], i64 [[HI]], 1
+; CHECK-NEXT:    ret { i64, i64 } [[RES]]
+; Input IR: the full-multiply sequence alongside an explicit zext + "mul i128" consumed by @eat_i128. In the expected output above the i128 mul gains nuw; the half-width sequence is unchanged.
+  %xx = zext i64 %x to i128
+  %yy = zext i64 %y to i128
+  %duplicated_mul = mul i128 %xx, %yy  ; wide multiply of the same operands
+  call void @eat_i128(i128 %duplicated_mul)
+
+  %xl = and i64 %x, 4294967295  ; low 32 bits of x
+  %xh = lshr i64 %x, 32  ; high 32 bits of x
+  %yl = and i64 %y, 4294967295  ; low 32 bits of y
+  %yh = lshr i64 %y, 32  ; high 32 bits of y
+
+  %t0 = mul nuw i64 %yl, %xl  ; four partial products
+  %t1 = mul nuw i64 %yl, %xh
+  %t2 = mul nuw i64 %yh, %xl
+  %t3 = mul nuw i64 %yh, %xh
+
+  %t0l = and i64 %t0, 4294967295
+  %t0h = lshr i64 %t0, 32
+
+  %u0 = add i64 %t0h, %t1
+  %u0l = and i64 %u0, 4294967295
+  %u0h = lshr i64 %u0, 32
+
+  %u1 = add i64 %u0l, %t2
+  %u1ls = shl i64 %u1, 32
+  %u1h = lshr i64 %u1, 32
+
+  %u2 = add i64 %u0h, %t3
+
+  %lo = or i64 %u1ls, %t0l  ; low 64 bits of the product
+  %hi = add i64 %u2, %u1h  ; high 64 bits of the product
+
+  %res_lo = insertvalue { i64, i64 } undef, i64 %lo, 0
+  %res = insertvalue { i64, i64 } %res_lo, i64 %hi, 1
+  ret { i64, i64 } %res
+}
+
+
+define i64 @umulhi_64_v2() {
+; CHECK-LABEL: @umulhi_64_v2(
+; CHECK-NEXT:    [[X:%.*]] = call i64 @get_number()
+; CHECK-NEXT:    [[Y:%.*]] = call i64 @get_number()
+; CHECK-NEXT:    [[YL:%.*]] = and i64 [[Y]], 4294967295
+; CHECK-NEXT:    [[YH:%.*]] = lshr i64 [[Y]], 32
+; CHECK-NEXT:    [[XH:%.*]] = lshr i64 [[X]], 32
+; CHECK-NEXT:    [[XL:%.*]] = and i64 [[X]], 4294967295
+; CHECK-NEXT:    [[T3:%.*]] = mul nuw i64 [[XH]], [[YH]]
+; CHECK-NEXT:    [[T2:%.*]] = mul nuw i64 [[XL]], [[YH]]
+; CHECK-NEXT:    [[T1:%.*]] = mul nuw i64 [[XH]], [[YL]]
+; CHECK-NEXT:    [[T0:%.*]] = mul nuw i64 [[XL]], [[YL]]
+; CHECK-NEXT:    [[T0H:%.*]] = lshr i64 [[T0]], 32
+; CHECK-NEXT:    [[U0:%.*]] = add i64 [[T1]], [[T0H]]
+; CHECK-NEXT:    [[U0L:%.*]] = and i64 [[U0]], 4294967295
+; CHECK-NEXT:    [[U1:%.*]] = add i64 [[T2]], [[U0L]]
+; CHECK-NEXT:    [[U0H:%.*]] = lshr i64 [[U0]], 32
+; CHECK-NEXT:    [[U2:%.*]] = add i64 [[U0H]], [[T3]]
+; CHECK-NEXT:    [[U1H:%.*]] = lshr i64 [[U1]], 32
+; CHECK-NEXT:    [[HI:%.*]] = add i64 [[U1H]], [[U2]]
+; CHECK-NEXT:    ret i64 [[HI]]
+; Input IR: high 64 bits only, with both operands coming from calls that are issued before any splitting. The expected output above is the input unchanged.
+  %x = call i64 @get_number()
+  %y = call i64 @get_number()
+
+  %yl = and i64 %y, 4294967295
+  %yh = lshr i64 %y, 32
+  %xh = lshr i64 %x, 32
+  %xl = and i64 %x, 4294967295
+
+  %t3 = mul nuw i64 %xh, %yh  ; four partial products
+  %t2 = mul nuw i64 %xl, %yh
+  %t1 = mul nuw i64 %xh, %yl
+  %t0 = mul nuw i64 %xl, %yl
+
+  %t0h = lshr i64 %t0, 32
+  %u0 = add i64 %t1, %t0h
+  %u0l = and i64 %u0, 4294967295
+  %u1 = add i64 %t2, %u0l
+  %u0h = lshr i64 %u0, 32
+  %u2 = add i64 %u0h, %t3
+  %u1h = lshr i64 %u1, 32
+  %hi = add i64 %u1h, %u2  ; high 64 bits of the product
+
+  ret i64 %hi
+}
+
+
+define i64 @umulhi_64_v3() {
+; CHECK-LABEL: @umulhi_64_v3(
+; CHECK-NEXT:    [[X:%.*]] = call i64 @get_number()
+; CHECK-NEXT:    [[XH:%.*]] = lshr i64 [[X]], 32
+; CHECK-NEXT:    [[XL:%.*]] = and i64 [[X]], 4294967295
+; CHECK-NEXT:    [[Y:%.*]] = call i64 @get_number()
+; CHECK-NEXT:    [[YL:%.*]] = and i64 [[Y]], 4294967295
+; CHECK-NEXT:    [[YH:%.*]] = lshr i64 [[Y]], 32
+; CHECK-NEXT:    [[T3:%.*]] = mul nuw i64 [[XH]], [[YH]]
+; CHECK-NEXT:    [[T2:%.*]] = mul nuw i64 [[XL]], [[YH]]
+; CHECK-NEXT:    [[T1:%.*]] = mul nuw i64 [[XH]], [[YL]]
+; CHECK-NEXT:    [[T0:%.*]] = mul nuw i64 [[XL]], [[YL]]
+; CHECK-NEXT:    [[T0H:%.*]] = lshr i64 [[T0]], 32
+; CHECK-NEXT:    [[U0:%.*]] = add i64 [[T1]], [[T0H]]
+; CHECK-NEXT:    [[U0L:%.*]] = and i64 [[U0]], 4294967295
+; CHECK-NEXT:    [[U1:%.*]] = add i64 [[T2]], [[U0L]]
+; CHECK-NEXT:    [[U0H:%.*]] = lshr i64 [[U0]], 32
+; CHECK-NEXT:    [[U2:%.*]] = add i64 [[U0H]], [[T3]]
+; CHECK-NEXT:    [[U1H:%.*]] = lshr i64 [[U1]], 32
+; CHECK-NEXT:    [[HI:%.*]] = add i64 [[U1H]], [[U2]]
+; CHECK-NEXT:    ret i64 [[HI]]
+; Input IR: high 64 bits only, with %x split into halves before the second call that produces %y. The expected output above is the input unchanged.
+  %x = call i64 @get_number()
+  %xh = lshr i64 %x, 32
+  %xl = and i64 %x, 4294967295
+
+  %y = call i64 @get_number()  ; second call sits between the two operand splits
+  %yl = and i64 %y, 4294967295
+  %yh = lshr i64 %y, 32
+
+  %t3 = mul nuw i64 %xh, %yh  ; four partial products
+  %t2 = mul nuw i64 %xl, %yh
+  %t1 = mul nuw i64 %xh, %yl
+  %t0 = mul nuw i64 %xl, %yl
+
+  %t0h = lshr i64 %t0, 32
+  %u0 = add i64 %t1, %t0h
+  %u0l = and i64 %u0, 4294967295
+  %u1 = add i64 %t2, %u0l
+  %u0h = lshr i64 %u0, 32
+  %u2 = add i64 %u0h, %t3
+  %u1h = lshr i64 %u1, 32
+  %hi = add i64 %u1h, %u2  ; high 64 bits of the product
+
+  ret i64 %hi
+}


        


More information about the llvm-commits mailing list