[llvm] r350271 - [X86] Add test cases to show that we fail to fold loads into i8 smulo and i8/i16/i32/i64 umulo lowering without the assistance of the peephole pass. NFC

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Wed Jan 2 15:24:03 PST 2019


Author: ctopper
Date: Wed Jan  2 15:24:03 2019
New Revision: 350271

URL: http://llvm.org/viewvc/llvm-project?rev=350271&view=rev
Log:
[X86] Add test cases to show that we fail to fold loads into i8 smulo and i8/i16/i32/i64 umulo lowering without the assistance of the peephole pass. NFC

Modified:
    llvm/trunk/test/CodeGen/X86/xmulo.ll

Modified: llvm/trunk/test/CodeGen/X86/xmulo.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/xmulo.ll?rev=350271&r1=350270&r2=350271&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/xmulo.ll (original)
+++ llvm/trunk/test/CodeGen/X86/xmulo.ll Wed Jan  2 15:24:03 2019
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=x86_64-darwin-unknown < %s | FileCheck %s --check-prefix=SDAG
-; RUN: llc -mtriple=x86_64-darwin-unknown -fast-isel -fast-isel-abort=1 < %s | FileCheck %s --check-prefix=FAST
-; RUN: llc -mtriple=x86_64-darwin-unknown -mcpu=knl < %s | FileCheck %s --check-prefix=SDAG --check-prefix=KNL
+; RUN: llc -disable-peephole -mtriple=x86_64-darwin-unknown < %s | FileCheck %s --check-prefix=SDAG
+; RUN: llc -disable-peephole -mtriple=x86_64-darwin-unknown -fast-isel -fast-isel-abort=1 < %s | FileCheck %s --check-prefix=FAST
+; RUN: llc -disable-peephole -mtriple=x86_64-darwin-unknown -mcpu=knl < %s | FileCheck %s --check-prefix=SDAG --check-prefix=KNL
 
 define {i64, i1} @t1() nounwind {
 ; SDAG-LABEL: t1:
@@ -722,6 +722,443 @@ define i1 @bug27873(i64 %c1, i1 %c2) {
   ret i1 %x1
 }
 
+define zeroext i1 @smuloi8_load(i8* %ptr1, i8 %v2, i8* %res) { ; signed i8 multiply-with-overflow; first operand is loaded from %ptr1
+; SDAG-LABEL: smuloi8_load:
+; SDAG:       ## %bb.0:
+; SDAG-NEXT:    movb (%rdi), %al
+; SDAG-NEXT:    imulb %sil
+; SDAG-NEXT:    seto %cl
+; SDAG-NEXT:    movb %al, (%rdx)
+; SDAG-NEXT:    movl %ecx, %eax
+; SDAG-NEXT:    retq
+;
+; FAST-LABEL: smuloi8_load:
+; FAST:       ## %bb.0:
+; FAST-NEXT:    movb (%rdi), %al
+; FAST-NEXT:    imulb %sil
+; FAST-NEXT:    seto %cl
+; FAST-NEXT:    movb %al, (%rdx)
+; FAST-NEXT:    andb $1, %cl
+; FAST-NEXT:    movzbl %cl, %eax
+; FAST-NEXT:    retq
+  %v1 = load i8, i8* %ptr1 ; memory operand feeding the multiply (the load-folding candidate)
+  %t = call {i8, i1} @llvm.smul.with.overflow.i8(i8 %v1, i8 %v2) ; returns the pair {product, overflow bit}
+  %val = extractvalue {i8, i1} %t, 0 ; field 0: the i8 product
+  %obit = extractvalue {i8, i1} %t, 1 ; field 1: the i1 overflow bit
+  store i8 %val, i8* %res ; product is written out through %res
+  ret i1 %obit ; overflow bit is the function result
+}
+
+define zeroext i1 @smuloi8_load2(i8 %v1, i8* %ptr2, i8* %res) { ; signed i8 multiply-with-overflow; second operand is loaded from %ptr2
+; SDAG-LABEL: smuloi8_load2:
+; SDAG:       ## %bb.0:
+; SDAG-NEXT:    movl %edi, %eax
+; SDAG-NEXT:    movb (%rsi), %cl
+; SDAG-NEXT:    ## kill: def $al killed $al killed $eax
+; SDAG-NEXT:    imulb %cl
+; SDAG-NEXT:    seto %cl
+; SDAG-NEXT:    movb %al, (%rdx)
+; SDAG-NEXT:    movl %ecx, %eax
+; SDAG-NEXT:    retq
+;
+; FAST-LABEL: smuloi8_load2:
+; FAST:       ## %bb.0:
+; FAST-NEXT:    movl %edi, %eax
+; FAST-NEXT:    ## kill: def $al killed $al killed $eax
+; FAST-NEXT:    imulb (%rsi)
+; FAST-NEXT:    seto %cl
+; FAST-NEXT:    movb %al, (%rdx)
+; FAST-NEXT:    andb $1, %cl
+; FAST-NEXT:    movzbl %cl, %eax
+; FAST-NEXT:    retq
+  %v2 = load i8, i8* %ptr2 ; memory operand feeding the multiply (the load-folding candidate)
+  %t = call {i8, i1} @llvm.smul.with.overflow.i8(i8 %v1, i8 %v2) ; returns the pair {product, overflow bit}
+  %val = extractvalue {i8, i1} %t, 0 ; field 0: the i8 product
+  %obit = extractvalue {i8, i1} %t, 1 ; field 1: the i1 overflow bit
+  store i8 %val, i8* %res ; product is written out through %res
+  ret i1 %obit ; overflow bit is the function result
+}
+
+define zeroext i1 @smuloi16_load(i16* %ptr1, i16 %v2, i16* %res) { ; signed i16 multiply-with-overflow; first operand is loaded from %ptr1
+; SDAG-LABEL: smuloi16_load:
+; SDAG:       ## %bb.0:
+; SDAG-NEXT:    imulw (%rdi), %si
+; SDAG-NEXT:    seto %al
+; SDAG-NEXT:    movw %si, (%rdx)
+; SDAG-NEXT:    retq
+;
+; FAST-LABEL: smuloi16_load:
+; FAST:       ## %bb.0:
+; FAST-NEXT:    imulw (%rdi), %si
+; FAST-NEXT:    seto %al
+; FAST-NEXT:    movw %si, (%rdx)
+; FAST-NEXT:    andb $1, %al
+; FAST-NEXT:    movzbl %al, %eax
+; FAST-NEXT:    retq
+  %v1 = load i16, i16* %ptr1 ; memory operand feeding the multiply (the load-folding candidate)
+  %t = call {i16, i1} @llvm.smul.with.overflow.i16(i16 %v1, i16 %v2) ; returns the pair {product, overflow bit}
+  %val = extractvalue {i16, i1} %t, 0 ; field 0: the i16 product
+  %obit = extractvalue {i16, i1} %t, 1 ; field 1: the i1 overflow bit
+  store i16 %val, i16* %res ; product is written out through %res
+  ret i1 %obit ; overflow bit is the function result
+}
+
+define zeroext i1 @smuloi16_load2(i16 %v1, i16* %ptr2, i16* %res) { ; signed i16 multiply-with-overflow; second operand is loaded from %ptr2
+; SDAG-LABEL: smuloi16_load2:
+; SDAG:       ## %bb.0:
+; SDAG-NEXT:    imulw (%rsi), %di
+; SDAG-NEXT:    seto %al
+; SDAG-NEXT:    movw %di, (%rdx)
+; SDAG-NEXT:    retq
+;
+; FAST-LABEL: smuloi16_load2:
+; FAST:       ## %bb.0:
+; FAST-NEXT:    imulw (%rsi), %di
+; FAST-NEXT:    seto %al
+; FAST-NEXT:    movw %di, (%rdx)
+; FAST-NEXT:    andb $1, %al
+; FAST-NEXT:    movzbl %al, %eax
+; FAST-NEXT:    retq
+  %v2 = load i16, i16* %ptr2 ; memory operand feeding the multiply (the load-folding candidate)
+  %t = call {i16, i1} @llvm.smul.with.overflow.i16(i16 %v1, i16 %v2) ; returns the pair {product, overflow bit}
+  %val = extractvalue {i16, i1} %t, 0 ; field 0: the i16 product
+  %obit = extractvalue {i16, i1} %t, 1 ; field 1: the i1 overflow bit
+  store i16 %val, i16* %res ; product is written out through %res
+  ret i1 %obit ; overflow bit is the function result
+}
+
+define zeroext i1 @smuloi32_load(i32* %ptr1, i32 %v2, i32* %res) { ; signed i32 multiply-with-overflow; first operand is loaded from %ptr1
+; SDAG-LABEL: smuloi32_load:
+; SDAG:       ## %bb.0:
+; SDAG-NEXT:    imull (%rdi), %esi
+; SDAG-NEXT:    seto %al
+; SDAG-NEXT:    movl %esi, (%rdx)
+; SDAG-NEXT:    retq
+;
+; FAST-LABEL: smuloi32_load:
+; FAST:       ## %bb.0:
+; FAST-NEXT:    imull (%rdi), %esi
+; FAST-NEXT:    seto %al
+; FAST-NEXT:    movl %esi, (%rdx)
+; FAST-NEXT:    andb $1, %al
+; FAST-NEXT:    movzbl %al, %eax
+; FAST-NEXT:    retq
+  %v1 = load i32, i32* %ptr1 ; memory operand feeding the multiply (the load-folding candidate)
+  %t = call {i32, i1} @llvm.smul.with.overflow.i32(i32 %v1, i32 %v2) ; returns the pair {product, overflow bit}
+  %val = extractvalue {i32, i1} %t, 0 ; field 0: the i32 product
+  %obit = extractvalue {i32, i1} %t, 1 ; field 1: the i1 overflow bit
+  store i32 %val, i32* %res ; product is written out through %res
+  ret i1 %obit ; overflow bit is the function result
+}
+
+define zeroext i1 @smuloi32_load2(i32 %v1, i32* %ptr2, i32* %res) { ; signed i32 multiply-with-overflow; second operand is loaded from %ptr2
+; SDAG-LABEL: smuloi32_load2:
+; SDAG:       ## %bb.0:
+; SDAG-NEXT:    imull (%rsi), %edi
+; SDAG-NEXT:    seto %al
+; SDAG-NEXT:    movl %edi, (%rdx)
+; SDAG-NEXT:    retq
+;
+; FAST-LABEL: smuloi32_load2:
+; FAST:       ## %bb.0:
+; FAST-NEXT:    imull (%rsi), %edi
+; FAST-NEXT:    seto %al
+; FAST-NEXT:    movl %edi, (%rdx)
+; FAST-NEXT:    andb $1, %al
+; FAST-NEXT:    movzbl %al, %eax
+; FAST-NEXT:    retq
+  %v2 = load i32, i32* %ptr2 ; memory operand feeding the multiply (the load-folding candidate)
+  %t = call {i32, i1} @llvm.smul.with.overflow.i32(i32 %v1, i32 %v2) ; returns the pair {product, overflow bit}
+  %val = extractvalue {i32, i1} %t, 0 ; field 0: the i32 product
+  %obit = extractvalue {i32, i1} %t, 1 ; field 1: the i1 overflow bit
+  store i32 %val, i32* %res ; product is written out through %res
+  ret i1 %obit ; overflow bit is the function result
+}
+
+define zeroext i1 @smuloi64_load(i64* %ptr1, i64 %v2, i64* %res) { ; signed i64 multiply-with-overflow; first operand is loaded from %ptr1
+; SDAG-LABEL: smuloi64_load:
+; SDAG:       ## %bb.0:
+; SDAG-NEXT:    imulq (%rdi), %rsi
+; SDAG-NEXT:    seto %al
+; SDAG-NEXT:    movq %rsi, (%rdx)
+; SDAG-NEXT:    retq
+;
+; FAST-LABEL: smuloi64_load:
+; FAST:       ## %bb.0:
+; FAST-NEXT:    imulq (%rdi), %rsi
+; FAST-NEXT:    seto %al
+; FAST-NEXT:    movq %rsi, (%rdx)
+; FAST-NEXT:    andb $1, %al
+; FAST-NEXT:    movzbl %al, %eax
+; FAST-NEXT:    retq
+  %v1 = load i64, i64* %ptr1 ; memory operand feeding the multiply (the load-folding candidate)
+  %t = call {i64, i1} @llvm.smul.with.overflow.i64(i64 %v1, i64 %v2) ; returns the pair {product, overflow bit}
+  %val = extractvalue {i64, i1} %t, 0 ; field 0: the i64 product
+  %obit = extractvalue {i64, i1} %t, 1 ; field 1: the i1 overflow bit
+  store i64 %val, i64* %res ; product is written out through %res
+  ret i1 %obit ; overflow bit is the function result
+}
+
+define zeroext i1 @smuloi64_load2(i64 %v1, i64* %ptr2, i64* %res) { ; signed i64 multiply-with-overflow; second operand is loaded from %ptr2
+; SDAG-LABEL: smuloi64_load2:
+; SDAG:       ## %bb.0:
+; SDAG-NEXT:    imulq (%rsi), %rdi
+; SDAG-NEXT:    seto %al
+; SDAG-NEXT:    movq %rdi, (%rdx)
+; SDAG-NEXT:    retq
+;
+; FAST-LABEL: smuloi64_load2:
+; FAST:       ## %bb.0:
+; FAST-NEXT:    imulq (%rsi), %rdi
+; FAST-NEXT:    seto %al
+; FAST-NEXT:    movq %rdi, (%rdx)
+; FAST-NEXT:    andb $1, %al
+; FAST-NEXT:    movzbl %al, %eax
+; FAST-NEXT:    retq
+  %v2 = load i64, i64* %ptr2 ; memory operand feeding the multiply (the load-folding candidate)
+  %t = call {i64, i1} @llvm.smul.with.overflow.i64(i64 %v1, i64 %v2) ; returns the pair {product, overflow bit}
+  %val = extractvalue {i64, i1} %t, 0 ; field 0: the i64 product
+  %obit = extractvalue {i64, i1} %t, 1 ; field 1: the i1 overflow bit
+  store i64 %val, i64* %res ; product is written out through %res
+  ret i1 %obit ; overflow bit is the function result
+}
+
+define zeroext i1 @umuloi8_load(i8* %ptr1, i8 %v2, i8* %res) { ; unsigned i8 multiply-with-overflow; first operand is loaded from %ptr1
+; SDAG-LABEL: umuloi8_load:
+; SDAG:       ## %bb.0:
+; SDAG-NEXT:    movb (%rdi), %al
+; SDAG-NEXT:    mulb %sil
+; SDAG-NEXT:    seto %cl
+; SDAG-NEXT:    movb %al, (%rdx)
+; SDAG-NEXT:    movl %ecx, %eax
+; SDAG-NEXT:    retq
+;
+; FAST-LABEL: umuloi8_load:
+; FAST:       ## %bb.0:
+; FAST-NEXT:    movb (%rdi), %al
+; FAST-NEXT:    mulb %sil
+; FAST-NEXT:    seto %cl
+; FAST-NEXT:    movb %al, (%rdx)
+; FAST-NEXT:    andb $1, %cl
+; FAST-NEXT:    movzbl %cl, %eax
+; FAST-NEXT:    retq
+  %v1 = load i8, i8* %ptr1 ; memory operand feeding the multiply (the load-folding candidate)
+  %t = call {i8, i1} @llvm.umul.with.overflow.i8(i8 %v1, i8 %v2) ; returns the pair {product, overflow bit}
+  %val = extractvalue {i8, i1} %t, 0 ; field 0: the i8 product
+  %obit = extractvalue {i8, i1} %t, 1 ; field 1: the i1 overflow bit
+  store i8 %val, i8* %res ; product is written out through %res
+  ret i1 %obit ; overflow bit is the function result
+}
+
+define zeroext i1 @umuloi8_load2(i8 %v1, i8* %ptr2, i8* %res) { ; unsigned i8 multiply-with-overflow; second operand is loaded from %ptr2
+; SDAG-LABEL: umuloi8_load2:
+; SDAG:       ## %bb.0:
+; SDAG-NEXT:    movl %edi, %eax
+; SDAG-NEXT:    movb (%rsi), %cl
+; SDAG-NEXT:    ## kill: def $al killed $al killed $eax
+; SDAG-NEXT:    mulb %cl
+; SDAG-NEXT:    seto %cl
+; SDAG-NEXT:    movb %al, (%rdx)
+; SDAG-NEXT:    movl %ecx, %eax
+; SDAG-NEXT:    retq
+;
+; FAST-LABEL: umuloi8_load2:
+; FAST:       ## %bb.0:
+; FAST-NEXT:    movl %edi, %eax
+; FAST-NEXT:    ## kill: def $al killed $al killed $eax
+; FAST-NEXT:    mulb (%rsi)
+; FAST-NEXT:    seto %cl
+; FAST-NEXT:    movb %al, (%rdx)
+; FAST-NEXT:    andb $1, %cl
+; FAST-NEXT:    movzbl %cl, %eax
+; FAST-NEXT:    retq
+  %v2 = load i8, i8* %ptr2 ; memory operand feeding the multiply (the load-folding candidate)
+  %t = call {i8, i1} @llvm.umul.with.overflow.i8(i8 %v1, i8 %v2) ; returns the pair {product, overflow bit}
+  %val = extractvalue {i8, i1} %t, 0 ; field 0: the i8 product
+  %obit = extractvalue {i8, i1} %t, 1 ; field 1: the i1 overflow bit
+  store i8 %val, i8* %res ; product is written out through %res
+  ret i1 %obit ; overflow bit is the function result
+}
+
+define zeroext i1 @umuloi16_load(i16* %ptr1, i16 %v2, i16* %res) { ; unsigned i16 multiply-with-overflow; first operand is loaded from %ptr1
+; SDAG-LABEL: umuloi16_load:
+; SDAG:       ## %bb.0:
+; SDAG-NEXT:    movq %rdx, %rcx
+; SDAG-NEXT:    movzwl (%rdi), %eax
+; SDAG-NEXT:    mulw %si
+; SDAG-NEXT:    seto %dl
+; SDAG-NEXT:    movw %ax, (%rcx)
+; SDAG-NEXT:    movl %edx, %eax
+; SDAG-NEXT:    retq
+;
+; FAST-LABEL: umuloi16_load:
+; FAST:       ## %bb.0:
+; FAST-NEXT:    movq %rdx, %rcx
+; FAST-NEXT:    movzwl (%rdi), %eax
+; FAST-NEXT:    mulw %si
+; FAST-NEXT:    seto %dl
+; FAST-NEXT:    movw %ax, (%rcx)
+; FAST-NEXT:    andb $1, %dl
+; FAST-NEXT:    movzbl %dl, %eax
+; FAST-NEXT:    retq
+  %v1 = load i16, i16* %ptr1 ; memory operand feeding the multiply (the load-folding candidate)
+  %t = call {i16, i1} @llvm.umul.with.overflow.i16(i16 %v1, i16 %v2) ; returns the pair {product, overflow bit}
+  %val = extractvalue {i16, i1} %t, 0 ; field 0: the i16 product
+  %obit = extractvalue {i16, i1} %t, 1 ; field 1: the i1 overflow bit
+  store i16 %val, i16* %res ; product is written out through %res
+  ret i1 %obit ; overflow bit is the function result
+}
+
+define zeroext i1 @umuloi16_load2(i16 %v1, i16* %ptr2, i16* %res) { ; unsigned i16 multiply-with-overflow; second operand is loaded from %ptr2
+; SDAG-LABEL: umuloi16_load2:
+; SDAG:       ## %bb.0:
+; SDAG-NEXT:    movq %rdx, %rcx
+; SDAG-NEXT:    movl %edi, %eax
+; SDAG-NEXT:    movzwl (%rsi), %edx
+; SDAG-NEXT:    ## kill: def $ax killed $ax killed $eax
+; SDAG-NEXT:    mulw %dx
+; SDAG-NEXT:    seto %dl
+; SDAG-NEXT:    movw %ax, (%rcx)
+; SDAG-NEXT:    movl %edx, %eax
+; SDAG-NEXT:    retq
+;
+; FAST-LABEL: umuloi16_load2:
+; FAST:       ## %bb.0:
+; FAST-NEXT:    movq %rdx, %rcx
+; FAST-NEXT:    movl %edi, %eax
+; FAST-NEXT:    ## kill: def $ax killed $ax killed $eax
+; FAST-NEXT:    mulw (%rsi)
+; FAST-NEXT:    seto %dl
+; FAST-NEXT:    movw %ax, (%rcx)
+; FAST-NEXT:    andb $1, %dl
+; FAST-NEXT:    movzbl %dl, %eax
+; FAST-NEXT:    retq
+  %v2 = load i16, i16* %ptr2 ; memory operand feeding the multiply (the load-folding candidate)
+  %t = call {i16, i1} @llvm.umul.with.overflow.i16(i16 %v1, i16 %v2) ; returns the pair {product, overflow bit}
+  %val = extractvalue {i16, i1} %t, 0 ; field 0: the i16 product
+  %obit = extractvalue {i16, i1} %t, 1 ; field 1: the i1 overflow bit
+  store i16 %val, i16* %res ; product is written out through %res
+  ret i1 %obit ; overflow bit is the function result
+}
+
+define zeroext i1 @umuloi32_load(i32* %ptr1, i32 %v2, i32* %res) { ; unsigned i32 multiply-with-overflow; first operand is loaded from %ptr1
+; SDAG-LABEL: umuloi32_load:
+; SDAG:       ## %bb.0:
+; SDAG-NEXT:    movq %rdx, %rcx
+; SDAG-NEXT:    movl (%rdi), %eax
+; SDAG-NEXT:    mull %esi
+; SDAG-NEXT:    seto %dl
+; SDAG-NEXT:    movl %eax, (%rcx)
+; SDAG-NEXT:    movl %edx, %eax
+; SDAG-NEXT:    retq
+;
+; FAST-LABEL: umuloi32_load:
+; FAST:       ## %bb.0:
+; FAST-NEXT:    movq %rdx, %rcx
+; FAST-NEXT:    movl (%rdi), %eax
+; FAST-NEXT:    mull %esi
+; FAST-NEXT:    seto %dl
+; FAST-NEXT:    movl %eax, (%rcx)
+; FAST-NEXT:    andb $1, %dl
+; FAST-NEXT:    movzbl %dl, %eax
+; FAST-NEXT:    retq
+  %v1 = load i32, i32* %ptr1 ; memory operand feeding the multiply (the load-folding candidate)
+  %t = call {i32, i1} @llvm.umul.with.overflow.i32(i32 %v1, i32 %v2) ; returns the pair {product, overflow bit}
+  %val = extractvalue {i32, i1} %t, 0 ; field 0: the i32 product
+  %obit = extractvalue {i32, i1} %t, 1 ; field 1: the i1 overflow bit
+  store i32 %val, i32* %res ; product is written out through %res
+  ret i1 %obit ; overflow bit is the function result
+}
+
+define zeroext i1 @umuloi32_load2(i32 %v1, i32* %ptr2, i32* %res) { ; unsigned i32 multiply-with-overflow; second operand is loaded from %ptr2
+; SDAG-LABEL: umuloi32_load2:
+; SDAG:       ## %bb.0:
+; SDAG-NEXT:    movq %rdx, %rcx
+; SDAG-NEXT:    movl %edi, %eax
+; SDAG-NEXT:    movl (%rsi), %edx
+; SDAG-NEXT:    mull %edx
+; SDAG-NEXT:    seto %dl
+; SDAG-NEXT:    movl %eax, (%rcx)
+; SDAG-NEXT:    movl %edx, %eax
+; SDAG-NEXT:    retq
+;
+; FAST-LABEL: umuloi32_load2:
+; FAST:       ## %bb.0:
+; FAST-NEXT:    movq %rdx, %rcx
+; FAST-NEXT:    movl %edi, %eax
+; FAST-NEXT:    mull (%rsi)
+; FAST-NEXT:    seto %dl
+; FAST-NEXT:    movl %eax, (%rcx)
+; FAST-NEXT:    andb $1, %dl
+; FAST-NEXT:    movzbl %dl, %eax
+; FAST-NEXT:    retq
+  %v2 = load i32, i32* %ptr2 ; memory operand feeding the multiply (the load-folding candidate)
+  %t = call {i32, i1} @llvm.umul.with.overflow.i32(i32 %v1, i32 %v2) ; returns the pair {product, overflow bit}
+  %val = extractvalue {i32, i1} %t, 0 ; field 0: the i32 product
+  %obit = extractvalue {i32, i1} %t, 1 ; field 1: the i1 overflow bit
+  store i32 %val, i32* %res ; product is written out through %res
+  ret i1 %obit ; overflow bit is the function result
+}
+
+define zeroext i1 @umuloi64_load(i64* %ptr1, i64 %v2, i64* %res) { ; unsigned i64 multiply-with-overflow; first operand is loaded from %ptr1
+; SDAG-LABEL: umuloi64_load:
+; SDAG:       ## %bb.0:
+; SDAG-NEXT:    movq %rdx, %rcx
+; SDAG-NEXT:    movq (%rdi), %rax
+; SDAG-NEXT:    mulq %rsi
+; SDAG-NEXT:    seto %dl
+; SDAG-NEXT:    movq %rax, (%rcx)
+; SDAG-NEXT:    movl %edx, %eax
+; SDAG-NEXT:    retq
+;
+; FAST-LABEL: umuloi64_load:
+; FAST:       ## %bb.0:
+; FAST-NEXT:    movq %rdx, %rcx
+; FAST-NEXT:    movq (%rdi), %rax
+; FAST-NEXT:    mulq %rsi
+; FAST-NEXT:    seto %dl
+; FAST-NEXT:    movq %rax, (%rcx)
+; FAST-NEXT:    andb $1, %dl
+; FAST-NEXT:    movzbl %dl, %eax
+; FAST-NEXT:    retq
+  %v1 = load i64, i64* %ptr1 ; memory operand feeding the multiply (the load-folding candidate)
+  %t = call {i64, i1} @llvm.umul.with.overflow.i64(i64 %v1, i64 %v2) ; returns the pair {product, overflow bit}
+  %val = extractvalue {i64, i1} %t, 0 ; field 0: the i64 product
+  %obit = extractvalue {i64, i1} %t, 1 ; field 1: the i1 overflow bit
+  store i64 %val, i64* %res ; product is written out through %res
+  ret i1 %obit ; overflow bit is the function result
+}
+
+define zeroext i1 @umuloi64_load2(i64 %v1, i64* %ptr2, i64* %res) { ; unsigned i64 multiply-with-overflow; second operand is loaded from %ptr2
+; SDAG-LABEL: umuloi64_load2:
+; SDAG:       ## %bb.0:
+; SDAG-NEXT:    movq %rdx, %rcx
+; SDAG-NEXT:    movq %rdi, %rax
+; SDAG-NEXT:    movq (%rsi), %rdx
+; SDAG-NEXT:    mulq %rdx
+; SDAG-NEXT:    seto %dl
+; SDAG-NEXT:    movq %rax, (%rcx)
+; SDAG-NEXT:    movl %edx, %eax
+; SDAG-NEXT:    retq
+;
+; FAST-LABEL: umuloi64_load2:
+; FAST:       ## %bb.0:
+; FAST-NEXT:    movq %rdx, %rcx
+; FAST-NEXT:    movq %rdi, %rax
+; FAST-NEXT:    mulq (%rsi)
+; FAST-NEXT:    seto %dl
+; FAST-NEXT:    movq %rax, (%rcx)
+; FAST-NEXT:    andb $1, %dl
+; FAST-NEXT:    movzbl %dl, %eax
+; FAST-NEXT:    retq
+  %v2 = load i64, i64* %ptr2 ; memory operand feeding the multiply (the load-folding candidate)
+  %t = call {i64, i1} @llvm.umul.with.overflow.i64(i64 %v1, i64 %v2) ; returns the pair {product, overflow bit}
+  %val = extractvalue {i64, i1} %t, 0 ; field 0: the i64 product
+  %obit = extractvalue {i64, i1} %t, 1 ; field 1: the i1 overflow bit
+  store i64 %val, i64* %res ; product is written out through %res
+  ret i1 %obit ; overflow bit is the function result
+}
+
 declare {i8,  i1} @llvm.smul.with.overflow.i8 (i8,  i8 ) nounwind readnone
 declare {i16, i1} @llvm.smul.with.overflow.i16(i16, i16) nounwind readnone
 declare {i32, i1} @llvm.smul.with.overflow.i32(i32, i32) nounwind readnone




More information about the llvm-commits mailing list