[llvm] r350271 - [X86] Add test cases to show that we fail to fold loads into i8 smulo and i8/i16/i32/i64 umulo lowering without the assistance of the peephole pass. NFC
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Wed Jan 2 15:24:03 PST 2019
Author: ctopper
Date: Wed Jan 2 15:24:03 2019
New Revision: 350271
URL: http://llvm.org/viewvc/llvm-project?rev=350271&view=rev
Log:
[X86] Add test cases to show that we fail to fold loads into i8 smulo and i8/i16/i32/i64 umulo lowering without the assistance of the peephole pass. NFC
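As an illustration (a minimal sketch drawn from the smuloi8_load2 check lines in the diff below, not part of the change itself): without the peephole pass, SelectionDAG loads the multiply operand into a register first,

    movb (%rsi), %cl
    imulb %cl

instead of folding the load into the multiply as FastISel already does:

    imulb (%rsi)

The -disable-peephole flag added to the RUN lines keeps the machine peephole pass from rescuing these cases, so the missed folds show up directly in the generated checks.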
Modified:
llvm/trunk/test/CodeGen/X86/xmulo.ll
Modified: llvm/trunk/test/CodeGen/X86/xmulo.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/xmulo.ll?rev=350271&r1=350270&r2=350271&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/xmulo.ll (original)
+++ llvm/trunk/test/CodeGen/X86/xmulo.ll Wed Jan 2 15:24:03 2019
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=x86_64-darwin-unknown < %s | FileCheck %s --check-prefix=SDAG
-; RUN: llc -mtriple=x86_64-darwin-unknown -fast-isel -fast-isel-abort=1 < %s | FileCheck %s --check-prefix=FAST
-; RUN: llc -mtriple=x86_64-darwin-unknown -mcpu=knl < %s | FileCheck %s --check-prefix=SDAG --check-prefix=KNL
+; RUN: llc -disable-peephole -mtriple=x86_64-darwin-unknown < %s | FileCheck %s --check-prefix=SDAG
+; RUN: llc -disable-peephole -mtriple=x86_64-darwin-unknown -fast-isel -fast-isel-abort=1 < %s | FileCheck %s --check-prefix=FAST
+; RUN: llc -disable-peephole -mtriple=x86_64-darwin-unknown -mcpu=knl < %s | FileCheck %s --check-prefix=SDAG --check-prefix=KNL
define {i64, i1} @t1() nounwind {
; SDAG-LABEL: t1:
@@ -722,6 +722,443 @@ define i1 @bug27873(i64 %c1, i1 %c2) {
ret i1 %x1
}
+define zeroext i1 @smuloi8_load(i8* %ptr1, i8 %v2, i8* %res) {
+; SDAG-LABEL: smuloi8_load:
+; SDAG: ## %bb.0:
+; SDAG-NEXT: movb (%rdi), %al
+; SDAG-NEXT: imulb %sil
+; SDAG-NEXT: seto %cl
+; SDAG-NEXT: movb %al, (%rdx)
+; SDAG-NEXT: movl %ecx, %eax
+; SDAG-NEXT: retq
+;
+; FAST-LABEL: smuloi8_load:
+; FAST: ## %bb.0:
+; FAST-NEXT: movb (%rdi), %al
+; FAST-NEXT: imulb %sil
+; FAST-NEXT: seto %cl
+; FAST-NEXT: movb %al, (%rdx)
+; FAST-NEXT: andb $1, %cl
+; FAST-NEXT: movzbl %cl, %eax
+; FAST-NEXT: retq
+ %v1 = load i8, i8* %ptr1
+ %t = call {i8, i1} @llvm.smul.with.overflow.i8(i8 %v1, i8 %v2)
+ %val = extractvalue {i8, i1} %t, 0
+ %obit = extractvalue {i8, i1} %t, 1
+ store i8 %val, i8* %res
+ ret i1 %obit
+}
+
+define zeroext i1 @smuloi8_load2(i8 %v1, i8* %ptr2, i8* %res) {
+; SDAG-LABEL: smuloi8_load2:
+; SDAG: ## %bb.0:
+; SDAG-NEXT: movl %edi, %eax
+; SDAG-NEXT: movb (%rsi), %cl
+; SDAG-NEXT: ## kill: def $al killed $al killed $eax
+; SDAG-NEXT: imulb %cl
+; SDAG-NEXT: seto %cl
+; SDAG-NEXT: movb %al, (%rdx)
+; SDAG-NEXT: movl %ecx, %eax
+; SDAG-NEXT: retq
+;
+; FAST-LABEL: smuloi8_load2:
+; FAST: ## %bb.0:
+; FAST-NEXT: movl %edi, %eax
+; FAST-NEXT: ## kill: def $al killed $al killed $eax
+; FAST-NEXT: imulb (%rsi)
+; FAST-NEXT: seto %cl
+; FAST-NEXT: movb %al, (%rdx)
+; FAST-NEXT: andb $1, %cl
+; FAST-NEXT: movzbl %cl, %eax
+; FAST-NEXT: retq
+ %v2 = load i8, i8* %ptr2
+ %t = call {i8, i1} @llvm.smul.with.overflow.i8(i8 %v1, i8 %v2)
+ %val = extractvalue {i8, i1} %t, 0
+ %obit = extractvalue {i8, i1} %t, 1
+ store i8 %val, i8* %res
+ ret i1 %obit
+}
+
+define zeroext i1 @smuloi16_load(i16* %ptr1, i16 %v2, i16* %res) {
+; SDAG-LABEL: smuloi16_load:
+; SDAG: ## %bb.0:
+; SDAG-NEXT: imulw (%rdi), %si
+; SDAG-NEXT: seto %al
+; SDAG-NEXT: movw %si, (%rdx)
+; SDAG-NEXT: retq
+;
+; FAST-LABEL: smuloi16_load:
+; FAST: ## %bb.0:
+; FAST-NEXT: imulw (%rdi), %si
+; FAST-NEXT: seto %al
+; FAST-NEXT: movw %si, (%rdx)
+; FAST-NEXT: andb $1, %al
+; FAST-NEXT: movzbl %al, %eax
+; FAST-NEXT: retq
+ %v1 = load i16, i16* %ptr1
+ %t = call {i16, i1} @llvm.smul.with.overflow.i16(i16 %v1, i16 %v2)
+ %val = extractvalue {i16, i1} %t, 0
+ %obit = extractvalue {i16, i1} %t, 1
+ store i16 %val, i16* %res
+ ret i1 %obit
+}
+
+define zeroext i1 @smuloi16_load2(i16 %v1, i16* %ptr2, i16* %res) {
+; SDAG-LABEL: smuloi16_load2:
+; SDAG: ## %bb.0:
+; SDAG-NEXT: imulw (%rsi), %di
+; SDAG-NEXT: seto %al
+; SDAG-NEXT: movw %di, (%rdx)
+; SDAG-NEXT: retq
+;
+; FAST-LABEL: smuloi16_load2:
+; FAST: ## %bb.0:
+; FAST-NEXT: imulw (%rsi), %di
+; FAST-NEXT: seto %al
+; FAST-NEXT: movw %di, (%rdx)
+; FAST-NEXT: andb $1, %al
+; FAST-NEXT: movzbl %al, %eax
+; FAST-NEXT: retq
+ %v2 = load i16, i16* %ptr2
+ %t = call {i16, i1} @llvm.smul.with.overflow.i16(i16 %v1, i16 %v2)
+ %val = extractvalue {i16, i1} %t, 0
+ %obit = extractvalue {i16, i1} %t, 1
+ store i16 %val, i16* %res
+ ret i1 %obit
+}
+
+define zeroext i1 @smuloi32_load(i32* %ptr1, i32 %v2, i32* %res) {
+; SDAG-LABEL: smuloi32_load:
+; SDAG: ## %bb.0:
+; SDAG-NEXT: imull (%rdi), %esi
+; SDAG-NEXT: seto %al
+; SDAG-NEXT: movl %esi, (%rdx)
+; SDAG-NEXT: retq
+;
+; FAST-LABEL: smuloi32_load:
+; FAST: ## %bb.0:
+; FAST-NEXT: imull (%rdi), %esi
+; FAST-NEXT: seto %al
+; FAST-NEXT: movl %esi, (%rdx)
+; FAST-NEXT: andb $1, %al
+; FAST-NEXT: movzbl %al, %eax
+; FAST-NEXT: retq
+ %v1 = load i32, i32* %ptr1
+ %t = call {i32, i1} @llvm.smul.with.overflow.i32(i32 %v1, i32 %v2)
+ %val = extractvalue {i32, i1} %t, 0
+ %obit = extractvalue {i32, i1} %t, 1
+ store i32 %val, i32* %res
+ ret i1 %obit
+}
+
+define zeroext i1 @smuloi32_load2(i32 %v1, i32* %ptr2, i32* %res) {
+; SDAG-LABEL: smuloi32_load2:
+; SDAG: ## %bb.0:
+; SDAG-NEXT: imull (%rsi), %edi
+; SDAG-NEXT: seto %al
+; SDAG-NEXT: movl %edi, (%rdx)
+; SDAG-NEXT: retq
+;
+; FAST-LABEL: smuloi32_load2:
+; FAST: ## %bb.0:
+; FAST-NEXT: imull (%rsi), %edi
+; FAST-NEXT: seto %al
+; FAST-NEXT: movl %edi, (%rdx)
+; FAST-NEXT: andb $1, %al
+; FAST-NEXT: movzbl %al, %eax
+; FAST-NEXT: retq
+ %v2 = load i32, i32* %ptr2
+ %t = call {i32, i1} @llvm.smul.with.overflow.i32(i32 %v1, i32 %v2)
+ %val = extractvalue {i32, i1} %t, 0
+ %obit = extractvalue {i32, i1} %t, 1
+ store i32 %val, i32* %res
+ ret i1 %obit
+}
+
+define zeroext i1 @smuloi64_load(i64* %ptr1, i64 %v2, i64* %res) {
+; SDAG-LABEL: smuloi64_load:
+; SDAG: ## %bb.0:
+; SDAG-NEXT: imulq (%rdi), %rsi
+; SDAG-NEXT: seto %al
+; SDAG-NEXT: movq %rsi, (%rdx)
+; SDAG-NEXT: retq
+;
+; FAST-LABEL: smuloi64_load:
+; FAST: ## %bb.0:
+; FAST-NEXT: imulq (%rdi), %rsi
+; FAST-NEXT: seto %al
+; FAST-NEXT: movq %rsi, (%rdx)
+; FAST-NEXT: andb $1, %al
+; FAST-NEXT: movzbl %al, %eax
+; FAST-NEXT: retq
+ %v1 = load i64, i64* %ptr1
+ %t = call {i64, i1} @llvm.smul.with.overflow.i64(i64 %v1, i64 %v2)
+ %val = extractvalue {i64, i1} %t, 0
+ %obit = extractvalue {i64, i1} %t, 1
+ store i64 %val, i64* %res
+ ret i1 %obit
+}
+
+define zeroext i1 @smuloi64_load2(i64 %v1, i64* %ptr2, i64* %res) {
+; SDAG-LABEL: smuloi64_load2:
+; SDAG: ## %bb.0:
+; SDAG-NEXT: imulq (%rsi), %rdi
+; SDAG-NEXT: seto %al
+; SDAG-NEXT: movq %rdi, (%rdx)
+; SDAG-NEXT: retq
+;
+; FAST-LABEL: smuloi64_load2:
+; FAST: ## %bb.0:
+; FAST-NEXT: imulq (%rsi), %rdi
+; FAST-NEXT: seto %al
+; FAST-NEXT: movq %rdi, (%rdx)
+; FAST-NEXT: andb $1, %al
+; FAST-NEXT: movzbl %al, %eax
+; FAST-NEXT: retq
+ %v2 = load i64, i64* %ptr2
+ %t = call {i64, i1} @llvm.smul.with.overflow.i64(i64 %v1, i64 %v2)
+ %val = extractvalue {i64, i1} %t, 0
+ %obit = extractvalue {i64, i1} %t, 1
+ store i64 %val, i64* %res
+ ret i1 %obit
+}
+
+define zeroext i1 @umuloi8_load(i8* %ptr1, i8 %v2, i8* %res) {
+; SDAG-LABEL: umuloi8_load:
+; SDAG: ## %bb.0:
+; SDAG-NEXT: movb (%rdi), %al
+; SDAG-NEXT: mulb %sil
+; SDAG-NEXT: seto %cl
+; SDAG-NEXT: movb %al, (%rdx)
+; SDAG-NEXT: movl %ecx, %eax
+; SDAG-NEXT: retq
+;
+; FAST-LABEL: umuloi8_load:
+; FAST: ## %bb.0:
+; FAST-NEXT: movb (%rdi), %al
+; FAST-NEXT: mulb %sil
+; FAST-NEXT: seto %cl
+; FAST-NEXT: movb %al, (%rdx)
+; FAST-NEXT: andb $1, %cl
+; FAST-NEXT: movzbl %cl, %eax
+; FAST-NEXT: retq
+ %v1 = load i8, i8* %ptr1
+ %t = call {i8, i1} @llvm.umul.with.overflow.i8(i8 %v1, i8 %v2)
+ %val = extractvalue {i8, i1} %t, 0
+ %obit = extractvalue {i8, i1} %t, 1
+ store i8 %val, i8* %res
+ ret i1 %obit
+}
+
+define zeroext i1 @umuloi8_load2(i8 %v1, i8* %ptr2, i8* %res) {
+; SDAG-LABEL: umuloi8_load2:
+; SDAG: ## %bb.0:
+; SDAG-NEXT: movl %edi, %eax
+; SDAG-NEXT: movb (%rsi), %cl
+; SDAG-NEXT: ## kill: def $al killed $al killed $eax
+; SDAG-NEXT: mulb %cl
+; SDAG-NEXT: seto %cl
+; SDAG-NEXT: movb %al, (%rdx)
+; SDAG-NEXT: movl %ecx, %eax
+; SDAG-NEXT: retq
+;
+; FAST-LABEL: umuloi8_load2:
+; FAST: ## %bb.0:
+; FAST-NEXT: movl %edi, %eax
+; FAST-NEXT: ## kill: def $al killed $al killed $eax
+; FAST-NEXT: mulb (%rsi)
+; FAST-NEXT: seto %cl
+; FAST-NEXT: movb %al, (%rdx)
+; FAST-NEXT: andb $1, %cl
+; FAST-NEXT: movzbl %cl, %eax
+; FAST-NEXT: retq
+ %v2 = load i8, i8* %ptr2
+ %t = call {i8, i1} @llvm.umul.with.overflow.i8(i8 %v1, i8 %v2)
+ %val = extractvalue {i8, i1} %t, 0
+ %obit = extractvalue {i8, i1} %t, 1
+ store i8 %val, i8* %res
+ ret i1 %obit
+}
+
+define zeroext i1 @umuloi16_load(i16* %ptr1, i16 %v2, i16* %res) {
+; SDAG-LABEL: umuloi16_load:
+; SDAG: ## %bb.0:
+; SDAG-NEXT: movq %rdx, %rcx
+; SDAG-NEXT: movzwl (%rdi), %eax
+; SDAG-NEXT: mulw %si
+; SDAG-NEXT: seto %dl
+; SDAG-NEXT: movw %ax, (%rcx)
+; SDAG-NEXT: movl %edx, %eax
+; SDAG-NEXT: retq
+;
+; FAST-LABEL: umuloi16_load:
+; FAST: ## %bb.0:
+; FAST-NEXT: movq %rdx, %rcx
+; FAST-NEXT: movzwl (%rdi), %eax
+; FAST-NEXT: mulw %si
+; FAST-NEXT: seto %dl
+; FAST-NEXT: movw %ax, (%rcx)
+; FAST-NEXT: andb $1, %dl
+; FAST-NEXT: movzbl %dl, %eax
+; FAST-NEXT: retq
+ %v1 = load i16, i16* %ptr1
+ %t = call {i16, i1} @llvm.umul.with.overflow.i16(i16 %v1, i16 %v2)
+ %val = extractvalue {i16, i1} %t, 0
+ %obit = extractvalue {i16, i1} %t, 1
+ store i16 %val, i16* %res
+ ret i1 %obit
+}
+
+define zeroext i1 @umuloi16_load2(i16 %v1, i16* %ptr2, i16* %res) {
+; SDAG-LABEL: umuloi16_load2:
+; SDAG: ## %bb.0:
+; SDAG-NEXT: movq %rdx, %rcx
+; SDAG-NEXT: movl %edi, %eax
+; SDAG-NEXT: movzwl (%rsi), %edx
+; SDAG-NEXT: ## kill: def $ax killed $ax killed $eax
+; SDAG-NEXT: mulw %dx
+; SDAG-NEXT: seto %dl
+; SDAG-NEXT: movw %ax, (%rcx)
+; SDAG-NEXT: movl %edx, %eax
+; SDAG-NEXT: retq
+;
+; FAST-LABEL: umuloi16_load2:
+; FAST: ## %bb.0:
+; FAST-NEXT: movq %rdx, %rcx
+; FAST-NEXT: movl %edi, %eax
+; FAST-NEXT: ## kill: def $ax killed $ax killed $eax
+; FAST-NEXT: mulw (%rsi)
+; FAST-NEXT: seto %dl
+; FAST-NEXT: movw %ax, (%rcx)
+; FAST-NEXT: andb $1, %dl
+; FAST-NEXT: movzbl %dl, %eax
+; FAST-NEXT: retq
+ %v2 = load i16, i16* %ptr2
+ %t = call {i16, i1} @llvm.umul.with.overflow.i16(i16 %v1, i16 %v2)
+ %val = extractvalue {i16, i1} %t, 0
+ %obit = extractvalue {i16, i1} %t, 1
+ store i16 %val, i16* %res
+ ret i1 %obit
+}
+
+define zeroext i1 @umuloi32_load(i32* %ptr1, i32 %v2, i32* %res) {
+; SDAG-LABEL: umuloi32_load:
+; SDAG: ## %bb.0:
+; SDAG-NEXT: movq %rdx, %rcx
+; SDAG-NEXT: movl (%rdi), %eax
+; SDAG-NEXT: mull %esi
+; SDAG-NEXT: seto %dl
+; SDAG-NEXT: movl %eax, (%rcx)
+; SDAG-NEXT: movl %edx, %eax
+; SDAG-NEXT: retq
+;
+; FAST-LABEL: umuloi32_load:
+; FAST: ## %bb.0:
+; FAST-NEXT: movq %rdx, %rcx
+; FAST-NEXT: movl (%rdi), %eax
+; FAST-NEXT: mull %esi
+; FAST-NEXT: seto %dl
+; FAST-NEXT: movl %eax, (%rcx)
+; FAST-NEXT: andb $1, %dl
+; FAST-NEXT: movzbl %dl, %eax
+; FAST-NEXT: retq
+ %v1 = load i32, i32* %ptr1
+ %t = call {i32, i1} @llvm.umul.with.overflow.i32(i32 %v1, i32 %v2)
+ %val = extractvalue {i32, i1} %t, 0
+ %obit = extractvalue {i32, i1} %t, 1
+ store i32 %val, i32* %res
+ ret i1 %obit
+}
+
+define zeroext i1 @umuloi32_load2(i32 %v1, i32* %ptr2, i32* %res) {
+; SDAG-LABEL: umuloi32_load2:
+; SDAG: ## %bb.0:
+; SDAG-NEXT: movq %rdx, %rcx
+; SDAG-NEXT: movl %edi, %eax
+; SDAG-NEXT: movl (%rsi), %edx
+; SDAG-NEXT: mull %edx
+; SDAG-NEXT: seto %dl
+; SDAG-NEXT: movl %eax, (%rcx)
+; SDAG-NEXT: movl %edx, %eax
+; SDAG-NEXT: retq
+;
+; FAST-LABEL: umuloi32_load2:
+; FAST: ## %bb.0:
+; FAST-NEXT: movq %rdx, %rcx
+; FAST-NEXT: movl %edi, %eax
+; FAST-NEXT: mull (%rsi)
+; FAST-NEXT: seto %dl
+; FAST-NEXT: movl %eax, (%rcx)
+; FAST-NEXT: andb $1, %dl
+; FAST-NEXT: movzbl %dl, %eax
+; FAST-NEXT: retq
+ %v2 = load i32, i32* %ptr2
+ %t = call {i32, i1} @llvm.umul.with.overflow.i32(i32 %v1, i32 %v2)
+ %val = extractvalue {i32, i1} %t, 0
+ %obit = extractvalue {i32, i1} %t, 1
+ store i32 %val, i32* %res
+ ret i1 %obit
+}
+
+define zeroext i1 @umuloi64_load(i64* %ptr1, i64 %v2, i64* %res) {
+; SDAG-LABEL: umuloi64_load:
+; SDAG: ## %bb.0:
+; SDAG-NEXT: movq %rdx, %rcx
+; SDAG-NEXT: movq (%rdi), %rax
+; SDAG-NEXT: mulq %rsi
+; SDAG-NEXT: seto %dl
+; SDAG-NEXT: movq %rax, (%rcx)
+; SDAG-NEXT: movl %edx, %eax
+; SDAG-NEXT: retq
+;
+; FAST-LABEL: umuloi64_load:
+; FAST: ## %bb.0:
+; FAST-NEXT: movq %rdx, %rcx
+; FAST-NEXT: movq (%rdi), %rax
+; FAST-NEXT: mulq %rsi
+; FAST-NEXT: seto %dl
+; FAST-NEXT: movq %rax, (%rcx)
+; FAST-NEXT: andb $1, %dl
+; FAST-NEXT: movzbl %dl, %eax
+; FAST-NEXT: retq
+ %v1 = load i64, i64* %ptr1
+ %t = call {i64, i1} @llvm.umul.with.overflow.i64(i64 %v1, i64 %v2)
+ %val = extractvalue {i64, i1} %t, 0
+ %obit = extractvalue {i64, i1} %t, 1
+ store i64 %val, i64* %res
+ ret i1 %obit
+}
+
+define zeroext i1 @umuloi64_load2(i64 %v1, i64* %ptr2, i64* %res) {
+; SDAG-LABEL: umuloi64_load2:
+; SDAG: ## %bb.0:
+; SDAG-NEXT: movq %rdx, %rcx
+; SDAG-NEXT: movq %rdi, %rax
+; SDAG-NEXT: movq (%rsi), %rdx
+; SDAG-NEXT: mulq %rdx
+; SDAG-NEXT: seto %dl
+; SDAG-NEXT: movq %rax, (%rcx)
+; SDAG-NEXT: movl %edx, %eax
+; SDAG-NEXT: retq
+;
+; FAST-LABEL: umuloi64_load2:
+; FAST: ## %bb.0:
+; FAST-NEXT: movq %rdx, %rcx
+; FAST-NEXT: movq %rdi, %rax
+; FAST-NEXT: mulq (%rsi)
+; FAST-NEXT: seto %dl
+; FAST-NEXT: movq %rax, (%rcx)
+; FAST-NEXT: andb $1, %dl
+; FAST-NEXT: movzbl %dl, %eax
+; FAST-NEXT: retq
+ %v2 = load i64, i64* %ptr2
+ %t = call {i64, i1} @llvm.umul.with.overflow.i64(i64 %v1, i64 %v2)
+ %val = extractvalue {i64, i1} %t, 0
+ %obit = extractvalue {i64, i1} %t, 1
+ store i64 %val, i64* %res
+ ret i1 %obit
+}
+
declare {i8, i1} @llvm.smul.with.overflow.i8 (i8, i8 ) nounwind readnone
declare {i16, i1} @llvm.smul.with.overflow.i16(i16, i16) nounwind readnone
declare {i32, i1} @llvm.smul.with.overflow.i32(i32, i32) nounwind readnone