[PATCH] D14057: AVX-512: Fixed a bug, add not materializable instructions.
Igor Breger via llvm-commits
llvm-commits at lists.llvm.org
Mon Oct 26 01:20:15 PDT 2015
igorb created this revision.
igorb added a reviewer: delena.
igorb added a subscriber: llvm-commits.
igorb set the repository for this revision to rL LLVM.
Bug https://llvm.org/bugs/show_bug.cgi?id=25270
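Mark the AVX-512 vector load instructions as not trivially rematerializable.
Otherwise the load may be re-executed to recreate the value instead of spilling it, even though the memory it reads could have been modified in the meantime, which produces incorrect assembly.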
Example:
define void @bar__512(<16 x i32>* %var) {
%var_load_load = load <16 x i32>, <16 x i32>* %var, align 1
store <16 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>, <16 x i32>* %var, align 64
call void @Print__512(<16 x i32> %var_load_load)
call void @Print__512(<16 x i32> %var_load_load)
store <16 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>, <16 x i32>* %var, align 64
ret void
}
Before the fix:
movq %rdi, %rbx
vmovdqu32 (%rbx), %zmm0
vpbroadcastd .LCPI0_0(%rip), %zmm1
vmovdqa32 %zmm1, (%rbx) -- overwrites the value in memory
callq Print__512
vmovdqu32 (%rbx), %zmm0 -- incorrect: reloads from memory after the value has been changed
callq Print__512
After the fix:
movq %rdi, %rbx
vmovdqu32 (%rbx), %zmm0
vmovups %zmm0, (%rsp) # 64-byte Spill
vpbroadcastd .LCPI0_0(%rip), %zmm1
vmovdqa32 %zmm1, (%rbx)
callq Print__512
vmovups (%rsp), %zmm0 # 64-byte Reload
callq Print__512
Repository:
rL LLVM
http://reviews.llvm.org/D14057
Files:
lib/Target/X86/X86InstrInfo.cpp
test/CodeGen/X86/avx512-bugfix-25270.ll
Index: test/CodeGen/X86/avx512-bugfix-25270.ll
===================================================================
--- test/CodeGen/X86/avx512-bugfix-25270.ll
+++ test/CodeGen/X86/avx512-bugfix-25270.ll
@@ -0,0 +1,34 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s
+
+declare void @Print__512(<16 x i32>) #0
+
+define void @bar__512(<16 x i32>* %var) #0 {
+; CHECK-LABEL: bar__512:
+; CHECK: ## BB#0: ## %allocas
+; CHECK-NEXT: pushq %rbx
+; CHECK-NEXT: subq $112, %rsp
+; CHECK-NEXT: movq %rdi, %rbx
+; CHECK-NEXT: vmovdqu32 (%rbx), %zmm0
+; CHECK-NEXT: vmovups %zmm0, (%rsp) ## 64-byte Spill
+; CHECK-NEXT: vpbroadcastd {{.*}}(%rip), %zmm1
+; CHECK-NEXT: vmovdqa32 %zmm1, (%rbx)
+; CHECK-NEXT: callq _Print__512
+; CHECK-NEXT: vmovups (%rsp), %zmm0 ## 64-byte Reload
+; CHECK-NEXT: callq _Print__512
+; CHECK-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0
+; CHECK-NEXT: vmovdqa32 %zmm0, (%rbx)
+; CHECK-NEXT: addq $112, %rsp
+; CHECK-NEXT: popq %rbx
+; CHECK-NEXT: retq
+allocas:
+ %var_load_load = load <16 x i32>, <16 x i32>* %var, align 1
+ store <16 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>, <16 x i32>* %var, align 64
+ call void @Print__512(<16 x i32> %var_load_load)
+ ; %var_load_load value should be reloaded
+ call void @Print__512(<16 x i32> %var_load_load)
+ store <16 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>, <16 x i32>* %var, align 64
+ ret void
+}
+
+
+attributes #0 = { nounwind }
Index: lib/Target/X86/X86InstrInfo.cpp
===================================================================
--- lib/Target/X86/X86InstrInfo.cpp
+++ lib/Target/X86/X86InstrInfo.cpp
@@ -2287,7 +2287,35 @@
case X86::FsVMOVAPSrm:
case X86::FsVMOVAPDrm:
case X86::FsMOVAPSrm:
- case X86::FsMOVAPDrm: {
+ case X86::FsMOVAPDrm:
+ // AVX-512
+ case X86::VMOVAPDZ128rm:
+ case X86::VMOVAPDZ256rm:
+ case X86::VMOVAPDZrm:
+ case X86::VMOVAPSZ128rm:
+ case X86::VMOVAPSZ256rm:
+ case X86::VMOVAPSZrm:
+ case X86::VMOVDQA32Z128rm:
+ case X86::VMOVDQA32Z256rm:
+ case X86::VMOVDQA32Zrm:
+ case X86::VMOVDQA64Z128rm:
+ case X86::VMOVDQA64Z256rm:
+ case X86::VMOVDQA64Zrm:
+ case X86::VMOVDQU16Z128rm:
+ case X86::VMOVDQU16Z256rm:
+ case X86::VMOVDQU16Zrm:
+ case X86::VMOVDQU32Z128rm:
+ case X86::VMOVDQU32Z256rm:
+ case X86::VMOVDQU32Zrm:
+ case X86::VMOVDQU64Z128rm:
+ case X86::VMOVDQU64Z256rm:
+ case X86::VMOVDQU64Zrm:
+ case X86::VMOVDQU8Z128rm:
+ case X86::VMOVDQU8Z256rm:
+ case X86::VMOVDQU8Zrm:
+ case X86::VMOVUPSZ128rm:
+ case X86::VMOVUPSZ256rm:
+ case X86::VMOVUPSZrm: {
// Loads from constant pools are trivially rematerializable.
if (MI->getOperand(1+X86::AddrBaseReg).isReg() &&
MI->getOperand(1+X86::AddrScaleAmt).isImm() &&
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D14057.38377.patch
Type: text/x-patch
Size: 2920 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20151026/d2449b7d/attachment.bin>
More information about the llvm-commits
mailing list