[llvm] r280464 - [AVX-512] Add execution domain fixing for logical operations with broadcast loads. This builds on the handling of masked ops since we need to keep element size the same.

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Thu Sep 1 22:29:10 PDT 2016


Author: ctopper
Date: Fri Sep  2 00:29:09 2016
New Revision: 280464

URL: http://llvm.org/viewvc/llvm-project?rev=280464&view=rev
Log:
[AVX-512] Add execution domain fixing for logical operations with broadcast loads. This builds on the handling of masked ops since we need to keep element size the same.

Modified:
    llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
    llvm/trunk/test/CodeGen/X86/avx512-arith.ll
    llvm/trunk/test/CodeGen/X86/avx512-logic.ll

Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.cpp?rev=280464&r1=280463&r2=280464&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86InstrInfo.cpp Fri Sep  2 00:29:09 2016
@@ -7315,6 +7315,86 @@ static const uint16_t ReplaceableInstrsA
     X86::VPXORQZrrk,      X86::VPXORDZrrk      },
   { X86::VXORPSZrrkz,     X86::VXORPDZrrkz,
     X86::VPXORQZrrkz,     X86::VPXORDZrrkz     },
+  // Broadcast loads can be handled the same as masked operations to avoid
+  // changing element size.
+  { X86::VANDNPSZ128rmb,  X86::VANDNPDZ128rmb,
+    X86::VPANDNQZ128rmb,  X86::VPANDNDZ128rmb  },
+  { X86::VANDPSZ128rmb,   X86::VANDPDZ128rmb,
+    X86::VPANDQZ128rmb,   X86::VPANDDZ128rmb   },
+  { X86::VORPSZ128rmb,    X86::VORPDZ128rmb,
+    X86::VPORQZ128rmb,    X86::VPORDZ128rmb    },
+  { X86::VXORPSZ128rmb,   X86::VXORPDZ128rmb,
+    X86::VPXORQZ128rmb,   X86::VPXORDZ128rmb   },
+  { X86::VANDNPSZ256rmb,  X86::VANDNPDZ256rmb,
+    X86::VPANDNQZ256rmb,  X86::VPANDNDZ256rmb  },
+  { X86::VANDPSZ256rmb,   X86::VANDPDZ256rmb,
+    X86::VPANDQZ256rmb,   X86::VPANDDZ256rmb   },
+  { X86::VORPSZ256rmb,    X86::VORPDZ256rmb,
+    X86::VPORQZ256rmb,    X86::VPORDZ256rmb    },
+  { X86::VXORPSZ256rmb,   X86::VXORPDZ256rmb,
+    X86::VPXORQZ256rmb,   X86::VPXORDZ256rmb   },
+  { X86::VANDNPSZrmb,     X86::VANDNPDZrmb,
+    X86::VPANDNQZrmb,     X86::VPANDNDZrmb     },
+  { X86::VANDPSZrmb,      X86::VANDPDZrmb,
+    X86::VPANDQZrmb,      X86::VPANDDZrmb      },
+  { X86::VANDPSZrmb,      X86::VANDPDZrmb,
+    X86::VPANDQZrmb,      X86::VPANDDZrmb      },
+  { X86::VORPSZrmb,       X86::VORPDZrmb,
+    X86::VPORQZrmb,       X86::VPORDZrmb       },
+  { X86::VXORPSZrmb,      X86::VXORPDZrmb,
+    X86::VPXORQZrmb,      X86::VPXORDZrmb      },
+  { X86::VANDNPSZ128rmbk, X86::VANDNPDZ128rmbk,
+    X86::VPANDNQZ128rmbk, X86::VPANDNDZ128rmbk },
+  { X86::VANDPSZ128rmbk,  X86::VANDPDZ128rmbk,
+    X86::VPANDQZ128rmbk,  X86::VPANDDZ128rmbk  },
+  { X86::VORPSZ128rmbk,   X86::VORPDZ128rmbk,
+    X86::VPORQZ128rmbk,   X86::VPORDZ128rmbk   },
+  { X86::VXORPSZ128rmbk,  X86::VXORPDZ128rmbk,
+    X86::VPXORQZ128rmbk,  X86::VPXORDZ128rmbk  },
+  { X86::VANDNPSZ256rmbk, X86::VANDNPDZ256rmbk,
+    X86::VPANDNQZ256rmbk, X86::VPANDNDZ256rmbk },
+  { X86::VANDPSZ256rmbk,  X86::VANDPDZ256rmbk,
+    X86::VPANDQZ256rmbk,  X86::VPANDDZ256rmbk  },
+  { X86::VORPSZ256rmbk,   X86::VORPDZ256rmbk,
+    X86::VPORQZ256rmbk,   X86::VPORDZ256rmbk   },
+  { X86::VXORPSZ256rmbk,  X86::VXORPDZ256rmbk,
+    X86::VPXORQZ256rmbk,  X86::VPXORDZ256rmbk  },
+  { X86::VANDNPSZrmbk,    X86::VANDNPDZrmbk,
+    X86::VPANDNQZrmbk,    X86::VPANDNDZrmbk    },
+  { X86::VANDPSZrmbk,     X86::VANDPDZrmbk,
+    X86::VPANDQZrmbk,     X86::VPANDDZrmbk     },
+  { X86::VANDPSZrmbk,     X86::VANDPDZrmbk,
+    X86::VPANDQZrmbk,     X86::VPANDDZrmbk     },
+  { X86::VORPSZrmbk,      X86::VORPDZrmbk,
+    X86::VPORQZrmbk,      X86::VPORDZrmbk      },
+  { X86::VXORPSZrmbk,     X86::VXORPDZrmbk,
+    X86::VPXORQZrmbk,     X86::VPXORDZrmbk     },
+  { X86::VANDNPSZ128rmbkz,X86::VANDNPDZ128rmbkz,
+    X86::VPANDNQZ128rmbkz,X86::VPANDNDZ128rmbkz},
+  { X86::VANDPSZ128rmbkz, X86::VANDPDZ128rmbkz,
+    X86::VPANDQZ128rmbkz, X86::VPANDDZ128rmbkz },
+  { X86::VORPSZ128rmbkz,  X86::VORPDZ128rmbkz,
+    X86::VPORQZ128rmbkz,  X86::VPORDZ128rmbkz  },
+  { X86::VXORPSZ128rmbkz, X86::VXORPDZ128rmbkz,
+    X86::VPXORQZ128rmbkz, X86::VPXORDZ128rmbkz },
+  { X86::VANDNPSZ256rmbkz,X86::VANDNPDZ256rmbkz,
+    X86::VPANDNQZ256rmbkz,X86::VPANDNDZ256rmbkz},
+  { X86::VANDPSZ256rmbkz, X86::VANDPDZ256rmbkz,
+    X86::VPANDQZ256rmbkz, X86::VPANDDZ256rmbkz },
+  { X86::VORPSZ256rmbkz,  X86::VORPDZ256rmbkz,
+    X86::VPORQZ256rmbkz,  X86::VPORDZ256rmbkz  },
+  { X86::VXORPSZ256rmbkz, X86::VXORPDZ256rmbkz,
+    X86::VPXORQZ256rmbkz, X86::VPXORDZ256rmbkz },
+  { X86::VANDNPSZrmbkz,   X86::VANDNPDZrmbkz,
+    X86::VPANDNQZrmbkz,   X86::VPANDNDZrmbkz   },
+  { X86::VANDPSZrmbkz,    X86::VANDPDZrmbkz,
+    X86::VPANDQZrmbkz,    X86::VPANDDZrmbkz    },
+  { X86::VANDPSZrmbkz,    X86::VANDPDZrmbkz,
+    X86::VPANDQZrmbkz,    X86::VPANDDZrmbkz    },
+  { X86::VORPSZrmbkz,     X86::VORPDZrmbkz,
+    X86::VPORQZrmbkz,     X86::VPORDZrmbkz     },
+  { X86::VXORPSZrmbkz,    X86::VXORPDZrmbkz,
+    X86::VPXORQZrmbkz,    X86::VPXORDZrmbkz    },
 };
 
 // FIXME: Some shuffle and unpack instructions have equivalents in different

Modified: llvm/trunk/test/CodeGen/X86/avx512-arith.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-arith.ll?rev=280464&r1=280463&r2=280464&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-arith.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-arith.ll Fri Sep  2 00:29:09 2016
@@ -594,10 +594,30 @@ define <8 x i64> @addq_broadcast(<8 x i6
 }
 
 define <8 x i64> @orq_broadcast(<8 x i64> %a) nounwind {
-; CHECK-LABEL: orq_broadcast:
-; CHECK:       ## BB#0:
-; CHECK-NEXT:    vporq {{.*}}(%rip){1to8}, %zmm0, %zmm0
-; CHECK-NEXT:    retq
+; AVX512F-LABEL: orq_broadcast:
+; AVX512F:       ## BB#0:
+; AVX512F-NEXT:    vporq {{.*}}(%rip){1to8}, %zmm0, %zmm0
+; AVX512F-NEXT:    retq
+;
+; AVX512VL-LABEL: orq_broadcast:
+; AVX512VL:       ## BB#0:
+; AVX512VL-NEXT:    vporq {{.*}}(%rip){1to8}, %zmm0, %zmm0
+; AVX512VL-NEXT:    retq
+;
+; AVX512BW-LABEL: orq_broadcast:
+; AVX512BW:       ## BB#0:
+; AVX512BW-NEXT:    vporq {{.*}}(%rip){1to8}, %zmm0, %zmm0
+; AVX512BW-NEXT:    retq
+;
+; AVX512DQ-LABEL: orq_broadcast:
+; AVX512DQ:       ## BB#0:
+; AVX512DQ-NEXT:    vorpd {{.*}}(%rip){1to8}, %zmm0, %zmm0
+; AVX512DQ-NEXT:    retq
+;
+; SKX-LABEL: orq_broadcast:
+; SKX:       ## BB#0:
+; SKX-NEXT:    vorpd {{.*}}(%rip){1to8}, %zmm0, %zmm0
+; SKX-NEXT:    retq
   %b = or <8 x i64> %a, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
   ret <8 x i64> %b
 }
@@ -634,10 +654,30 @@ entry:
 }
 
 define <8 x i64> @andqbrst(<8 x i64> %p1, i64* %ap) {
-; CHECK-LABEL: andqbrst:
-; CHECK:       ## BB#0: ## %entry
-; CHECK-NEXT:    vpandq (%rdi){1to8}, %zmm0, %zmm0
-; CHECK-NEXT:    retq
+; AVX512F-LABEL: andqbrst:
+; AVX512F:       ## BB#0: ## %entry
+; AVX512F-NEXT:    vpandq (%rdi){1to8}, %zmm0, %zmm0
+; AVX512F-NEXT:    retq
+;
+; AVX512VL-LABEL: andqbrst:
+; AVX512VL:       ## BB#0: ## %entry
+; AVX512VL-NEXT:    vpandq (%rdi){1to8}, %zmm0, %zmm0
+; AVX512VL-NEXT:    retq
+;
+; AVX512BW-LABEL: andqbrst:
+; AVX512BW:       ## BB#0: ## %entry
+; AVX512BW-NEXT:    vpandq (%rdi){1to8}, %zmm0, %zmm0
+; AVX512BW-NEXT:    retq
+;
+; AVX512DQ-LABEL: andqbrst:
+; AVX512DQ:       ## BB#0: ## %entry
+; AVX512DQ-NEXT:    vandpd (%rdi){1to8}, %zmm0, %zmm0
+; AVX512DQ-NEXT:    retq
+;
+; SKX-LABEL: andqbrst:
+; SKX:       ## BB#0: ## %entry
+; SKX-NEXT:    vandpd (%rdi){1to8}, %zmm0, %zmm0
+; SKX-NEXT:    retq
 entry:
   %a = load i64, i64* %ap, align 8
   %b = insertelement <8 x i64> undef, i64 %a, i32 0

Modified: llvm/trunk/test/CodeGen/X86/avx512-logic.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-logic.ll?rev=280464&r1=280463&r2=280464&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-logic.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-logic.ll Fri Sep  2 00:29:09 2016
@@ -116,10 +116,15 @@ entry:
 
 
 define <8 x i64> @orq_broadcast(<8 x i64> %a) nounwind {
-; ALL-LABEL: orq_broadcast:
-; ALL:       ## BB#0:
-; ALL-NEXT:    vporq {{.*}}(%rip){1to8}, %zmm0, %zmm0
-; ALL-NEXT:    retq
+; KNL-LABEL: orq_broadcast:
+; KNL:       ## BB#0:
+; KNL-NEXT:    vporq {{.*}}(%rip){1to8}, %zmm0, %zmm0
+; KNL-NEXT:    retq
+;
+; SKX-LABEL: orq_broadcast:
+; SKX:       ## BB#0:
+; SKX-NEXT:    vorpd {{.*}}(%rip){1to8}, %zmm0, %zmm0
+; SKX-NEXT:    retq
   %b = or <8 x i64> %a, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
   ret <8 x i64> %b
 }
@@ -141,10 +146,15 @@ entry:
 }
 
 define <8 x i64> @andqbrst(<8 x i64> %p1, i64* %ap) {
-; ALL-LABEL: andqbrst:
-; ALL:       ## BB#0: ## %entry
-; ALL-NEXT:    vpandq (%rdi){1to8}, %zmm0, %zmm0
-; ALL-NEXT:    retq
+; KNL-LABEL: andqbrst:
+; KNL:       ## BB#0: ## %entry
+; KNL-NEXT:    vpandq (%rdi){1to8}, %zmm0, %zmm0
+; KNL-NEXT:    retq
+;
+; SKX-LABEL: andqbrst:
+; SKX:       ## BB#0: ## %entry
+; SKX-NEXT:    vandpd (%rdi){1to8}, %zmm0, %zmm0
+; SKX-NEXT:    retq
 entry:
   %a = load i64, i64* %ap, align 8
   %b = insertelement <8 x i64> undef, i64 %a, i32 0




More information about the llvm-commits mailing list