[llvm] r292474 - [AVX-512] Support ADD/SUB/MUL of mask vectors
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Wed Jan 18 23:12:36 PST 2017
Author: ctopper
Date: Thu Jan 19 01:12:35 2017
New Revision: 292474
URL: http://llvm.org/viewvc/llvm-project?rev=292474&view=rev
Log:
[AVX-512] Support ADD/SUB/MUL of mask vectors
Summary:
Currently we expand and scalarize these operations, but we should be able to implement ADD/SUB with KXOR and MUL with KAND: arithmetic on i1 is modulo 2, so both add and sub reduce to XOR, and mul reduces to AND.
We already do this for scalar i1 operations, so I just extended it to vectors of i1.
Reviewers: zvi, delena
Reviewed By: delena
Subscribers: guyblank, llvm-commits
Differential Revision: https://reviews.llvm.org/D28888
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll
llvm/trunk/test/CodeGen/X86/avx512bw-mask-op.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=292474&r1=292473&r2=292474&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Thu Jan 19 01:12:35 2017
@@ -1357,12 +1357,12 @@ X86TargetLowering::X86TargetLowering(con
setOperationAction(ISD::UMIN, MVT::v16i32, Legal);
setOperationAction(ISD::UMIN, MVT::v8i64, Legal);
- setOperationAction(ISD::ADD, MVT::v8i1, Expand);
- setOperationAction(ISD::ADD, MVT::v16i1, Expand);
- setOperationAction(ISD::SUB, MVT::v8i1, Expand);
- setOperationAction(ISD::SUB, MVT::v16i1, Expand);
- setOperationAction(ISD::MUL, MVT::v8i1, Expand);
- setOperationAction(ISD::MUL, MVT::v16i1, Expand);
+ setOperationAction(ISD::ADD, MVT::v8i1, Custom);
+ setOperationAction(ISD::ADD, MVT::v16i1, Custom);
+ setOperationAction(ISD::SUB, MVT::v8i1, Custom);
+ setOperationAction(ISD::SUB, MVT::v16i1, Custom);
+ setOperationAction(ISD::MUL, MVT::v8i1, Custom);
+ setOperationAction(ISD::MUL, MVT::v16i1, Custom);
setOperationAction(ISD::MUL, MVT::v16i32, Legal);
@@ -1460,12 +1460,12 @@ X86TargetLowering::X86TargetLowering(con
addRegisterClass(MVT::v32i1, &X86::VK32RegClass);
addRegisterClass(MVT::v64i1, &X86::VK64RegClass);
- setOperationAction(ISD::ADD, MVT::v32i1, Expand);
- setOperationAction(ISD::ADD, MVT::v64i1, Expand);
- setOperationAction(ISD::SUB, MVT::v32i1, Expand);
- setOperationAction(ISD::SUB, MVT::v64i1, Expand);
- setOperationAction(ISD::MUL, MVT::v32i1, Expand);
- setOperationAction(ISD::MUL, MVT::v64i1, Expand);
+ setOperationAction(ISD::ADD, MVT::v32i1, Custom);
+ setOperationAction(ISD::ADD, MVT::v64i1, Custom);
+ setOperationAction(ISD::SUB, MVT::v32i1, Custom);
+ setOperationAction(ISD::SUB, MVT::v64i1, Custom);
+ setOperationAction(ISD::MUL, MVT::v32i1, Custom);
+ setOperationAction(ISD::MUL, MVT::v64i1, Custom);
setOperationAction(ISD::SETCC, MVT::v32i1, Custom);
setOperationAction(ISD::SETCC, MVT::v64i1, Custom);
@@ -1574,9 +1574,9 @@ X86TargetLowering::X86TargetLowering(con
addRegisterClass(MVT::v2i1, &X86::VK2RegClass);
for (auto VT : { MVT::v2i1, MVT::v4i1 }) {
- setOperationAction(ISD::ADD, VT, Expand);
- setOperationAction(ISD::SUB, VT, Expand);
- setOperationAction(ISD::MUL, VT, Expand);
+ setOperationAction(ISD::ADD, VT, Custom);
+ setOperationAction(ISD::SUB, VT, Custom);
+ setOperationAction(ISD::MUL, VT, Custom);
setOperationAction(ISD::VSELECT, VT, Expand);
setOperationAction(ISD::TRUNCATE, VT, Custom);
@@ -20847,8 +20847,9 @@ static SDValue Lower512IntArith(SDValue
}
static SDValue LowerADD_SUB(SDValue Op, SelectionDAG &DAG) {
- if (Op.getValueType() == MVT::i1)
- return DAG.getNode(ISD::XOR, SDLoc(Op), Op.getValueType(),
+ MVT VT = Op.getSimpleValueType();
+ if (VT.getScalarType() == MVT::i1)
+ return DAG.getNode(ISD::XOR, SDLoc(Op), VT,
Op.getOperand(0), Op.getOperand(1));
assert(Op.getSimpleValueType().is256BitVector() &&
Op.getSimpleValueType().isInteger() &&
@@ -20868,7 +20869,7 @@ static SDValue LowerMUL(SDValue Op, cons
SDLoc dl(Op);
MVT VT = Op.getSimpleValueType();
- if (VT == MVT::i1)
+ if (VT.getScalarType() == MVT::i1)
return DAG.getNode(ISD::AND, dl, VT, Op.getOperand(0), Op.getOperand(1));
// Decompose 256-bit ops into smaller 128-bit ops.
Modified: llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll?rev=292474&r1=292473&r2=292474&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll Thu Jan 19 01:12:35 2017
@@ -1986,3 +1986,117 @@ define i32 @test_bitcast_v16i1_zext(<16
%val1 = add i32 %val, %val
ret i32 %val1
}
+
+define i16 @test_v16i1_add(i16 %x, i16 %y) {
+; CHECK-LABEL: test_v16i1_add:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k0
+; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: kxorw %k1, %k0, %k0
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: retq
+ %m0 = bitcast i16 %x to <16 x i1>
+ %m1 = bitcast i16 %y to <16 x i1>
+ %m2 = add <16 x i1> %m0, %m1
+ %ret = bitcast <16 x i1> %m2 to i16
+ ret i16 %ret
+}
+
+define i16 @test_v16i1_sub(i16 %x, i16 %y) {
+; CHECK-LABEL: test_v16i1_sub:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k0
+; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: kxorw %k1, %k0, %k0
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: retq
+ %m0 = bitcast i16 %x to <16 x i1>
+ %m1 = bitcast i16 %y to <16 x i1>
+ %m2 = sub <16 x i1> %m0, %m1
+ %ret = bitcast <16 x i1> %m2 to i16
+ ret i16 %ret
+}
+
+define i16 @test_v16i1_mul(i16 %x, i16 %y) {
+; CHECK-LABEL: test_v16i1_mul:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k0
+; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: kandw %k1, %k0, %k0
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: retq
+ %m0 = bitcast i16 %x to <16 x i1>
+ %m1 = bitcast i16 %y to <16 x i1>
+ %m2 = mul <16 x i1> %m0, %m1
+ %ret = bitcast <16 x i1> %m2 to i16
+ ret i16 %ret
+}
+
+define i8 @test_v8i1_add(i8 %x, i8 %y) {
+; KNL-LABEL: test_v8i1_add:
+; KNL: ## BB#0:
+; KNL-NEXT: kmovw %edi, %k0
+; KNL-NEXT: kmovw %esi, %k1
+; KNL-NEXT: kxorw %k1, %k0, %k0
+; KNL-NEXT: kmovw %k0, %eax
+; KNL-NEXT: retq
+;
+; SKX-LABEL: test_v8i1_add:
+; SKX: ## BB#0:
+; SKX-NEXT: kmovb %edi, %k0
+; SKX-NEXT: kmovb %esi, %k1
+; SKX-NEXT: kxorb %k1, %k0, %k0
+; SKX-NEXT: kmovb %k0, %eax
+; SKX-NEXT: retq
+ %m0 = bitcast i8 %x to <8 x i1>
+ %m1 = bitcast i8 %y to <8 x i1>
+ %m2 = add <8 x i1> %m0, %m1
+ %ret = bitcast <8 x i1> %m2 to i8
+ ret i8 %ret
+}
+
+define i8 @test_v8i1_sub(i8 %x, i8 %y) {
+; KNL-LABEL: test_v8i1_sub:
+; KNL: ## BB#0:
+; KNL-NEXT: kmovw %edi, %k0
+; KNL-NEXT: kmovw %esi, %k1
+; KNL-NEXT: kxorw %k1, %k0, %k0
+; KNL-NEXT: kmovw %k0, %eax
+; KNL-NEXT: retq
+;
+; SKX-LABEL: test_v8i1_sub:
+; SKX: ## BB#0:
+; SKX-NEXT: kmovb %edi, %k0
+; SKX-NEXT: kmovb %esi, %k1
+; SKX-NEXT: kxorb %k1, %k0, %k0
+; SKX-NEXT: kmovb %k0, %eax
+; SKX-NEXT: retq
+ %m0 = bitcast i8 %x to <8 x i1>
+ %m1 = bitcast i8 %y to <8 x i1>
+ %m2 = sub <8 x i1> %m0, %m1
+ %ret = bitcast <8 x i1> %m2 to i8
+ ret i8 %ret
+}
+
+define i8 @test_v8i1_mul(i8 %x, i8 %y) {
+; KNL-LABEL: test_v8i1_mul:
+; KNL: ## BB#0:
+; KNL-NEXT: kmovw %edi, %k0
+; KNL-NEXT: kmovw %esi, %k1
+; KNL-NEXT: kandw %k1, %k0, %k0
+; KNL-NEXT: kmovw %k0, %eax
+; KNL-NEXT: retq
+;
+; SKX-LABEL: test_v8i1_mul:
+; SKX: ## BB#0:
+; SKX-NEXT: kmovb %edi, %k0
+; SKX-NEXT: kmovb %esi, %k1
+; SKX-NEXT: kandb %k1, %k0, %k0
+; SKX-NEXT: kmovb %k0, %eax
+; SKX-NEXT: retq
+ %m0 = bitcast i8 %x to <8 x i1>
+ %m1 = bitcast i8 %y to <8 x i1>
+ %m2 = mul <8 x i1> %m0, %m1
+ %ret = bitcast <8 x i1> %m2 to i8
+ ret i8 %ret
+}
Modified: llvm/trunk/test/CodeGen/X86/avx512bw-mask-op.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512bw-mask-op.ll?rev=292474&r1=292473&r2=292474&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512bw-mask-op.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512bw-mask-op.ll Thu Jan 19 01:12:35 2017
@@ -150,3 +150,93 @@ define i64 @mand64_mem(<64 x i1>* %x, <6
%ret = bitcast <64 x i1> %me to i64
ret i64 %ret
}
+
+define i32 @test_v32i1_add(i32 %x, i32 %y) {
+; CHECK-LABEL: test_v32i1_add:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovd %edi, %k0
+; CHECK-NEXT: kmovd %esi, %k1
+; CHECK-NEXT: kxord %k1, %k0, %k0
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: retq
+ %m0 = bitcast i32 %x to <32 x i1>
+ %m1 = bitcast i32 %y to <32 x i1>
+ %m2 = add <32 x i1> %m0, %m1
+ %ret = bitcast <32 x i1> %m2 to i32
+ ret i32 %ret
+}
+
+define i32 @test_v32i1_sub(i32 %x, i32 %y) {
+; CHECK-LABEL: test_v32i1_sub:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovd %edi, %k0
+; CHECK-NEXT: kmovd %esi, %k1
+; CHECK-NEXT: kxord %k1, %k0, %k0
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: retq
+ %m0 = bitcast i32 %x to <32 x i1>
+ %m1 = bitcast i32 %y to <32 x i1>
+ %m2 = sub <32 x i1> %m0, %m1
+ %ret = bitcast <32 x i1> %m2 to i32
+ ret i32 %ret
+}
+
+define i32 @test_v32i1_mul(i32 %x, i32 %y) {
+; CHECK-LABEL: test_v32i1_mul:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovd %edi, %k0
+; CHECK-NEXT: kmovd %esi, %k1
+; CHECK-NEXT: kandd %k1, %k0, %k0
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: retq
+ %m0 = bitcast i32 %x to <32 x i1>
+ %m1 = bitcast i32 %y to <32 x i1>
+ %m2 = mul <32 x i1> %m0, %m1
+ %ret = bitcast <32 x i1> %m2 to i32
+ ret i32 %ret
+}
+
+define i64 @test_v64i1_add(i64 %x, i64 %y) {
+; CHECK-LABEL: test_v64i1_add:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovq %rdi, %k0
+; CHECK-NEXT: kmovq %rsi, %k1
+; CHECK-NEXT: kxorq %k1, %k0, %k0
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: retq
+ %m0 = bitcast i64 %x to <64 x i1>
+ %m1 = bitcast i64 %y to <64 x i1>
+ %m2 = add <64 x i1> %m0, %m1
+ %ret = bitcast <64 x i1> %m2 to i64
+ ret i64 %ret
+}
+
+define i64 @test_v64i1_sub(i64 %x, i64 %y) {
+; CHECK-LABEL: test_v64i1_sub:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovq %rdi, %k0
+; CHECK-NEXT: kmovq %rsi, %k1
+; CHECK-NEXT: kxorq %k1, %k0, %k0
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: retq
+ %m0 = bitcast i64 %x to <64 x i1>
+ %m1 = bitcast i64 %y to <64 x i1>
+ %m2 = sub <64 x i1> %m0, %m1
+ %ret = bitcast <64 x i1> %m2 to i64
+ ret i64 %ret
+}
+
+define i64 @test_v64i1_mul(i64 %x, i64 %y) {
+; CHECK-LABEL: test_v64i1_mul:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovq %rdi, %k0
+; CHECK-NEXT: kmovq %rsi, %k1
+; CHECK-NEXT: kandq %k1, %k0, %k0
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: retq
+ %m0 = bitcast i64 %x to <64 x i1>
+ %m1 = bitcast i64 %y to <64 x i1>
+ %m2 = mul <64 x i1> %m0, %m1
+ %ret = bitcast <64 x i1> %m2 to i64
+ ret i64 %ret
+}
More information about the llvm-commits
mailing list