[llvm] r342237 - [InstCombine] add more tests for x86 blendv (PR38814); NFC
Sanjay Patel via llvm-commits
llvm-commits at lists.llvm.org
Fri Sep 14 06:47:33 PDT 2018
Author: spatel
Date: Fri Sep 14 06:47:33 2018
New Revision: 342237
URL: http://llvm.org/viewvc/llvm-project?rev=342237&view=rev
Log:
[InstCombine] add more tests for x86 blendv (PR38814); NFC
Modified:
llvm/trunk/test/Transforms/InstCombine/X86/blend_x86.ll
Modified: llvm/trunk/test/Transforms/InstCombine/X86/blend_x86.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/X86/blend_x86.ll?rev=342237&r1=342236&r2=342237&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/X86/blend_x86.ll (original)
+++ llvm/trunk/test/Transforms/InstCombine/X86/blend_x86.ll Fri Sep 14 06:47:33 2018
@@ -181,6 +181,8 @@ define <2 x double> @sel_v2f64(<2 x doub
ret <2 x double> %r
}
+; TODO: We can bitcast X, Y, and the select and remove the intrinsic.
+
define <16 x i8> @sel_v4i32(<16 x i8> %x, <16 x i8> %y, <4 x i1> %cond) {
; CHECK-LABEL: @sel_v4i32(
; CHECK-NEXT: [[S:%.*]] = sext <4 x i1> [[COND:%.*]] to <4 x i32>
@@ -205,6 +207,94 @@ define <16 x i8> @sel_v16i8(<16 x i8> %x
ret <16 x i8> %r
}
+; PR38814: https://bugs.llvm.org/show_bug.cgi?id=38814
+; Repeat the tests above using the minimal form that we expect when using C intrinsics in code.
+; This verifies that nothing is interfering with the blend transform. This also tests the
+; expected IR when 1 of the blend operands is a constant 0 vector. Potentially, this could
+; be transformed to bitwise logic in IR, but currently that transform is left to the backend.
+
+define <4 x float> @sel_v4f32_sse_reality(<4 x float>* %x, <4 x float> %y, <4 x float> %z) { ; blendvps fed by an fcmp-derived mask, with a zero vector as the second operand
+; CHECK-LABEL: @sel_v4f32_sse_reality(
+; CHECK-NEXT: [[LD:%.*]] = load <4 x float>, <4 x float>* [[X:%.*]], align 16
+; CHECK-NEXT: [[CMP:%.*]] = fcmp olt <4 x float> [[Z:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
+; CHECK-NEXT: [[COND:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
+; CHECK-NEXT: [[R:%.*]] = tail call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> [[LD]], <4 x float> zeroinitializer, <4 x float> [[COND]])
+; CHECK-NEXT: ret <4 x float> [[R]]
+; The assertions above expect every instruction of the input below to survive, including the intrinsic call.
+ %ld = load <4 x float>, <4 x float>* %x, align 16 ; first blend operand, loaded through %x
+ %cmp = fcmp olt <4 x float> %z, %y ; per-lane ordered "z < y"
+ %sext = sext <4 x i1> %cmp to <4 x i32> ; widen each i1 lane to i32: all-ones where true, zero where false
+ %cond = bitcast <4 x i32> %sext to <4 x float> ; reinterpret the integer mask as the float vector type the intrinsic takes
+ %r = tail call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %ld, <4 x float> zeroinitializer, <4 x float> %cond) ; second operand is the constant zero vector; %cond is the third (mask) operand
+ ret <4 x float> %r
+}
+
+define <2 x double> @sel_v2f64_sse_reality(<2 x double>* nocapture readonly %x, <2 x double> %y, <2 x double> %z) { ; blendvpd fed by an fcmp-derived mask, with a zero vector as the second operand
+; CHECK-LABEL: @sel_v2f64_sse_reality(
+; CHECK-NEXT: [[LD:%.*]] = load <2 x double>, <2 x double>* [[X:%.*]], align 16
+; CHECK-NEXT: [[CMP:%.*]] = fcmp olt <2 x double> [[Z:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
+; CHECK-NEXT: [[COND:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
+; CHECK-NEXT: [[R:%.*]] = tail call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> [[LD]], <2 x double> zeroinitializer, <2 x double> [[COND]])
+; CHECK-NEXT: ret <2 x double> [[R]]
+; The assertions above expect every instruction of the input below to survive, including the intrinsic call.
+ %ld = load <2 x double>, <2 x double>* %x, align 16 ; first blend operand, loaded through %x
+ %cmp = fcmp olt <2 x double> %z, %y ; per-lane ordered "z < y"
+ %sext = sext <2 x i1> %cmp to <2 x i64> ; widen each i1 lane to i64: all-ones where true, zero where false
+ %cond = bitcast <2 x i64> %sext to <2 x double> ; reinterpret the integer mask as the double vector type the intrinsic takes
+ %r = tail call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %ld, <2 x double> zeroinitializer, <2 x double> %cond) ; second operand is the constant zero vector; %cond is the third (mask) operand
+ ret <2 x double> %r
+}
+
+define <2 x i64> @sel_v4i32_sse_reality(<2 x i64>* nocapture readonly %x, <2 x i64> %y, <2 x i64> %z) { ; pblendvb whose mask comes from a 32-bit-lane compare, with <2 x i64> at the function boundary
+; CHECK-LABEL: @sel_v4i32_sse_reality(
+; CHECK-NEXT: [[XCAST:%.*]] = bitcast <2 x i64>* [[X:%.*]] to <16 x i8>*
+; CHECK-NEXT: [[LD:%.*]] = load <16 x i8>, <16 x i8>* [[XCAST]], align 16
+; CHECK-NEXT: [[YCAST:%.*]] = bitcast <2 x i64> [[Y:%.*]] to <4 x i32>
+; CHECK-NEXT: [[ZCAST:%.*]] = bitcast <2 x i64> [[Z:%.*]] to <4 x i32>
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <4 x i32> [[YCAST]], [[ZCAST]]
+; CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
+; CHECK-NEXT: [[COND:%.*]] = bitcast <4 x i32> [[SEXT]] to <16 x i8>
+; CHECK-NEXT: [[R:%.*]] = tail call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> [[LD]], <16 x i8> zeroinitializer, <16 x i8> [[COND]])
+; CHECK-NEXT: [[RCAST:%.*]] = bitcast <16 x i8> [[R]] to <2 x i64>
+; CHECK-NEXT: ret <2 x i64> [[RCAST]]
+; The assertions above expect every instruction of the input below to survive, including the intrinsic call.
+ %xcast = bitcast <2 x i64>* %x to <16 x i8>* ; view the pointer as pointing at 16 bytes
+ %ld = load <16 x i8>, <16 x i8>* %xcast, align 16 ; first blend operand, loaded as bytes
+ %ycast = bitcast <2 x i64> %y to <4 x i32> ; compare operands are reinterpreted as four i32 lanes
+ %zcast = bitcast <2 x i64> %z to <4 x i32>
+ %cmp = icmp sgt <4 x i32> %ycast, %zcast ; per-lane signed "y > z"
+ %sext = sext <4 x i1> %cmp to <4 x i32> ; widen each i1 lane to i32: all-ones where true, zero where false
+ %cond = bitcast <4 x i32> %sext to <16 x i8> ; reinterpret the 32-bit-lane mask as the byte vector the intrinsic takes
+ %r = tail call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %ld, <16 x i8> zeroinitializer, <16 x i8> %cond) ; second operand is the constant zero vector; %cond is the third (mask) operand
+ %rcast = bitcast <16 x i8> %r to <2 x i64> ; convert back to the function's <2 x i64> return type
+ ret <2 x i64> %rcast
+}
+
+define <2 x i64> @sel_v16i8_sse_reality(<2 x i64>* nocapture readonly %x, <2 x i64> %y, <2 x i64> %z) { ; pblendvb whose mask comes from a byte-lane compare, with <2 x i64> at the function boundary
+; CHECK-LABEL: @sel_v16i8_sse_reality(
+; CHECK-NEXT: [[XCAST:%.*]] = bitcast <2 x i64>* [[X:%.*]] to <16 x i8>*
+; CHECK-NEXT: [[LD:%.*]] = load <16 x i8>, <16 x i8>* [[XCAST]], align 16
+; CHECK-NEXT: [[YCAST:%.*]] = bitcast <2 x i64> [[Y:%.*]] to <16 x i8>
+; CHECK-NEXT: [[ZCAST:%.*]] = bitcast <2 x i64> [[Z:%.*]] to <16 x i8>
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <16 x i8> [[YCAST]], [[ZCAST]]
+; CHECK-NEXT: [[SEXT:%.*]] = sext <16 x i1> [[CMP]] to <16 x i8>
+; CHECK-NEXT: [[R:%.*]] = tail call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> [[LD]], <16 x i8> zeroinitializer, <16 x i8> [[SEXT]])
+; CHECK-NEXT: [[RCAST:%.*]] = bitcast <16 x i8> [[R]] to <2 x i64>
+; CHECK-NEXT: ret <2 x i64> [[RCAST]]
+; The assertions above expect every instruction of the input below to survive, including the intrinsic call.
+ %xcast = bitcast <2 x i64>* %x to <16 x i8>* ; view the pointer as pointing at 16 bytes
+ %ld = load <16 x i8>, <16 x i8>* %xcast, align 16 ; first blend operand, loaded as bytes
+ %ycast = bitcast <2 x i64> %y to <16 x i8> ; compare operands are reinterpreted as sixteen i8 lanes
+ %zcast = bitcast <2 x i64> %z to <16 x i8>
+ %cmp = icmp sgt <16 x i8> %ycast, %zcast ; per-lane signed "y > z"
+ %sext = sext <16 x i1> %cmp to <16 x i8> ; widen each i1 lane to i8: all-ones where true, zero where false
+ %r = tail call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %ld, <16 x i8> zeroinitializer, <16 x i8> %sext) ; mask is already <16 x i8>, so it is passed without a bitcast; second operand is the constant zero vector
+ %rcast = bitcast <16 x i8> %r to <2 x i64> ; convert back to the function's <2 x i64> return type
+ ret <2 x i64> %rcast
+}
+
declare <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8>, <16 x i8>, <16 x i8>)
declare <4 x float> @llvm.x86.sse41.blendvps(<4 x float>, <4 x float>, <4 x float>)
declare <2 x double> @llvm.x86.sse41.blendvpd(<2 x double>, <2 x double>, <2 x double>)
More information about the llvm-commits mailing list