r274608 - [X86] Use native IR for immediate values 0-7 of the packed fp cmp builtins. This matches what is already done for the SSE builtins with these same encodings.

Craig Topper via cfe-commits cfe-commits at lists.llvm.org
Tue Jul 5 23:27:31 PDT 2016


Author: ctopper
Date: Wed Jul  6 01:27:31 2016
New Revision: 274608

URL: http://llvm.org/viewvc/llvm-project?rev=274608&view=rev
Log:
[X86] Use native IR for immediate values 0-7 of the packed fp cmp builtins. This matches what is already done for the SSE builtins with these same encodings.
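
For a direct user of the packed compare intrinsics, the practical effect is that the eight "classic" comparison encodings now lower to a plain fcmp/sext/bitcast sequence instead of a call to the target-specific cmp intrinsic. The sketch below is illustrative only (the function name and CHECK lines are not part of this commit), but the IR pattern matches the updated checks in avx2-builtins.c further down:

#include <immintrin.h>

__m256d cmp_eq_oq(__m256d a, __m256d b) {
  // Immediate 0 (_CMP_EQ_OQ) now becomes native IR rather than a call to
  // @llvm.x86.avx.cmp.pd.256 with i8 0.
  // CHECK:      [[CMP:%.*]] = fcmp oeq <4 x double>
  // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i64>
  // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i64> [[SEXT]] to <4 x double>
  return _mm256_cmp_pd(a, b, _CMP_EQ_OQ);
}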

Modified:
    cfe/trunk/lib/CodeGen/CGBuiltin.cpp
    cfe/trunk/test/CodeGen/avx2-builtins.c

Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=274608&r1=274607&r2=274608&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Wed Jul  6 01:27:31 2016
@@ -7289,6 +7289,51 @@ Value *CodeGenFunction::EmitX86BuiltinEx
     return getVectorFCmpIR(CmpInst::FCMP_UGT, V2F64);
   case X86::BI__builtin_ia32_cmpordpd:
     return getVectorFCmpIR(CmpInst::FCMP_ORD, V2F64);
+  case X86::BI__builtin_ia32_cmpps:
+  case X86::BI__builtin_ia32_cmpps256:
+  case X86::BI__builtin_ia32_cmppd:
+  case X86::BI__builtin_ia32_cmppd256: {
+    unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
+    // If this is one of the SSE immediates, we can use native IR.
+    if (CC < 8) {
+      FCmpInst::Predicate Pred;
+      switch (CC) {
+      case 0: Pred = FCmpInst::FCMP_OEQ; break;
+      case 1: Pred = FCmpInst::FCMP_OLT; break;
+      case 2: Pred = FCmpInst::FCMP_OLE; break;
+      case 3: Pred = FCmpInst::FCMP_UNO; break;
+      case 4: Pred = FCmpInst::FCMP_UNE; break;
+      case 5: Pred = FCmpInst::FCMP_UGE; break;
+      case 6: Pred = FCmpInst::FCMP_UGT; break;
+      case 7: Pred = FCmpInst::FCMP_ORD; break;
+      }
+      Value *Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]);
+      auto *FPVecTy = cast<llvm::VectorType>(Ops[0]->getType());
+      auto *IntVecTy = llvm::VectorType::getInteger(FPVecTy);
+      Value *Sext = Builder.CreateSExt(Cmp, IntVecTy);
+      return Builder.CreateBitCast(Sext, FPVecTy);
+    }
+
+    // We can't handle immediates 8-31 with native IR, so use the intrinsic.
+    Intrinsic::ID ID;
+    switch (BuiltinID) {
+    default: llvm_unreachable("Unsupported intrinsic!");
+    case X86::BI__builtin_ia32_cmpps:
+      ID = Intrinsic::x86_sse_cmp_ps;
+      break;
+    case X86::BI__builtin_ia32_cmpps256:
+      ID = Intrinsic::x86_avx_cmp_ps_256;
+      break;
+    case X86::BI__builtin_ia32_cmppd:
+      ID = Intrinsic::x86_sse2_cmp_pd;
+      break;
+    case X86::BI__builtin_ia32_cmppd256:
+      ID = Intrinsic::x86_avx_cmp_pd_256;
+      break;
+    }
+
+    return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
+  }
 
   // SSE scalar comparison intrinsics
   case X86::BI__builtin_ia32_cmpeqss:
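
For reference, encodings 0-7 are exactly the comparisons the legacy SSE/SSE2 CMPPS/CMPPD instructions can express (EQ, LT, LE, UNORD, NEQ, NLT, NLE, ORD), which is why they map cleanly onto the fcmp predicates oeq, olt, ole, uno, une, uge, ugt and ord in the switch above. Encodings 8-31 exist only for AVX's VCMPPS/VCMPPD, so those still go through the target intrinsic. A minimal sketch of that fallback path, assuming an AVX target and the predicate macros from <immintrin.h> (this test is not part of the commit):

#include <immintrin.h>

__m256d cmp_eq_uq(__m256d a, __m256d b) {
  // _CMP_EQ_UQ is encoding 8, so the builtin keeps calling the intrinsic:
  // CHECK: call <4 x double> @llvm.x86.avx.cmp.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, i8 8)
  return _mm256_cmp_pd(a, b, _CMP_EQ_UQ);
}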

Modified: cfe/trunk/test/CodeGen/avx2-builtins.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx2-builtins.c?rev=274608&r1=274607&r2=274608&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/avx2-builtins.c (original)
+++ cfe/trunk/test/CodeGen/avx2-builtins.c Wed Jul  6 01:27:31 2016
@@ -488,7 +488,9 @@ __m128d test_mm_mask_i32gather_pd(__m128
 
 __m256d test_mm256_i32gather_pd(double const *b, __m128i c) {
   // CHECK-LABEL: test_mm256_i32gather_pd
-  // CHECK: call <4 x double> @llvm.x86.avx.cmp.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, i8 0)
+  // CHECK:         [[CMP:%.*]] = fcmp oeq <4 x double>
+  // CHECK-NEXT:    [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i64>
+  // CHECK-NEXT:    [[BC:%.*]] = bitcast <4 x i64> [[SEXT]] to <4 x double>
   // CHECK: call <4 x double> @llvm.x86.avx2.gather.d.pd.256(<4 x double> undef, i8* %{{.*}}, <4 x i32> %{{.*}}, <4 x double> %{{.*}}, i8 2)
   return _mm256_i32gather_pd(b, c, 2);
 }
@@ -516,7 +518,9 @@ __m128 test_mm_mask_i32gather_ps(__m128
 
 __m256 test_mm256_i32gather_ps(float const *b, __m256i c) {
   // CHECK-LABEL: test_mm256_i32gather_ps
-  // CHECK: call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, i8 0)
+  // CHECK:         [[CMP:%.*]] = fcmp oeq <8 x float>
+  // CHECK-NEXT:    [[SEXT:%.*]] = sext <8 x i1> [[CMP]] to <8 x i32>
+  // CHECK-NEXT:    [[BC:%.*]] = bitcast <8 x i32> [[SEXT]] to <8 x float>
   // CHECK: call <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float> undef, i8* %{{.*}}, <8 x i32> %{{.*}}, <8 x float> %{{.*}}, i8 2)
   return _mm256_i32gather_ps(b, c, 2);
 }
@@ -592,7 +596,9 @@ __m128d test_mm_mask_i64gather_pd(__m128
 
 __m256d test_mm256_i64gather_pd(double const *b, __m256i c) {
   // CHECK-LABEL: test_mm256_i64gather_pd
-  // CHECK: call <4 x double> @llvm.x86.avx.cmp.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, i8 0)
+  // CHECK:         [[CMP:%.*]] = fcmp oeq <4 x double>
+  // CHECK-NEXT:    [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i64>
+  // CHECK-NEXT:    [[BC:%.*]] = bitcast <4 x i64> [[SEXT]] to <4 x double>
   // CHECK: call <4 x double> @llvm.x86.avx2.gather.q.pd.256(<4 x double> undef, i8* %{{.*}}, <4 x i64> %{{.*}}, <4 x double> %{{.*}}, i8 2)
   return _mm256_i64gather_pd(b, c, 2);
 }
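
The 128-bit __builtin_ia32_cmpps and __builtin_ia32_cmppd builtins get the same lowering. A hypothetical check for the OLE encoding (not part of this commit, and assuming an AVX target so that _mm_cmp_ps, which expands to __builtin_ia32_cmpps, is available) would look like:

__m128 cmp_le_ps(__m128 a, __m128 b) {
  // Immediate 2 (_CMP_LE_OS) maps to an ordered less-or-equal fcmp:
  // CHECK:      [[CMP:%.*]] = fcmp ole <4 x float>
  // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
  // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
  return _mm_cmp_ps(a, b, _CMP_LE_OS);
}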
