[PATCH] R600: Add codegen for more builtins

Tue Jul 15 08:05:17 PDT 2014

On Tue, Jul 15, 2014 at 02:22:54AM +0000, Matt Arsenault wrote:
> These can't simply use GCCBuiltin since they come in f32 and f64 varieties, and GCCBuiltin apparently doesn't handle that.
> 

LGTM.

> http://reviews.llvm.org/D4506
> 
> Files:
>   include/clang/Basic/BuiltinsR600.def
>   lib/CodeGen/CGBuiltin.cpp
>   test/CodeGenOpenCL/builtins-r600.cl

> Index: include/clang/Basic/BuiltinsR600.def
> ===================================================================
> --- include/clang/Basic/BuiltinsR600.def
> +++ include/clang/Basic/BuiltinsR600.def
> @@ -16,5 +16,17 @@
>  
>  BUILTIN(__builtin_amdgpu_div_scale, "dddbb*", "n")
>  BUILTIN(__builtin_amdgpu_div_scalef, "fffbb*", "n")
> +BUILTIN(__builtin_amdgpu_div_fmas, "dddd", "nc")
> +BUILTIN(__builtin_amdgpu_div_fmasf, "ffff", "nc")
> +BUILTIN(__builtin_amdgpu_div_fixup, "dddd", "nc")
> +BUILTIN(__builtin_amdgpu_div_fixupf, "ffff", "nc")
> +BUILTIN(__builtin_amdgpu_trig_preop, "ddi", "nc")
> +BUILTIN(__builtin_amdgpu_trig_preopf, "ffi", "nc")
> +BUILTIN(__builtin_amdgpu_rcp, "dd", "nc")
> +BUILTIN(__builtin_amdgpu_rcpf, "ff", "nc")
> +BUILTIN(__builtin_amdgpu_rsq, "dd", "nc")
> +BUILTIN(__builtin_amdgpu_rsqf, "ff", "nc")
> +BUILTIN(__builtin_amdgpu_rsq_clamped, "dd", "nc")
> +BUILTIN(__builtin_amdgpu_rsq_clampedf, "ff", "nc")
>  
>  #undef BUILTIN
> Index: lib/CodeGen/CGBuiltin.cpp
> ===================================================================
> --- lib/CodeGen/CGBuiltin.cpp
> +++ lib/CodeGen/CGBuiltin.cpp
> @@ -5976,6 +5976,28 @@
>    }
>  }
>  
> +// Emit an intrinsic that has 1 float or double operand.
> +static Value *emitUnaryFPBuiltin(CodeGenFunction &CGF,
> +                                 const CallExpr *E,
> +                                 unsigned IntrinsicID) {
> +  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
> +
> +  Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
> +  return CGF.Builder.CreateCall(F, Src0);
> +}
> +
> +// Emit an intrinsic that has 3 float or double operands.
> +static Value *emitTernaryFPBuiltin(CodeGenFunction &CGF,
> +                                   const CallExpr *E,
> +                                   unsigned IntrinsicID) {
> +  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
> +  llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
> +  llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2));
> +
> +  Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
> +  return CGF.Builder.CreateCall3(F, Src0, Src1, Src2);
> +}
> +
>  Value *CodeGenFunction::EmitR600BuiltinExpr(unsigned BuiltinID,
>                                              const CallExpr *E) {
>    switch (BuiltinID) {
> @@ -6006,7 +6028,30 @@
>      llvm::StoreInst *FlagStore = Builder.CreateStore(FlagExt, FlagOutPtr.first);
>      FlagStore->setAlignment(FlagOutPtr.second);
>      return Result;
> -  } default:
> +  }
> +  case R600::BI__builtin_amdgpu_div_fmas:
> +  case R600::BI__builtin_amdgpu_div_fmasf:
> +    return emitTernaryFPBuiltin(*this, E, Intrinsic::AMDGPU_div_fmas);
> +  case R600::BI__builtin_amdgpu_div_fixup:
> +  case R600::BI__builtin_amdgpu_div_fixupf:
> +    return emitTernaryFPBuiltin(*this, E, Intrinsic::AMDGPU_div_fixup);
> +  case R600::BI__builtin_amdgpu_trig_preop:
> +  case R600::BI__builtin_amdgpu_trig_preopf: {
> +    Value *Src0 = EmitScalarExpr(E->getArg(0));
> +    Value *Src1 = EmitScalarExpr(E->getArg(1));
> +    Value *F = CGM.getIntrinsic(Intrinsic::AMDGPU_trig_preop, Src0->getType());
> +    return Builder.CreateCall2(F, Src0, Src1);
> +  }
> +  case R600::BI__builtin_amdgpu_rcp:
> +  case R600::BI__builtin_amdgpu_rcpf:
> +    return emitUnaryFPBuiltin(*this, E, Intrinsic::AMDGPU_rcp);
> +  case R600::BI__builtin_amdgpu_rsq:
> +  case R600::BI__builtin_amdgpu_rsqf:
> +    return emitUnaryFPBuiltin(*this, E, Intrinsic::AMDGPU_rsq);
> +  case R600::BI__builtin_amdgpu_rsq_clamped:
> +  case R600::BI__builtin_amdgpu_rsq_clampedf:
> +    return emitUnaryFPBuiltin(*this, E, Intrinsic::AMDGPU_rsq_clamped);
> +   default:
>      return nullptr;
>    }
>  }
> Index: test/CodeGenOpenCL/builtins-r600.cl
> ===================================================================
> --- test/CodeGenOpenCL/builtins-r600.cl
> +++ test/CodeGenOpenCL/builtins-r600.cl
> @@ -28,3 +28,87 @@
>    *out = __builtin_amdgpu_div_scalef(a, b, true, &flag);
>    *flagout = flag;
>  }
> +
> +// CHECK-LABEL: @test_div_fmas_f32
> +// CHECK: call float @llvm.AMDGPU.div.fmas.f32
> +void test_div_fmas_f32(global float* out, float a, float b, float c)
> +{
> +  *out = __builtin_amdgpu_div_fmasf(a, b, c);
> +}
> +
> +// CHECK-LABEL: @test_div_fmas_f64
> +// CHECK: call double @llvm.AMDGPU.div.fmas.f64
> +void test_div_fmas_f64(global double* out, double a, double b, double c)
> +{
> +  *out = __builtin_amdgpu_div_fmas(a, b, c);
> +}
> +
> +// CHECK-LABEL: @test_div_fixup_f32
> +// CHECK: call float @llvm.AMDGPU.div.fixup.f32
> +void test_div_fixup_f32(global float* out, float a, float b, float c)
> +{
> +  *out = __builtin_amdgpu_div_fixupf(a, b, c);
> +}
> +
> +// CHECK-LABEL: @test_div_fixup_f64
> +// CHECK: call double @llvm.AMDGPU.div.fixup.f64
> +void test_div_fixup_f64(global double* out, double a, double b, double c)
> +{
> +  *out = __builtin_amdgpu_div_fixup(a, b, c);
> +}
> +
> +// CHECK-LABEL: @test_trig_preop_f32
> +// CHECK: call float @llvm.AMDGPU.trig.preop.f32
> +void test_trig_preop_f32(global float* out, float a, int b)
> +{
> +  *out = __builtin_amdgpu_trig_preopf(a, b);
> +}
> +
> +// CHECK-LABEL: @test_trig_preop_f64
> +// CHECK: call double @llvm.AMDGPU.trig.preop.f64
> +void test_trig_preop_f64(global double* out, double a, int b)
> +{
> +  *out = __builtin_amdgpu_trig_preop(a, b);
> +}
> +
> +// CHECK-LABEL: @test_rcp_f32
> +// CHECK: call float @llvm.AMDGPU.rcp.f32
> +void test_rcp_f32(global float* out, float a)
> +{
> +  *out = __builtin_amdgpu_rcpf(a);
> +}
> +
> +// CHECK-LABEL: @test_rcp_f64
> +// CHECK: call double @llvm.AMDGPU.rcp.f64
> +void test_rcp_f64(global double* out, double a)
> +{
> +  *out = __builtin_amdgpu_rcp(a);
> +}
> +
> +// CHECK-LABEL: @test_rsq_f32
> +// CHECK: call float @llvm.AMDGPU.rsq.f32
> +void test_rsq_f32(global float* out, float a)
> +{
> +  *out = __builtin_amdgpu_rsqf(a);
> +}
> +
> +// CHECK-LABEL: @test_rsq_f64
> +// CHECK: call double @llvm.AMDGPU.rsq.f64
> +void test_rsq_f64(global double* out, double a)
> +{
> +  *out = __builtin_amdgpu_rsq(a);
> +}
> +
> +// CHECK-LABEL: @test_rsq_clamped_f32
> +// CHECK: call float @llvm.AMDGPU.rsq.clamped.f32
> +void test_rsq_clamped_f32(global float* out, float a)
> +{
> +  *out = __builtin_amdgpu_rsq_clampedf(a);
> +}
> +
> +// CHECK-LABEL: @test_rsq_clamped_f64
> +// CHECK: call double @llvm.AMDGPU.rsq.clamped.f64
> +void test_rsq_clamped_f64(global double* out, double a)
> +{
> +  *out = __builtin_amdgpu_rsq_clamped(a);
> +}

> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits