[clang] [llvm] [HLSL] Move length support out of the DirectX Backend (PR #121611)

Mon Jan 6 12:55:39 PST 2025

================
@@ -1,73 +1,151 @@
-// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
-// RUN:   dxil-pc-shadermodel6.3-library %s -fnative-half-type \
-// RUN:   -emit-llvm -disable-llvm-passes -o - | FileCheck %s \ 
-// RUN:   --check-prefixes=CHECK,NATIVE_HALF
-// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
-// RUN:   dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes \
-// RUN:   -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF
-
-// NATIVE_HALF: define noundef half @
-// NATIVE_HALF: call half @llvm.fabs.f16(half
-// NO_HALF: call float @llvm.fabs.f32(float
-// NATIVE_HALF: ret half
-// NO_HALF: ret float
-half test_length_half(half p0)
-{
-  return length(p0);
-}
-// NATIVE_HALF: define noundef half @
-// NATIVE_HALF: %hlsl.length = call half @llvm.dx.length.v2f16
-// NO_HALF: %hlsl.length = call float @llvm.dx.length.v2f32(
-// NATIVE_HALF: ret half %hlsl.length
-// NO_HALF: ret float %hlsl.length
-half test_length_half2(half2 p0)
-{
-  return length(p0);
-}
-// NATIVE_HALF: define noundef half @
-// NATIVE_HALF: %hlsl.length = call half @llvm.dx.length.v3f16
-// NO_HALF: %hlsl.length = call float @llvm.dx.length.v3f32(
-// NATIVE_HALF: ret half %hlsl.length
-// NO_HALF: ret float %hlsl.length
-half test_length_half3(half3 p0)
-{
-  return length(p0);
-}
-// NATIVE_HALF: define noundef half @
-// NATIVE_HALF: %hlsl.length = call half @llvm.dx.length.v4f16
-// NO_HALF: %hlsl.length = call float @llvm.dx.length.v4f32(
-// NATIVE_HALF: ret half %hlsl.length
-// NO_HALF: ret float %hlsl.length
-half test_length_half4(half4 p0)
-{
-  return length(p0);
-}
-
-// CHECK: define noundef float @
-// CHECK: call float @llvm.fabs.f32(float
-// CHECK: ret float
-float test_length_float(float p0)
-{
-  return length(p0);
-}
-// CHECK: define noundef float @
-// CHECK: %hlsl.length = call float @llvm.dx.length.v2f32(
-// CHECK: ret float %hlsl.length
-float test_length_float2(float2 p0)
-{
-  return length(p0);
-}
-// CHECK: define noundef float @
-// CHECK: %hlsl.length = call float @llvm.dx.length.v3f32(
-// CHECK: ret float %hlsl.length
-float test_length_float3(float3 p0)
-{
-  return length(p0);
-}
-// CHECK: define noundef float @
-// CHECK: %hlsl.length = call float @llvm.dx.length.v4f32(
-// CHECK: ret float %hlsl.length
-float test_length_float4(float4 p0)
-{
-  return length(p0);
-}
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// RUN: %clang_cc1 -finclude-default-header -triple \
+// RUN: dxil-pc-shadermodel6.3-library %s -fnative-half-type \
+// RUN: -emit-llvm -O2 -o - | FileCheck %s
+
+// CHECK-LABEL: define noundef half @_Z16test_length_halfDh(
+// CHECK-SAME: half noundef [[P0:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[ELT_ABS_I:%.*]] = tail call noundef half @llvm.fabs.f16(half [[P0]])
+// CHECK-NEXT:    ret half [[ELT_ABS_I]]
+//
+half test_length_half(half p0)
+{
+  return length(p0);
+}
+
+// CHECK-LABEL: define noundef half @_Z17test_length_half2Dv2_Dh(
+// CHECK-SAME: <2 x half> noundef [[P0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[MUL_I:%.*]] = fmul <2 x half> [[P0]], [[P0]]
+// CHECK-NEXT:    [[VECEXT_I:%.*]] = extractelement <2 x half> [[MUL_I]], i64 0
+// CHECK-NEXT:    [[VECEXT1_I:%.*]] = extractelement <2 x half> [[MUL_I]], i64 1
+// CHECK-NEXT:    [[ADD_I:%.*]] = fadd half [[VECEXT1_I]], [[VECEXT_I]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call noundef half @llvm.sqrt.f16(half [[ADD_I]])
+// CHECK-NEXT:    ret half [[TMP0]]
+//
+half test_length_half2(half2 p0)
+{
+  return length(p0);
+}
+
+// CHECK-LABEL: define noundef half @_Z17test_length_half3Dv3_Dh(
+// CHECK-SAME: <3 x half> noundef [[P0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[MUL_I:%.*]] = fmul <3 x half> [[P0]], [[P0]]
+// CHECK-NEXT:    [[VECEXT_I:%.*]] = extractelement <3 x half> [[MUL_I]], i64 0
+// CHECK-NEXT:    [[VECEXT1_I:%.*]] = extractelement <3 x half> [[MUL_I]], i64 1
+// CHECK-NEXT:    [[ADD_I:%.*]] = fadd half [[VECEXT_I]], [[VECEXT1_I]]
+// CHECK-NEXT:    [[VECEXT1_I_1:%.*]] = extractelement <3 x half> [[MUL_I]], i64 2
+// CHECK-NEXT:    [[ADD_I_1:%.*]] = fadd half [[ADD_I]], [[VECEXT1_I_1]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call noundef half @llvm.sqrt.f16(half [[ADD_I_1]])
+// CHECK-NEXT:    ret half [[TMP0]]
+//
+half test_length_half3(half3 p0)
+{
+  return length(p0);
+}
+
+// CHECK-LABEL: define noundef half @_Z17test_length_half4Dv4_Dh(
+// CHECK-SAME: <4 x half> noundef [[P0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[MUL_I:%.*]] = fmul <4 x half> [[P0]], [[P0]]
+// CHECK-NEXT:    [[VECEXT_I:%.*]] = extractelement <4 x half> [[MUL_I]], i64 0
+// CHECK-NEXT:    [[VECEXT1_I:%.*]] = extractelement <4 x half> [[MUL_I]], i64 1
+// CHECK-NEXT:    [[ADD_I:%.*]] = fadd half [[VECEXT_I]], [[VECEXT1_I]]
+// CHECK-NEXT:    [[VECEXT1_I_1:%.*]] = extractelement <4 x half> [[MUL_I]], i64 2
+// CHECK-NEXT:    [[ADD_I_1:%.*]] = fadd half [[ADD_I]], [[VECEXT1_I_1]]
+// CHECK-NEXT:    [[VECEXT1_I_2:%.*]] = extractelement <4 x half> [[MUL_I]], i64 3
+// CHECK-NEXT:    [[ADD_I_2:%.*]] = fadd half [[ADD_I_1]], [[VECEXT1_I_2]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call noundef half @llvm.sqrt.f16(half [[ADD_I_2]])
+// CHECK-NEXT:    ret half [[TMP0]]
+//
+half test_length_half4(half4 p0)
+{
+  return length(p0);
+}
+
+
+// CHECK-LABEL: define noundef float @_Z17test_length_floatf(
+// CHECK-SAME: float noundef [[P0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[ELT_ABS_I:%.*]] = tail call noundef float @llvm.fabs.f32(float [[P0]])
+// CHECK-NEXT:    ret float [[ELT_ABS_I]]
+//
+float test_length_float(float p0)
+{
+  return length(p0);
+}
+
+// CHECK-LABEL: define noundef float @_Z18test_length_float2Dv2_f(
+// CHECK-SAME: <2 x float> noundef [[P0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[MUL_I:%.*]] = fmul <2 x float> [[P0]], [[P0]]
+// CHECK-NEXT:    [[VECEXT_I:%.*]] = extractelement <2 x float> [[MUL_I]], i64 0
+// CHECK-NEXT:    [[VECEXT1_I:%.*]] = extractelement <2 x float> [[MUL_I]], i64 1
+// CHECK-NEXT:    [[ADD_I:%.*]] = fadd float [[VECEXT1_I]], [[VECEXT_I]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call noundef float @llvm.sqrt.f32(float [[ADD_I]])
+// CHECK-NEXT:    ret float [[TMP0]]
+//
+float test_length_float2(float2 p0)
+{
+  return length(p0);
+}
+
+// CHECK-LABEL: define noundef float @_Z18test_length_float3Dv3_f(
+// CHECK-SAME: <3 x float> noundef [[P0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[MUL_I:%.*]] = fmul <3 x float> [[P0]], [[P0]]
+// CHECK-NEXT:    [[VECEXT_I:%.*]] = extractelement <3 x float> [[MUL_I]], i64 0
+// CHECK-NEXT:    [[VECEXT1_I:%.*]] = extractelement <3 x float> [[MUL_I]], i64 1
+// CHECK-NEXT:    [[ADD_I:%.*]] = fadd float [[VECEXT_I]], [[VECEXT1_I]]
+// CHECK-NEXT:    [[VECEXT1_I_1:%.*]] = extractelement <3 x float> [[MUL_I]], i64 2
+// CHECK-NEXT:    [[ADD_I_1:%.*]] = fadd float [[ADD_I]], [[VECEXT1_I_1]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call noundef float @llvm.sqrt.f32(float [[ADD_I_1]])
+// CHECK-NEXT:    ret float [[TMP0]]
+//
+float test_length_float3(float3 p0)
+{
+  return length(p0);
+}
+
+
+// CHECK-LABEL: define noundef float @_Z18test_length_float4Dv4_f(
+// CHECK-SAME: <4 x float> noundef [[P0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[MUL_I:%.*]] = fmul <4 x float> [[P0]], [[P0]]
+// CHECK-NEXT:    [[VECEXT_I:%.*]] = extractelement <4 x float> [[MUL_I]], i64 0
+// CHECK-NEXT:    [[VECEXT1_I:%.*]] = extractelement <4 x float> [[MUL_I]], i64 1
+// CHECK-NEXT:    [[ADD_I:%.*]] = fadd float [[VECEXT_I]], [[VECEXT1_I]]
+// CHECK-NEXT:    [[VECEXT1_I_1:%.*]] = extractelement <4 x float> [[MUL_I]], i64 2
+// CHECK-NEXT:    [[ADD_I_1:%.*]] = fadd float [[ADD_I]], [[VECEXT1_I_1]]
+// CHECK-NEXT:    [[VECEXT1_I_2:%.*]] = extractelement <4 x float> [[MUL_I]], i64 3
+// CHECK-NEXT:    [[ADD_I_2:%.*]] = fadd float [[ADD_I_1]], [[VECEXT1_I_2]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call noundef float @llvm.sqrt.f32(float [[ADD_I_2]])
+// CHECK-NEXT:    ret float [[TMP0]]
+//
+float test_length_float4(float4 p0)
+{
+  return length(p0);
+}
+
+
+// CHECK-LABEL: define noundef float @_Z26test_length_float4_extractDv4_f(
+// CHECK-SAME: <4 x float> noundef [[P0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[MUL_I:%.*]] = fmul <4 x float> [[P0]], [[P0]]
+// CHECK-NEXT:    [[VECEXT_I:%.*]] = extractelement <4 x float> [[MUL_I]], i64 0
+// CHECK-NEXT:    [[VECEXT1_I:%.*]] = extractelement <4 x float> [[MUL_I]], i64 1
+// CHECK-NEXT:    [[VECEXT1_I_1:%.*]] = extractelement <4 x float> [[MUL_I]], i64 2
+// CHECK-NEXT:    [[VECEXT1_I_2:%.*]] = extractelement <4 x float> [[MUL_I]], i64 3
+// CHECK-NEXT:    [[ADD_I12:%.*]] = fadd float [[VECEXT_I]], [[VECEXT1_I]]
+// CHECK-NEXT:    [[ADD_I12_1:%.*]] = fadd float [[ADD_I12]], [[VECEXT1_I_1]]
+// CHECK-NEXT:    [[ADD_I12_2:%.*]] = fadd float [[ADD_I12_1]], [[VECEXT1_I_2]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call noundef float @llvm.sqrt.f32(float [[ADD_I12_2]])
+// CHECK-NEXT:    [[ADD:%.*]] = fadd float [[TMP0]], [[TMP0]]
+// CHECK-NEXT:    ret float [[ADD]]
+//
+float test_length_float4_extract(float4 p0)
+{
+  return length(p0) + length(p0);
----------------
llvm-beanz wrote:

In general, I dislike tests that depend on optimizations. I'd prefer that Clang verify the unoptimized codegen, and that we rely on LLVM's own testing to ensure that optimization doesn't make the program illegal. Having Clang validate optimized IR opens the door to us getting the right answer for the wrong reasons.

https://github.com/llvm/llvm-project/pull/121611