[llvm] [LV] Add initial support for vectorizing literal struct return values (PR #109833)
Sam Tebbs via llvm-commits
llvm-commits at lists.llvm.org
Thu Oct 10 01:41:37 PDT 2024
================
@@ -0,0 +1,284 @@
+; RUN: opt < %s -passes=loop-vectorize,dce,instcombine -force-vector-interleave=1 -S | FileCheck %s --check-prefixes=NEON
+; RUN: opt < %s -mattr=+sve -passes=loop-vectorize,dce,instcombine -force-vector-interleave=1 -S -prefer-predicate-over-epilogue=predicate-dont-vectorize | FileCheck %s --check-prefixes=SVE_TF
+
+target triple = "aarch64-unknown-linux-gnu"
+
+; Tests basic vectorization of homogeneous struct literal returns.
+
+define void @struct_return_f32_widen(ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) {
+; NEON-LABEL: define void @struct_return_f32_widen
+; NEON-SAME: (ptr noalias [[IN:%.*]], ptr noalias writeonly [[OUT_A:%.*]], ptr noalias writeonly [[OUT_B:%.*]])
+; NEON: vector.body:
+; NEON: [[WIDE_CALL:%.*]] = call { <4 x float>, <4 x float> } @fixed_vec_foo(<4 x float> [[WIDE_LOAD:%.*]])
+; NEON: [[WIDE_A:%.*]] = extractvalue { <4 x float>, <4 x float> } [[WIDE_CALL]], 0
+; NEON: [[WIDE_B:%.*]] = extractvalue { <4 x float>, <4 x float> } [[WIDE_CALL]], 1
+; NEON: store <4 x float> [[WIDE_A]], ptr {{%.*}}, align 4
+; NEON: store <4 x float> [[WIDE_B]], ptr {{%.*}}, align 4
+;
+; SVE_TF-LABEL: define void @struct_return_f32_widen
+; SVE_TF-SAME: (ptr noalias [[IN:%.*]], ptr noalias writeonly [[OUT_A:%.*]], ptr noalias writeonly [[OUT_B:%.*]])
+; SVE_TF: vector.body:
+; SVE_TF: [[WIDE_CALL:%.*]] = call { <vscale x 4 x float>, <vscale x 4 x float> } @scalable_vec_masked_foo(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; SVE_TF: [[WIDE_A:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[WIDE_CALL]], 0
+; SVE_TF: [[WIDE_B:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[WIDE_CALL]], 1
+; SVE_TF: call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> [[WIDE_A]], ptr {{%.*}}, i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]])
+; SVE_TF: call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> [[WIDE_B]], ptr {{%.*}}, i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]])
+entry:
+ br label %for.body
+
+for.body:
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds float, ptr %in, i64 %indvars.iv
+ %in_val = load float, ptr %arrayidx, align 4
+ %call = tail call { float, float } @foo(float %in_val) #0
+ %extract_a = extractvalue { float, float } %call, 0
+ %extract_b = extractvalue { float, float } %call, 1
+ %arrayidx2 = getelementptr inbounds float, ptr %out_a, i64 %indvars.iv
+ store float %extract_a, ptr %arrayidx2, align 4
+ %arrayidx4 = getelementptr inbounds float, ptr %out_b, i64 %indvars.iv
+ store float %extract_b, ptr %arrayidx4, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond.not = icmp eq i64 %indvars.iv.next, 1024
+ br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup:
+ ret void
+}
+
+define void @struct_return_f64_widen(ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) {
+; NEON-LABEL: define void @struct_return_f64_widen
+; NEON-SAME: (ptr noalias [[IN:%.*]], ptr noalias writeonly [[OUT_A:%.*]], ptr noalias writeonly [[OUT_B:%.*]])
+; NEON: vector.body:
+; NEON: [[WIDE_CALL:%.*]] = call { <2 x double>, <2 x double> } @fixed_vec_bar(<2 x double> [[WIDE_LOAD:%.*]])
+; NEON: [[WIDE_A:%.*]] = extractvalue { <2 x double>, <2 x double> } [[WIDE_CALL]], 0
+; NEON: [[WIDE_B:%.*]] = extractvalue { <2 x double>, <2 x double> } [[WIDE_CALL]], 1
+; NEON: store <2 x double> [[WIDE_A]], ptr {{%.*}}, align 8
+; NEON: store <2 x double> [[WIDE_B]], ptr {{%.*}}, align 8
+;
+; SVE_TF-LABEL: define void @struct_return_f64_widen
+; SVE_TF-SAME: (ptr noalias [[IN:%.*]], ptr noalias writeonly [[OUT_A:%.*]], ptr noalias writeonly [[OUT_B:%.*]])
+; SVE_TF: vector.body:
+; SVE_TF: [[WIDE_CALL:%.*]] = call { <vscale x 2 x double>, <vscale x 2 x double> } @scalable_vec_masked_bar(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; SVE_TF: [[WIDE_A:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[WIDE_CALL]], 0
+; SVE_TF: [[WIDE_B:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[WIDE_CALL]], 1
+; SVE_TF: call void @llvm.masked.store.nxv2f64.p0(<vscale x 2 x double> [[WIDE_A]], ptr {{%.*}}, i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
+; SVE_TF: call void @llvm.masked.store.nxv2f64.p0(<vscale x 2 x double> [[WIDE_B]], ptr {{%.*}}, i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
+entry:
+ br label %for.body
+
+for.body:
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds double, ptr %in, i64 %indvars.iv
+ %in_val = load double, ptr %arrayidx, align 8
+ %call = tail call { double, double } @bar(double %in_val) #1
+ %extract_a = extractvalue { double, double } %call, 0
+ %extract_b = extractvalue { double, double } %call, 1
+ %arrayidx2 = getelementptr inbounds double, ptr %out_a, i64 %indvars.iv
+ store double %extract_a, ptr %arrayidx2, align 8
+ %arrayidx4 = getelementptr inbounds double, ptr %out_b, i64 %indvars.iv
+ store double %extract_b, ptr %arrayidx4, align 8
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond.not = icmp eq i64 %indvars.iv.next, 1024
+ br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup:
+ ret void
+}
+
+define void @struct_return_f32_replicate(ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) {
----------------
SamTebbs33 wrote:
Should there be SVE_TF checks here?
https://github.com/llvm/llvm-project/pull/109833
More information about the llvm-commits
mailing list