[llvm] c7d39fd - [LV][SLP] Add tests for vectorizing fptoi_sat intrinsics. NFC
David Green via llvm-commits
llvm-commits at lists.llvm.org
Mon May 2 07:11:51 PDT 2022
Author: David Green
Date: 2022-05-02T15:11:44+01:00
New Revision: c7d39fd61ade68db601355b395f45b1d2bf6b8fb
URL: https://github.com/llvm/llvm-project/commit/c7d39fd61ade68db601355b395f45b1d2bf6b8fb
DIFF: https://github.com/llvm/llvm-project/commit/c7d39fd61ade68db601355b395f45b1d2bf6b8fb.diff
LOG: [LV][SLP] Add tests for vectorizing fptoi_sat intrinsics. NFC
Added:
llvm/test/Transforms/LoopVectorize/fpsat.ll
llvm/test/Transforms/SLPVectorizer/AArch64/fpsat.ll
Modified:
Removed:
################################################################################
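For context, an illustrative sketch that is not part of this commit: the tests below exercise the scalar @llvm.fptosi.sat / @llvm.fptoui.sat intrinsics, which convert a float to an integer with saturation at the integer type's range instead of producing poison on overflow. Once the vectorizers and the target cost model handle these calls, the loop body in the LoopVectorize test would be expected to widen to something like the following; the value names (%wide.load, %sat, %gep.x, %gep.y) are hypothetical:

  ; assumed vectorized form (VF=4), for illustration only
  %wide.load = load <4 x float>, ptr %gep.x, align 4
  %sat = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f32(<4 x float> %wide.load)
  store <4 x i32> %sat, ptr %gep.y, align 4

  ; the intrinsic has a matching vector overload
  declare <4 x i32> @llvm.fptosi.sat.v4i32.v4f32(<4 x float>)
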
diff --git a/llvm/test/Transforms/LoopVectorize/fpsat.ll b/llvm/test/Transforms/LoopVectorize/fpsat.ll
new file mode 100644
index 0000000000000..6e5f28f3c67c5
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/fpsat.ll
@@ -0,0 +1,97 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -S | FileCheck %s
+
+define void @signed(ptr %x, ptr %y, i32 %n) {
+; CHECK-LABEL: @signed(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[N:%.*]], 0
+; CHECK-NEXT: br i1 [[CMP6]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
+; CHECK: for.body.preheader:
+; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.cond.cleanup.loopexit:
+; CHECK-NEXT: br label [[FOR_COND_CLEANUP]]
+; CHECK: for.cond.cleanup:
+; CHECK-NEXT: ret void
+; CHECK: for.body:
+; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[X:%.*]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.fptosi.sat.i32.f32(float [[TMP0]])
+; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[Y:%.*]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: store i32 [[TMP1]], ptr [[ARRAYIDX2]], align 4
+; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[FOR_BODY]]
+;
+entry:
+ %cmp6 = icmp sgt i32 %n, 0
+ br i1 %cmp6, label %for.body.preheader, label %for.cond.cleanup
+
+for.body.preheader: ; preds = %entry
+ %wide.trip.count = zext i32 %n to i64
+ br label %for.body
+
+for.cond.cleanup: ; preds = %for.body, %entry
+ ret void
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds float, ptr %x, i64 %indvars.iv
+ %0 = load float, ptr %arrayidx, align 4
+ %1 = tail call i32 @llvm.fptosi.sat.i32.f32(float %0)
+ %arrayidx2 = getelementptr inbounds i32, ptr %y, i64 %indvars.iv
+ store i32 %1, ptr %arrayidx2, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
+ br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}
+
+define void @unsigned(ptr %x, ptr %y, i32 %n) {
+; CHECK-LABEL: @unsigned(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[N:%.*]], 0
+; CHECK-NEXT: br i1 [[CMP6]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
+; CHECK: for.body.preheader:
+; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.cond.cleanup.loopexit:
+; CHECK-NEXT: br label [[FOR_COND_CLEANUP]]
+; CHECK: for.cond.cleanup:
+; CHECK-NEXT: ret void
+; CHECK: for.body:
+; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[X:%.*]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.fptoui.sat.i32.f32(float [[TMP0]])
+; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[Y:%.*]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: store i32 [[TMP1]], ptr [[ARRAYIDX2]], align 4
+; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[FOR_BODY]]
+;
+entry:
+ %cmp6 = icmp sgt i32 %n, 0
+ br i1 %cmp6, label %for.body.preheader, label %for.cond.cleanup
+
+for.body.preheader: ; preds = %entry
+ %wide.trip.count = zext i32 %n to i64
+ br label %for.body
+
+for.cond.cleanup: ; preds = %for.body, %entry
+ ret void
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds float, ptr %x, i64 %indvars.iv
+ %0 = load float, ptr %arrayidx, align 4
+ %1 = tail call i32 @llvm.fptoui.sat.i32.f32(float %0)
+ %arrayidx2 = getelementptr inbounds i32, ptr %y, i64 %indvars.iv
+ store i32 %1, ptr %arrayidx2, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
+ br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}
+
+declare i32 @llvm.fptosi.sat.i32.f32(float)
+declare i32 @llvm.fptoui.sat.i32.f32(float)
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/fpsat.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/fpsat.ll
new file mode 100644
index 0000000000000..90b2637c1dc82
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/fpsat.ll
@@ -0,0 +1,97 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -slp-vectorizer -mtriple=aarch64-none-eabi < %s | FileCheck %s
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+
+define void @signed(ptr %x, ptr %y, i32 %n) {
+; CHECK-LABEL: @signed(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[L0:%.*]] = load float, ptr [[X:%.*]], align 4
+; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds float, ptr [[X]], i64 1
+; CHECK-NEXT: [[L2:%.*]] = load float, ptr [[ARRAYIDX_1]], align 4
+; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds float, ptr [[X]], i64 2
+; CHECK-NEXT: [[L4:%.*]] = load float, ptr [[ARRAYIDX_2]], align 4
+; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds float, ptr [[X]], i64 3
+; CHECK-NEXT: [[L6:%.*]] = load float, ptr [[ARRAYIDX_3]], align 4
+; CHECK-NEXT: [[L1:%.*]] = tail call i32 @llvm.fptosi.sat.i32.f32(float [[L0]])
+; CHECK-NEXT: [[L3:%.*]] = tail call i32 @llvm.fptosi.sat.i32.f32(float [[L2]])
+; CHECK-NEXT: [[L5:%.*]] = tail call i32 @llvm.fptosi.sat.i32.f32(float [[L4]])
+; CHECK-NEXT: [[L7:%.*]] = tail call i32 @llvm.fptosi.sat.i32.f32(float [[L6]])
+; CHECK-NEXT: store i32 [[L1]], ptr [[Y:%.*]], align 4
+; CHECK-NEXT: [[ARRAYIDX2_1:%.*]] = getelementptr inbounds i32, ptr [[Y]], i64 1
+; CHECK-NEXT: store i32 [[L3]], ptr [[ARRAYIDX2_1]], align 4
+; CHECK-NEXT: [[ARRAYIDX2_2:%.*]] = getelementptr inbounds i32, ptr [[Y]], i64 2
+; CHECK-NEXT: store i32 [[L5]], ptr [[ARRAYIDX2_2]], align 4
+; CHECK-NEXT: [[ARRAYIDX2_3:%.*]] = getelementptr inbounds i32, ptr [[Y]], i64 3
+; CHECK-NEXT: store i32 [[L7]], ptr [[ARRAYIDX2_3]], align 4
+; CHECK-NEXT: ret void
+;
+entry:
+ %l0 = load float, ptr %x, align 4
+ %arrayidx.1 = getelementptr inbounds float, ptr %x, i64 1
+ %l2 = load float, ptr %arrayidx.1, align 4
+ %arrayidx.2 = getelementptr inbounds float, ptr %x, i64 2
+ %l4 = load float, ptr %arrayidx.2, align 4
+ %arrayidx.3 = getelementptr inbounds float, ptr %x, i64 3
+ %l6 = load float, ptr %arrayidx.3, align 4
+ %l1 = tail call i32 @llvm.fptosi.sat.i32.f32(float %l0)
+ %l3 = tail call i32 @llvm.fptosi.sat.i32.f32(float %l2)
+ %l5 = tail call i32 @llvm.fptosi.sat.i32.f32(float %l4)
+ %l7 = tail call i32 @llvm.fptosi.sat.i32.f32(float %l6)
+ store i32 %l1, ptr %y, align 4
+ %arrayidx2.1 = getelementptr inbounds i32, ptr %y, i64 1
+ store i32 %l3, ptr %arrayidx2.1, align 4
+ %arrayidx2.2 = getelementptr inbounds i32, ptr %y, i64 2
+ store i32 %l5, ptr %arrayidx2.2, align 4
+ %arrayidx2.3 = getelementptr inbounds i32, ptr %y, i64 3
+ store i32 %l7, ptr %arrayidx2.3, align 4
+ ret void
+}
+
+define void @unsigned(ptr %x, ptr %y, i32 %n) {
+; CHECK-LABEL: @unsigned(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[L0:%.*]] = load float, ptr [[X:%.*]], align 4
+; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds float, ptr [[X]], i64 1
+; CHECK-NEXT: [[L2:%.*]] = load float, ptr [[ARRAYIDX_1]], align 4
+; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds float, ptr [[X]], i64 2
+; CHECK-NEXT: [[L4:%.*]] = load float, ptr [[ARRAYIDX_2]], align 4
+; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds float, ptr [[X]], i64 3
+; CHECK-NEXT: [[L6:%.*]] = load float, ptr [[ARRAYIDX_3]], align 4
+; CHECK-NEXT: [[L1:%.*]] = tail call i32 @llvm.fptoui.sat.i32.f32(float [[L0]])
+; CHECK-NEXT: [[L3:%.*]] = tail call i32 @llvm.fptoui.sat.i32.f32(float [[L2]])
+; CHECK-NEXT: [[L5:%.*]] = tail call i32 @llvm.fptoui.sat.i32.f32(float [[L4]])
+; CHECK-NEXT: [[L7:%.*]] = tail call i32 @llvm.fptoui.sat.i32.f32(float [[L6]])
+; CHECK-NEXT: store i32 [[L1]], ptr [[Y:%.*]], align 4
+; CHECK-NEXT: [[ARRAYIDX2_1:%.*]] = getelementptr inbounds i32, ptr [[Y]], i64 1
+; CHECK-NEXT: store i32 [[L3]], ptr [[ARRAYIDX2_1]], align 4
+; CHECK-NEXT: [[ARRAYIDX2_2:%.*]] = getelementptr inbounds i32, ptr [[Y]], i64 2
+; CHECK-NEXT: store i32 [[L5]], ptr [[ARRAYIDX2_2]], align 4
+; CHECK-NEXT: [[ARRAYIDX2_3:%.*]] = getelementptr inbounds i32, ptr [[Y]], i64 3
+; CHECK-NEXT: store i32 [[L7]], ptr [[ARRAYIDX2_3]], align 4
+; CHECK-NEXT: ret void
+;
+entry:
+ %l0 = load float, ptr %x, align 4
+ %arrayidx.1 = getelementptr inbounds float, ptr %x, i64 1
+ %l2 = load float, ptr %arrayidx.1, align 4
+ %arrayidx.2 = getelementptr inbounds float, ptr %x, i64 2
+ %l4 = load float, ptr %arrayidx.2, align 4
+ %arrayidx.3 = getelementptr inbounds float, ptr %x, i64 3
+ %l6 = load float, ptr %arrayidx.3, align 4
+ %l1 = tail call i32 @llvm.fptoui.sat.i32.f32(float %l0)
+ %l3 = tail call i32 @llvm.fptoui.sat.i32.f32(float %l2)
+ %l5 = tail call i32 @llvm.fptoui.sat.i32.f32(float %l4)
+ %l7 = tail call i32 @llvm.fptoui.sat.i32.f32(float %l6)
+ store i32 %l1, ptr %y, align 4
+ %arrayidx2.1 = getelementptr inbounds i32, ptr %y, i64 1
+ store i32 %l3, ptr %arrayidx2.1, align 4
+ %arrayidx2.2 = getelementptr inbounds i32, ptr %y, i64 2
+ store i32 %l5, ptr %arrayidx2.2, align 4
+ %arrayidx2.3 = getelementptr inbounds i32, ptr %y, i64 3
+ store i32 %l7, ptr %arrayidx2.3, align 4
+ ret void
+}
+
+declare i32 @llvm.fptosi.sat.i32.f32(float)
+declare i32 @llvm.fptoui.sat.i32.f32(float)