[llvm] 0c8e5be - [VPlan] Simplify redundant trunc (zext A) pairs to A.
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Sun Oct 22 03:42:18 PDT 2023
Author: Florian Hahn
Date: 2023-10-22T11:41:38+01:00
New Revision: 0c8e5be6fa085381d200e1ee23addcbc99f2ee24
URL: https://github.com/llvm/llvm-project/commit/0c8e5be6fa085381d200e1ee23addcbc99f2ee24
DIFF: https://github.com/llvm/llvm-project/commit/0c8e5be6fa085381d200e1ee23addcbc99f2ee24.diff
LOG: [VPlan] Simplify redundant trunc (zext A) pairs to A.
Add simplification for redundant trunc (zext A) pairs to A, applying the
transform from D149903 more generally.
Depends on D159200.
Reviewed By: Ayal
Differential Revision: https://reviews.llvm.org/D159202
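
For illustration, a minimal IR sketch of the redundant pair this folds
(value names are hypothetical; the fold only applies when the trunc's
result type matches A's type, as in the test updates below):

  ; Before: the i8 value is widened to i32 and immediately narrowed back.
  %widened = zext i8 %a to i32
  %narrowed = trunc i32 %widened to i8
  store i8 %narrowed, ptr %p, align 1

  ; After: uses of the trunc are replaced with %a directly.
  store i8 %a, ptr %p, align 1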
Added:
Modified:
llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
llvm/test/Transforms/LoopVectorize/if-pred-stores.ll
llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge-vf1.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 5cade16850f97f4..f309ca5f9041898 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -812,6 +812,8 @@ static bool isConstantOne(VPValue *V) {
static unsigned getOpcodeForRecipe(VPRecipeBase &R) {
if (auto *WidenR = dyn_cast<VPWidenRecipe>(&R))
return WidenR->getUnderlyingInstr()->getOpcode();
+ if (auto *WidenC = dyn_cast<VPWidenCastRecipe>(&R))
+ return WidenC->getOpcode();
if (auto *RepR = dyn_cast<VPReplicateRecipe>(&R))
return RepR->getUnderlyingInstr()->getOpcode();
if (auto *VPI = dyn_cast<VPInstruction>(&R))
@@ -819,16 +821,39 @@ static unsigned getOpcodeForRecipe(VPRecipeBase &R) {
return 0;
}
+/// Return the type of \p VPV if possible.
+static Type *getTypeForVPValue(VPValue *VPV) {
+ // TODO: Replace with VPlan type inference once ready.
+ if (auto *VPC = dyn_cast<VPWidenCastRecipe>(VPV))
+ return VPC->getResultType();
+ auto *UV = VPV->getUnderlyingValue();
+ return UV->getType();
+}
+
/// Try to simplify recipe \p R.
static void simplifyRecipe(VPRecipeBase &R) {
- unsigned Opcode = getOpcodeForRecipe(R);
- if (Opcode == Instruction::Mul) {
+ switch (getOpcodeForRecipe(R)) {
+ case Instruction::Mul: {
VPValue *A = R.getOperand(0);
VPValue *B = R.getOperand(1);
if (isConstantOne(A))
return R.getVPSingleValue()->replaceAllUsesWith(B);
if (isConstantOne(B))
return R.getVPSingleValue()->replaceAllUsesWith(A);
+ break;
+ }
+ case Instruction::Trunc: {
+ VPRecipeBase *Zext = R.getOperand(0)->getDefiningRecipe();
+ if (!Zext || getOpcodeForRecipe(*Zext) != Instruction::ZExt)
+ break;
+ VPValue *A = Zext->getOperand(0);
+ VPValue *Trunc = R.getVPSingleValue();
+ if (getTypeForVPValue(Trunc) == getTypeForVPValue(A))
+ Trunc->replaceAllUsesWith(A);
+ break;
+ }
+ default:
+ break;
}
}
diff --git a/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll b/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll
index 926168c622fbde3..bb0e2b19cfa4b82 100644
--- a/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll
+++ b/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll
@@ -432,17 +432,13 @@ define void @minimal_bit_widths(i1 %c) {
; UNROLL-NEXT: [[TMP5:%.*]] = load i8, ptr [[TMP3]], align 1
; UNROLL-NEXT: br i1 [[C:%.*]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE3]]
; UNROLL: pred.store.if:
-; UNROLL-NEXT: [[TMP6:%.*]] = zext i8 [[TMP4]] to i32
-; UNROLL-NEXT: [[TMP7:%.*]] = trunc i32 [[TMP6]] to i8
-; UNROLL-NEXT: store i8 [[TMP7]], ptr [[TMP2]], align 1
-; UNROLL-NEXT: [[TMP8:%.*]] = zext i8 [[TMP5]] to i32
-; UNROLL-NEXT: [[TMP9:%.*]] = trunc i32 [[TMP8]] to i8
-; UNROLL-NEXT: store i8 [[TMP9]], ptr [[TMP3]], align 1
+; UNROLL-NEXT: store i8 [[TMP4]], ptr [[TMP2]], align 1
+; UNROLL-NEXT: store i8 [[TMP5]], ptr [[TMP3]], align 1
; UNROLL-NEXT: br label [[PRED_STORE_CONTINUE3]]
; UNROLL: pred.store.continue3:
; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
-; UNROLL-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
-; UNROLL-NEXT: br i1 [[TMP10]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; UNROLL-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
+; UNROLL-NEXT: br i1 [[TMP6]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; UNROLL: for.end:
; UNROLL-NEXT: ret void
;
@@ -461,21 +457,17 @@ define void @minimal_bit_widths(i1 %c) {
; UNROLL-NOSIMPLIFY-NEXT: [[TMP5:%.*]] = load i8, ptr [[TMP3]], align 1
; UNROLL-NOSIMPLIFY-NEXT: br i1 [[C:%.*]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; UNROLL-NOSIMPLIFY: pred.store.if:
-; UNROLL-NOSIMPLIFY-NEXT: [[TMP6:%.*]] = zext i8 [[TMP4]] to i32
-; UNROLL-NOSIMPLIFY-NEXT: [[TMP7:%.*]] = trunc i32 [[TMP6]] to i8
-; UNROLL-NOSIMPLIFY-NEXT: store i8 [[TMP7]], ptr [[TMP2]], align 1
+; UNROLL-NOSIMPLIFY-NEXT: store i8 [[TMP4]], ptr [[TMP2]], align 1
; UNROLL-NOSIMPLIFY-NEXT: br label [[PRED_STORE_CONTINUE]]
; UNROLL-NOSIMPLIFY: pred.store.continue:
; UNROLL-NOSIMPLIFY-NEXT: br i1 [[C]], label [[PRED_STORE_IF2:%.*]], label [[PRED_STORE_CONTINUE3]]
; UNROLL-NOSIMPLIFY: pred.store.if2:
-; UNROLL-NOSIMPLIFY-NEXT: [[TMP8:%.*]] = zext i8 [[TMP5]] to i32
-; UNROLL-NOSIMPLIFY-NEXT: [[TMP9:%.*]] = trunc i32 [[TMP8]] to i8
-; UNROLL-NOSIMPLIFY-NEXT: store i8 [[TMP9]], ptr [[TMP3]], align 1
+; UNROLL-NOSIMPLIFY-NEXT: store i8 [[TMP5]], ptr [[TMP3]], align 1
; UNROLL-NOSIMPLIFY-NEXT: br label [[PRED_STORE_CONTINUE3]]
; UNROLL-NOSIMPLIFY: pred.store.continue3:
; UNROLL-NOSIMPLIFY-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
-; UNROLL-NOSIMPLIFY-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
-; UNROLL-NOSIMPLIFY-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; UNROLL-NOSIMPLIFY-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
+; UNROLL-NOSIMPLIFY-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; UNROLL-NOSIMPLIFY: middle.block:
; UNROLL-NOSIMPLIFY-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; UNROLL-NOSIMPLIFY: scalar.ph:
@@ -515,27 +507,23 @@ define void @minimal_bit_widths(i1 %c) {
; VEC-NEXT: [[TMP3:%.*]] = extractelement <2 x i1> [[BROADCAST_SPLAT]], i32 0
; VEC-NEXT: br i1 [[TMP3]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; VEC: pred.store.if:
-; VEC-NEXT: [[TMP4:%.*]] = extractelement <2 x i8> [[WIDE_LOAD]], i32 0
-; VEC-NEXT: [[TMP5:%.*]] = zext i8 [[TMP4]] to i32
-; VEC-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr undef, i64 [[TMP0]]
-; VEC-NEXT: [[TMP7:%.*]] = trunc i32 [[TMP5]] to i8
-; VEC-NEXT: store i8 [[TMP7]], ptr [[TMP6]], align 1
+; VEC-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr undef, i64 [[TMP0]]
+; VEC-NEXT: [[TMP5:%.*]] = extractelement <2 x i8> [[WIDE_LOAD]], i32 0
+; VEC-NEXT: store i8 [[TMP5]], ptr [[TMP4]], align 1
; VEC-NEXT: br label [[PRED_STORE_CONTINUE]]
; VEC: pred.store.continue:
-; VEC-NEXT: [[TMP8:%.*]] = extractelement <2 x i1> [[BROADCAST_SPLAT]], i32 1
-; VEC-NEXT: br i1 [[TMP8]], label [[PRED_STORE_IF2:%.*]], label [[PRED_STORE_CONTINUE3]]
+; VEC-NEXT: [[TMP6:%.*]] = extractelement <2 x i1> [[BROADCAST_SPLAT]], i32 1
+; VEC-NEXT: br i1 [[TMP6]], label [[PRED_STORE_IF2:%.*]], label [[PRED_STORE_CONTINUE3]]
; VEC: pred.store.if2:
-; VEC-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 1
-; VEC-NEXT: [[TMP10:%.*]] = extractelement <2 x i8> [[WIDE_LOAD]], i32 1
-; VEC-NEXT: [[TMP11:%.*]] = zext i8 [[TMP10]] to i32
-; VEC-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr undef, i64 [[TMP9]]
-; VEC-NEXT: [[TMP13:%.*]] = trunc i32 [[TMP11]] to i8
-; VEC-NEXT: store i8 [[TMP13]], ptr [[TMP12]], align 1
+; VEC-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 1
+; VEC-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr undef, i64 [[TMP7]]
+; VEC-NEXT: [[TMP9:%.*]] = extractelement <2 x i8> [[WIDE_LOAD]], i32 1
+; VEC-NEXT: store i8 [[TMP9]], ptr [[TMP8]], align 1
; VEC-NEXT: br label [[PRED_STORE_CONTINUE3]]
; VEC: pred.store.continue3:
; VEC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
-; VEC-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
-; VEC-NEXT: br i1 [[TMP14]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; VEC-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
+; VEC-NEXT: br i1 [[TMP10]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; VEC: for.end:
; VEC-NEXT: ret void
;
@@ -606,21 +594,17 @@ define void @minimal_bit_widths_with_aliasing_store(i1 %c, ptr %ptr) {
; UNROLL-NOSIMPLIFY-NEXT: store i8 0, ptr [[TMP3]], align 1
; UNROLL-NOSIMPLIFY-NEXT: br i1 [[C:%.*]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; UNROLL-NOSIMPLIFY: pred.store.if:
-; UNROLL-NOSIMPLIFY-NEXT: [[TMP6:%.*]] = zext i8 [[TMP4]] to i32
-; UNROLL-NOSIMPLIFY-NEXT: [[TMP7:%.*]] = trunc i32 [[TMP6]] to i8
-; UNROLL-NOSIMPLIFY-NEXT: store i8 [[TMP7]], ptr [[TMP2]], align 1
+; UNROLL-NOSIMPLIFY-NEXT: store i8 [[TMP4]], ptr [[TMP2]], align 1
; UNROLL-NOSIMPLIFY-NEXT: br label [[PRED_STORE_CONTINUE]]
; UNROLL-NOSIMPLIFY: pred.store.continue:
; UNROLL-NOSIMPLIFY-NEXT: br i1 [[C]], label [[PRED_STORE_IF2:%.*]], label [[PRED_STORE_CONTINUE3]]
; UNROLL-NOSIMPLIFY: pred.store.if2:
-; UNROLL-NOSIMPLIFY-NEXT: [[TMP8:%.*]] = zext i8 [[TMP5]] to i32
-; UNROLL-NOSIMPLIFY-NEXT: [[TMP9:%.*]] = trunc i32 [[TMP8]] to i8
-; UNROLL-NOSIMPLIFY-NEXT: store i8 [[TMP9]], ptr [[TMP3]], align 1
+; UNROLL-NOSIMPLIFY-NEXT: store i8 [[TMP5]], ptr [[TMP3]], align 1
; UNROLL-NOSIMPLIFY-NEXT: br label [[PRED_STORE_CONTINUE3]]
; UNROLL-NOSIMPLIFY: pred.store.continue3:
; UNROLL-NOSIMPLIFY-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
-; UNROLL-NOSIMPLIFY-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 0
-; UNROLL-NOSIMPLIFY-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
+; UNROLL-NOSIMPLIFY-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 0
+; UNROLL-NOSIMPLIFY-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; UNROLL-NOSIMPLIFY: middle.block:
; UNROLL-NOSIMPLIFY-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; UNROLL-NOSIMPLIFY: scalar.ph:
diff --git a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge-vf1.ll b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge-vf1.ll
index 444f36ad3a739c7..0dac63d54557d7a 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge-vf1.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge-vf1.ll
@@ -29,9 +29,7 @@ define void @sink_with_sideeffects(i1 %c, ptr %ptr) {
; CHECK-NEXT: Successor(s): pred.store.if, pred.store.continue
; CHECK: pred.store.if:
-; CHECK-NEXT: CLONE ir<%tmp4> = zext ir<%tmp3>
-; CHECK-NEXT: CLONE ir<%tmp5> = trunc ir<%tmp4>
-; CHECK-NEXT: CLONE store ir<%tmp5>, ir<%tmp2>
+; CHECK-NEXT: CLONE store ir<%tmp3>, ir<%tmp2>
; CHECK-NEXT: Successor(s): pred.store.continue
; CHECK: pred.store.continue: