[llvm] 6d53d35 - [TypePromotion] Avoid some unnecessary truncs
Sam Parker via llvm-commits
llvm-commits at lists.llvm.org
Fri May 13 01:46:15 PDT 2022
Author: Sam Parker
Date: 2022-05-13T09:45:20+01:00
New Revision: 6d53d35efd3b876d2cc70a776fb168d6debfeba5
URL: https://github.com/llvm/llvm-project/commit/6d53d35efd3b876d2cc70a776fb168d6debfeba5
DIFF: https://github.com/llvm/llvm-project/commit/6d53d35efd3b876d2cc70a776fb168d6debfeba5.diff
LOG: [TypePromotion] Avoid some unnecessary truncs
Recommit.
Check for legal zext 'sinks' before inserting a trunc.
Differential Revision: https://reviews.llvm.org/D115451
Added:
llvm/test/Transforms/TypePromotion/AArch64/phi-zext-gep.ll
Modified:
llvm/lib/CodeGen/TypePromotion.cpp
llvm/test/CodeGen/AArch64/bfis-in-loop.ll
llvm/test/Transforms/TypePromotion/AArch64/convert-utf.ll
llvm/test/Transforms/TypePromotion/AArch64/trunc-zext-chain.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/TypePromotion.cpp b/llvm/lib/CodeGen/TypePromotion.cpp
index cb0d99326201..016feb0df2da 100644
--- a/llvm/lib/CodeGen/TypePromotion.cpp
+++ b/llvm/lib/CodeGen/TypePromotion.cpp
@@ -549,6 +549,11 @@ void IRPromoter::TruncateSinks() {
continue;
}
+ // Don't insert a trunc for a zext which can still legally promote.
+ if (auto ZExt = dyn_cast<ZExtInst>(I))
+ if (ZExt->getType()->getScalarSizeInBits() > PromotedWidth)
+ continue;
+
// Now handle the others.
for (unsigned i = 0; i < I->getNumOperands(); ++i) {
Type *Ty = TruncTysMap[I][i];
diff --git a/llvm/test/CodeGen/AArch64/bfis-in-loop.ll b/llvm/test/CodeGen/AArch64/bfis-in-loop.ll
index ec1389d501ef..6ee2feb2c217 100644
--- a/llvm/test/CodeGen/AArch64/bfis-in-loop.ll
+++ b/llvm/test/CodeGen/AArch64/bfis-in-loop.ll
@@ -21,17 +21,18 @@ define i64 @bfis_in_loop_zero() {
; CHECK-NEXT: .LBB0_1: // %midblock
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldrh w10, [x9, #72]
-; CHECK-NEXT: and x0, x0, #0xffffffff00000000
-; CHECK-NEXT: ldr x12, [x9, #8]
; CHECK-NEXT: cmp w10, #0
-; CHECK-NEXT: lsr w11, w10, #8
+; CHECK-NEXT: ubfx x11, x10, #8, #24
+; CHECK-NEXT: cset w12, ne
; CHECK-NEXT: csel w8, w8, w11, eq
-; CHECK-NEXT: cset w11, ne
-; CHECK-NEXT: ldr x9, [x12, #16]
-; CHECK-NEXT: bfi w10, w8, #8, #24
-; CHECK-NEXT: bfi x0, x11, #16, #1
-; CHECK-NEXT: bfxil x0, x10, #0, #16
-; CHECK-NEXT: cbnz x12, .LBB0_1
+; CHECK-NEXT: ldr x11, [x9, #8]
+; CHECK-NEXT: and x9, x10, #0xff
+; CHECK-NEXT: and x10, x0, #0xffffffff00000000
+; CHECK-NEXT: bfi x9, x8, #8, #32
+; CHECK-NEXT: bfi x10, x12, #16, #1
+; CHECK-NEXT: orr x0, x10, x9
+; CHECK-NEXT: ldr x9, [x11, #16]
+; CHECK-NEXT: cbnz x11, .LBB0_1
; CHECK-NEXT: // %bb.2: // %exit
; CHECK-NEXT: ret
entry:
@@ -89,17 +90,18 @@ define i64 @bfis_in_loop_undef() {
; CHECK-NEXT: .LBB1_1: // %midblock
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldrh w10, [x9, #72]
-; CHECK-NEXT: and x0, x0, #0xffffffff00000000
-; CHECK-NEXT: ldr x12, [x9, #8]
; CHECK-NEXT: cmp w10, #0
-; CHECK-NEXT: lsr w11, w10, #8
+; CHECK-NEXT: ubfx x11, x10, #8, #24
+; CHECK-NEXT: cset w12, ne
; CHECK-NEXT: csel w8, w8, w11, eq
-; CHECK-NEXT: cset w11, ne
-; CHECK-NEXT: ldr x9, [x12, #16]
-; CHECK-NEXT: bfi w10, w8, #8, #24
-; CHECK-NEXT: bfi x0, x11, #16, #1
-; CHECK-NEXT: bfxil x0, x10, #0, #16
-; CHECK-NEXT: cbnz x12, .LBB1_1
+; CHECK-NEXT: ldr x11, [x9, #8]
+; CHECK-NEXT: and x9, x10, #0xff
+; CHECK-NEXT: and x10, x0, #0xffffffff00000000
+; CHECK-NEXT: bfi x9, x8, #8, #32
+; CHECK-NEXT: bfi x10, x12, #16, #1
+; CHECK-NEXT: orr x0, x10, x9
+; CHECK-NEXT: ldr x9, [x11, #16]
+; CHECK-NEXT: cbnz x11, .LBB1_1
; CHECK-NEXT: // %bb.2: // %exit
; CHECK-NEXT: ret
entry:
diff --git a/llvm/test/Transforms/TypePromotion/AArch64/convert-utf.ll b/llvm/test/Transforms/TypePromotion/AArch64/convert-utf.ll
index 7965c8a5ff78..b77ffc041c92 100644
--- a/llvm/test/Transforms/TypePromotion/AArch64/convert-utf.ll
+++ b/llvm/test/Transforms/TypePromotion/AArch64/convert-utf.ll
@@ -20,8 +20,7 @@ define dso_local noundef i32 @_ZN4llvm18ConvertUTF8toUTF16EPPKhS1_PPtS3_NS_15Con
; CHECK-NEXT: [[SOURCE_0184:%.*]] = phi i8* [ [[SOURCE_6:%.*]], [[CLEANUP]] ], [ [[I]], [[ENTRY]] ]
; CHECK-NEXT: [[I2:%.*]] = load i8, i8* [[SOURCE_0184]], align 1
; CHECK-NEXT: [[TMP0:%.*]] = zext i8 [[I2]] to i32
-; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[TMP0]] to i8
-; CHECK-NEXT: [[IDXPROM:%.*]] = zext i8 [[TMP1]] to i64
+; CHECK-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP0]] to i64
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [256 x i8], [256 x i8]* @_ZN4llvmL20trailingBytesForUTF8E, i64 0, i64 [[IDXPROM]]
; CHECK-NEXT: [[I3:%.*]] = load i8, i8* [[ARRAYIDX]], align 1
; CHECK-NEXT: [[CONV1:%.*]] = zext i8 [[I3]] to i64
@@ -54,9 +53,9 @@ define dso_local noundef i32 @_ZN4llvm18ConvertUTF8toUTF16EPPKhS1_PPtS3_NS_15Con
; CHECK-NEXT: [[SRCPTR_1_I:%.*]] = phi i8* [ [[ADD_PTR_I]], [[IF_END]] ], [ [[INCDEC_PTR4_I]], [[SW_BB3_I]] ]
; CHECK-NEXT: [[INCDEC_PTR13_I:%.*]] = getelementptr inbounds i8, i8* [[SRCPTR_1_I]], i64 -1
; CHECK-NEXT: [[I10:%.*]] = load i8, i8* [[INCDEC_PTR13_I]], align 1
-; CHECK-NEXT: [[TMP2:%.*]] = zext i8 [[I10]] to i32
-; CHECK-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP2]] to i8
-; CHECK-NEXT: [[I11:%.*]] = icmp sgt i8 [[TMP3]], -65
+; CHECK-NEXT: [[TMP1:%.*]] = zext i8 [[I10]] to i32
+; CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
+; CHECK-NEXT: [[I11:%.*]] = icmp sgt i8 [[TMP2]], -65
; CHECK-NEXT: br i1 [[I11]], label [[WHILE_END]], label [[IF_END20_I:%.*]]
; CHECK: if.end20.i:
; CHECK-NEXT: switch i32 [[TMP0]], label [[SW_BB47_I]] [
@@ -66,20 +65,20 @@ define dso_local noundef i32 @_ZN4llvm18ConvertUTF8toUTF16EPPKhS1_PPtS3_NS_15Con
; CHECK-NEXT: i32 244, label [[SW_BB37_I:%.*]]
; CHECK-NEXT: ]
; CHECK: sw.bb22.i:
-; CHECK-NEXT: [[CMP24_I:%.*]] = icmp ult i32 [[TMP2]], 160
+; CHECK-NEXT: [[CMP24_I:%.*]] = icmp ult i32 [[TMP1]], 160
; CHECK-NEXT: br i1 [[CMP24_I]], label [[WHILE_END]], label [[IF_END5:%.*]]
; CHECK: sw.bb27.i:
-; CHECK-NEXT: [[CMP29_I:%.*]] = icmp ugt i32 [[TMP2]], 159
+; CHECK-NEXT: [[CMP29_I:%.*]] = icmp ugt i32 [[TMP1]], 159
; CHECK-NEXT: br i1 [[CMP29_I]], label [[WHILE_END]], label [[IF_END5]]
; CHECK: sw.bb32.i:
-; CHECK-NEXT: [[CMP34_I:%.*]] = icmp ult i32 [[TMP2]], 144
+; CHECK-NEXT: [[CMP34_I:%.*]] = icmp ult i32 [[TMP1]], 144
; CHECK-NEXT: br i1 [[CMP34_I]], label [[WHILE_END]], label [[IF_END5]]
; CHECK: sw.bb37.i:
-; CHECK-NEXT: [[CMP39_I:%.*]] = icmp ugt i32 [[TMP2]], 143
+; CHECK-NEXT: [[CMP39_I:%.*]] = icmp ugt i32 [[TMP1]], 143
; CHECK-NEXT: br i1 [[CMP39_I]], label [[WHILE_END]], label [[IF_END5]]
; CHECK: sw.bb47.i:
-; CHECK-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP0]] to i8
-; CHECK-NEXT: [[I12:%.*]] = icmp slt i8 [[TMP4]], -62
+; CHECK-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP0]] to i8
+; CHECK-NEXT: [[I12:%.*]] = icmp slt i8 [[TMP3]], -62
; CHECK-NEXT: [[CMP56_I:%.*]] = icmp ugt i32 [[TMP0]], 244
; CHECK-NEXT: [[OR_COND:%.*]] = or i1 [[I12]], [[CMP56_I]]
; CHECK-NEXT: br i1 [[OR_COND]], label [[WHILE_END]], label [[IF_END5]]
@@ -95,30 +94,30 @@ define dso_local noundef i32 @_ZN4llvm18ConvertUTF8toUTF16EPPKhS1_PPtS3_NS_15Con
; CHECK-NEXT: [[CONV16:%.*]] = zext i8 [[I2]] to i32
; CHECK-NEXT: [[SHL18:%.*]] = shl nuw nsw i32 [[CONV16]], 6
; CHECK-NEXT: [[DOTPRE232:%.*]] = load i8, i8* [[INCDEC_PTR15]], align 1
-; CHECK-NEXT: [[TMP5:%.*]] = zext i8 [[DOTPRE232]] to i32
+; CHECK-NEXT: [[TMP4:%.*]] = zext i8 [[DOTPRE232]] to i32
; CHECK-NEXT: br label [[SW_BB19]]
; CHECK: sw.bb19:
-; CHECK-NEXT: [[I13:%.*]] = phi i32 [ [[TMP0]], [[IF_END5]] ], [ [[TMP5]], [[SW_BB14]] ]
+; CHECK-NEXT: [[I13:%.*]] = phi i32 [ [[TMP0]], [[IF_END5]] ], [ [[TMP4]], [[SW_BB14]] ]
; CHECK-NEXT: [[SOURCE_3:%.*]] = phi i8* [ [[SOURCE_0184]], [[IF_END5]] ], [ [[INCDEC_PTR15]], [[SW_BB14]] ]
; CHECK-NEXT: [[CH_2:%.*]] = phi i32 [ 0, [[IF_END5]] ], [ [[SHL18]], [[SW_BB14]] ]
; CHECK-NEXT: [[INCDEC_PTR20:%.*]] = getelementptr inbounds i8, i8* [[SOURCE_3]], i64 1
; CHECK-NEXT: [[ADD22:%.*]] = add nuw nsw i32 [[CH_2]], [[I13]]
; CHECK-NEXT: [[SHL23:%.*]] = shl nsw i32 [[ADD22]], 6
; CHECK-NEXT: [[DOTPRE233:%.*]] = load i8, i8* [[INCDEC_PTR20]], align 1
-; CHECK-NEXT: [[TMP6:%.*]] = zext i8 [[DOTPRE233]] to i32
+; CHECK-NEXT: [[TMP5:%.*]] = zext i8 [[DOTPRE233]] to i32
; CHECK-NEXT: br label [[SW_BB24]]
; CHECK: sw.bb24:
-; CHECK-NEXT: [[I14:%.*]] = phi i32 [ [[TMP0]], [[IF_END5]] ], [ [[TMP6]], [[SW_BB19]] ]
+; CHECK-NEXT: [[I14:%.*]] = phi i32 [ [[TMP0]], [[IF_END5]] ], [ [[TMP5]], [[SW_BB19]] ]
; CHECK-NEXT: [[SOURCE_4:%.*]] = phi i8* [ [[SOURCE_0184]], [[IF_END5]] ], [ [[INCDEC_PTR20]], [[SW_BB19]] ]
; CHECK-NEXT: [[CH_3:%.*]] = phi i32 [ 0, [[IF_END5]] ], [ [[SHL23]], [[SW_BB19]] ]
; CHECK-NEXT: [[INCDEC_PTR25:%.*]] = getelementptr inbounds i8, i8* [[SOURCE_4]], i64 1
; CHECK-NEXT: [[ADD27:%.*]] = add nsw i32 [[CH_3]], [[I14]]
; CHECK-NEXT: [[SHL28:%.*]] = shl i32 [[ADD27]], 6
; CHECK-NEXT: [[DOTPRE234:%.*]] = load i8, i8* [[INCDEC_PTR25]], align 1
-; CHECK-NEXT: [[TMP7:%.*]] = zext i8 [[DOTPRE234]] to i32
+; CHECK-NEXT: [[TMP6:%.*]] = zext i8 [[DOTPRE234]] to i32
; CHECK-NEXT: br label [[SW_BB29]]
; CHECK: sw.bb29:
-; CHECK-NEXT: [[I15:%.*]] = phi i32 [ [[TMP0]], [[IF_END5]] ], [ [[TMP7]], [[SW_BB24]] ]
+; CHECK-NEXT: [[I15:%.*]] = phi i32 [ [[TMP0]], [[IF_END5]] ], [ [[TMP6]], [[SW_BB24]] ]
; CHECK-NEXT: [[SOURCE_5:%.*]] = phi i8* [ [[SOURCE_0184]], [[IF_END5]] ], [ [[INCDEC_PTR25]], [[SW_BB24]] ]
; CHECK-NEXT: [[CH_4:%.*]] = phi i32 [ 0, [[IF_END5]] ], [ [[SHL28]], [[SW_BB24]] ]
; CHECK-NEXT: [[INCDEC_PTR30:%.*]] = getelementptr inbounds i8, i8* [[SOURCE_5]], i64 1
diff --git a/llvm/test/Transforms/TypePromotion/AArch64/phi-zext-gep.ll b/llvm/test/Transforms/TypePromotion/AArch64/phi-zext-gep.ll
new file mode 100644
index 000000000000..9c2da5a49a1b
--- /dev/null
+++ b/llvm/test/Transforms/TypePromotion/AArch64/phi-zext-gep.ll
@@ -0,0 +1,53 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -mtriple=aarch64 -type-promotion -verify -S %s -o - | FileCheck %s
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+
+define dso_local i32 @avoid_trunc_gep(i8* nocapture readonly %ip) {
+; CHECK-LABEL: @avoid_trunc_gep(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TAG_0_IN8:%.*]] = load i8, i8* [[IP:%.*]], align 1
+; CHECK-NEXT: [[TMP0:%.*]] = zext i8 [[TAG_0_IN8]] to i32
+; CHECK-NEXT: [[CMP9:%.*]] = icmp ult i32 [[TMP0]], 100
+; CHECK-NEXT: br i1 [[CMP9]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]]
+; CHECK: for.body.preheader:
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[TAG_0_IN10:%.*]] = phi i32 [ [[TMP1:%.*]], [[FOR_BODY]] ], [ [[TMP0]], [[FOR_BODY_PREHEADER]] ]
+; CHECK-NEXT: [[TAG_0:%.*]] = zext i32 [[TAG_0_IN10]] to i64
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, i8* [[IP]], i64 [[TAG_0]]
+; CHECK-NEXT: [[TAG_0_IN:%.*]] = load i8, i8* [[ARRAYIDX]], align 1
+; CHECK-NEXT: [[TMP1]] = zext i8 [[TAG_0_IN]] to i32
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[TMP1]], 100
+; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT:%.*]]
+; CHECK: for.end.loopexit:
+; CHECK-NEXT: br label [[FOR_END]]
+; CHECK: for.end:
+; CHECK-NEXT: [[TAG_0_IN_LCSSA:%.*]] = phi i32 [ [[TMP0]], [[ENTRY:%.*]] ], [ [[TMP1]], [[FOR_END_LOOPEXIT]] ]
+; CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TAG_0_IN_LCSSA]] to i8
+; CHECK-NEXT: ret i32 [[TAG_0_IN_LCSSA]]
+;
+entry:
+ %tag.0.in8 = load i8, i8* %ip, align 1
+ %cmp9 = icmp ult i8 %tag.0.in8, 100
+ br i1 %cmp9, label %for.body.preheader, label %for.end
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %tag.0.in10 = phi i8 [ %tag.0.in, %for.body ], [ %tag.0.in8, %for.body.preheader ]
+ %tag.0 = zext i8 %tag.0.in10 to i64
+ %arrayidx = getelementptr inbounds i8, i8* %ip, i64 %tag.0
+ %tag.0.in = load i8, i8* %arrayidx, align 1
+ %cmp = icmp ult i8 %tag.0.in, 100
+ br i1 %cmp, label %for.body, label %for.end.loopexit
+
+for.end.loopexit: ; preds = %for.body
+ br label %for.end
+
+for.end: ; preds = %for.end.loopexit, %entry
+ %tag.0.in.lcssa = phi i8 [ %tag.0.in8, %entry ], [ %tag.0.in, %for.end.loopexit ]
+ %conv3 = zext i8 %tag.0.in.lcssa to i32
+ ret i32 %conv3
+}
diff --git a/llvm/test/Transforms/TypePromotion/AArch64/trunc-zext-chain.ll b/llvm/test/Transforms/TypePromotion/AArch64/trunc-zext-chain.ll
index 1b5d2881477d..0a846ba115ec 100644
--- a/llvm/test/Transforms/TypePromotion/AArch64/trunc-zext-chain.ll
+++ b/llvm/test/Transforms/TypePromotion/AArch64/trunc-zext-chain.ll
@@ -35,8 +35,7 @@ define i64 @zext_trunc_i8_i16_i32_i64() {
; CHECK-NEXT: [[VAR24:%.*]] = and i32 [[TMP0]], 255
; CHECK-NEXT: [[VAR25:%.*]] = or i32 [[VAR23]], [[VAR24]]
; CHECK-NEXT: [[VAR26:%.*]] = select i1 [[VAR18]], i64 0, i64 65536
-; CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[VAR25]] to i16
-; CHECK-NEXT: [[VAR27:%.*]] = zext i16 [[TMP2]] to i64
+; CHECK-NEXT: [[VAR27:%.*]] = zext i32 [[VAR25]] to i64
; CHECK-NEXT: [[VAR28:%.*]] = and i64 [[VAR4]], -4294967296
; CHECK-NEXT: [[VAR29:%.*]] = or i64 [[VAR26]], [[VAR28]]
; CHECK-NEXT: [[VAR30]] = or i64 [[VAR29]], [[VAR27]]
@@ -120,8 +119,7 @@ define i64 @with_undef() {
; CHECK-NEXT: [[VAR24:%.*]] = and i32 [[TMP0]], 255
; CHECK-NEXT: [[VAR25:%.*]] = or i32 [[VAR23]], [[VAR24]]
; CHECK-NEXT: [[VAR26:%.*]] = select i1 [[VAR18]], i64 0, i64 65536
-; CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[VAR25]] to i16
-; CHECK-NEXT: [[VAR27:%.*]] = zext i16 [[TMP2]] to i64
+; CHECK-NEXT: [[VAR27:%.*]] = zext i32 [[VAR25]] to i64
; CHECK-NEXT: [[VAR28:%.*]] = and i64 [[VAR4]], -4294967296
; CHECK-NEXT: [[VAR29:%.*]] = or i64 [[VAR26]], [[VAR28]]
; CHECK-NEXT: [[VAR30]] = or i64 [[VAR29]], [[VAR27]]
More information about the llvm-commits mailing list