[llvm] c2ab65d - [IndVars] Eliminate redundant type cast with different sizes

Tue Aug 9 09:00:13 PDT 2022

Author: zhongyunde
Date: 2022-08-09T23:59:42+08:00
New Revision: c2ab65ddaf4b2ab936ede3cc673de7f8f0440a48

URL: https://github.com/llvm/llvm-project/commit/c2ab65ddaf4b2ab936ede3cc673de7f8f0440a48
DIFF: https://github.com/llvm/llvm-project/commit/c2ab65ddaf4b2ab936ede3cc673de7f8f0440a48.diff

LOG: [IndVars] Eliminate redundant type cast with different sizes

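Handle the case where the integer source type of the sitofp/uitofp and the
integer result type of the fptosi/fptoui differ in size, by replacing the
cast pair with a trunc or a sext/zext of the IV. Depends on D129756.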
Fixes https://github.com/llvm/llvm-project/issues/55505.

Reviewed By: nikic

Differential Revision: https://reviews.llvm.org/D129958

Added: 
    

Modified: 
    llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
    llvm/test/Transforms/IndVarSimplify/floating-point-iv.ll
    llvm/test/Transforms/IndVarSimplify/floating-point-small-iv.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp b/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
index 0ab79a32f5267..0a856eec3e12a 100644

--- a/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
@@ -683,7 +683,7 @@ bool SimplifyIndvar::replaceFloatIVWithIntegerIV(Instruction *UseInst) {
       UseInst->getOpcode() != CastInst::UIToFP)
     return false;
 
-  Value *IVOperand = UseInst->getOperand(0);
+  Instruction *IVOperand = cast<Instruction>(UseInst->getOperand(0));
   // Get the symbolic expression for this instruction.
   const SCEV *IV = SE->getSCEV(IVOperand);
   unsigned MaskBits;
@@ -696,17 +696,35 @@ bool SimplifyIndvar::replaceFloatIVWithIntegerIV(Instruction *UseInst) {
     for (User *U : UseInst->users()) {
       // Match for fptosi/fptoui of sitofp and with same type.
       auto *CI = dyn_cast<CastInst>(U);
-      if (!CI || IVOperand->getType() != CI->getType())
+      if (!CI)
         continue;
 
       CastInst::CastOps Opcode = CI->getOpcode();
       if (Opcode != CastInst::FPToSI && Opcode != CastInst::FPToUI)
         continue;
 
-      CI->replaceAllUsesWith(IVOperand);
+      Value *Conv = nullptr;
+      if (IVOperand->getType() != CI->getType()) {
+        IRBuilder<> Builder(CI);
+        StringRef Name = IVOperand->getName();
+        // To match InstCombine logic, we only need sext if both fptosi and
+        // sitofp are used. If one of them is unsigned, then we can use zext.
+        if (SE->getTypeSizeInBits(IVOperand->getType()) >
+            SE->getTypeSizeInBits(CI->getType())) {
+          Conv = Builder.CreateTrunc(IVOperand, CI->getType(), Name + ".trunc");
+        } else if (Opcode == CastInst::FPToUI ||
+                   UseInst->getOpcode() == CastInst::UIToFP) {
+          Conv = Builder.CreateZExt(IVOperand, CI->getType(), Name + ".zext");
+        } else {
+          Conv = Builder.CreateSExt(IVOperand, CI->getType(), Name + ".sext");
+        }
+      } else
+        Conv = IVOperand;
+
+      CI->replaceAllUsesWith(Conv);
       DeadInsts.push_back(CI);
       LLVM_DEBUG(dbgs() << "INDVARS: Replace IV user: " << *CI
-                        << " with: " << *IVOperand << '\n');
+                        << " with: " << *Conv << '\n');
 
       ++NumFoldedUser;
       Changed = true;

diff  --git a/llvm/test/Transforms/IndVarSimplify/floating-point-iv.ll b/llvm/test/Transforms/IndVarSimplify/floating-point-iv.ll
index 52b726db9f938..008dc91bf460d 100644
--- a/llvm/test/Transforms/IndVarSimplify/floating-point-iv.ll
+++ b/llvm/test/Transforms/IndVarSimplify/floating-point-iv.ll
@@ -380,14 +380,14 @@ define void @pr55505_remove_redundant_fptosi_for_float_iv(i32 %index, ptr %dst)
 ; CHECK-NEXT:    [[INDVAR_CONV:%.*]] = sitofp i32 [[FLOAT_IV_INT]] to float
 ; CHECK-NEXT:    call void @use.float(float [[INDVAR_CONV]])
 ; CHECK-NEXT:    call void @use.i32(i32 [[FLOAT_IV_INT]])
-; CHECK-NEXT:    [[CONV_I16:%.*]] = fptosi float [[INDVAR_CONV]] to i16
-; CHECK-NEXT:    [[CONV_I64:%.*]] = fptosi float [[INDVAR_CONV]] to i64
-; CHECK-NEXT:    call void @use.i16(i16 [[CONV_I16]])
-; CHECK-NEXT:    call void @use.i64(i64 [[CONV_I64]])
-; CHECK-NEXT:    [[UCONV_I16:%.*]] = fptoui float [[INDVAR_CONV]] to i16
-; CHECK-NEXT:    [[UCONV_I64:%.*]] = fptoui float [[INDVAR_CONV]] to i64
-; CHECK-NEXT:    call void @use.i16(i16 [[UCONV_I16]])
-; CHECK-NEXT:    call void @use.i64(i64 [[UCONV_I64]])
+; CHECK-NEXT:    [[FLOAT_IV_INT_TRUNC:%.*]] = trunc i32 [[FLOAT_IV_INT]] to i16
+; CHECK-NEXT:    [[FLOAT_IV_INT_SEXT:%.*]] = sext i32 [[FLOAT_IV_INT]] to i64
+; CHECK-NEXT:    call void @use.i16(i16 [[FLOAT_IV_INT_TRUNC]])
+; CHECK-NEXT:    call void @use.i64(i64 [[FLOAT_IV_INT_SEXT]])
+; CHECK-NEXT:    [[FLOAT_IV_INT_TRUNC2:%.*]] = trunc i32 [[FLOAT_IV_INT]] to i16
+; CHECK-NEXT:    [[FLOAT_IV_INT_ZEXT:%.*]] = zext i32 [[FLOAT_IV_INT]] to i64
+; CHECK-NEXT:    call void @use.i16(i16 [[FLOAT_IV_INT_TRUNC2]])
+; CHECK-NEXT:    call void @use.i64(i64 [[FLOAT_IV_INT_ZEXT]])
 ; CHECK-NEXT:    [[FLOAT_IV_NEXT_INT]] = add nsw i32 [[FLOAT_IV_INT]], -1
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i32 [[FLOAT_IV_NEXT_INT]], 0
 ; CHECK-NEXT:    br i1 [[CMP]], label [[LOOP]], label [[EXIT:%.*]]

diff  --git a/llvm/test/Transforms/IndVarSimplify/floating-point-small-iv.ll b/llvm/test/Transforms/IndVarSimplify/floating-point-small-iv.ll
index 9795b48b881ac..8b4d2cfe5fe20 100644
--- a/llvm/test/Transforms/IndVarSimplify/floating-point-small-iv.ll
+++ b/llvm/test/Transforms/IndVarSimplify/floating-point-small-iv.ll
@@ -72,22 +72,19 @@ cleanup:                                          ; preds = %for.body
   ret void
 }
 
-; Negative test: Type mismatch between the integer IV and the fptosi result
-define void @sitofp_fptosi_range_mismatch_type() {
+; Type mismatch between the integer IV and the fptosi result
+define void @sitofp_fptosi_range_trunc() {
 ;
-; CHECK-LABEL: @sitofp_fptosi_range_mismatch_type(
+; CHECK-LABEL: @sitofp_fptosi_range_trunc(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
-; CHECK-NEXT:    [[IV_INT:%.*]] = phi i32 [ 100, [[ENTRY:%.*]] ], [ [[DEC_INT:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[INDVAR_CONV:%.*]] = sitofp i32 [[IV_INT]] to float
-; CHECK-NEXT:    [[CONV:%.*]] = fptosi float [[INDVAR_CONV]] to i16
-; CHECK-NEXT:    [[IDXPROM32:%.*]] = sext i16 [[CONV]] to i32
-; CHECK-NEXT:    [[IDXPROM64:%.*]] = sext i16 [[CONV]] to i64
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [16777219 x i32], [16777219 x i32]* @array, i64 0, i64 [[IDXPROM64]]
-; CHECK-NEXT:    store i32 [[IDXPROM32]], i32* [[ARRAYIDX]], align 4
-; CHECK-NEXT:    [[DEC_INT]] = add nsw i32 [[IV_INT]], -1
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i32 [[DEC_INT]], 0
+; CHECK-NEXT:    [[IV_INT:%.*]] = phi i64 [ 100, [[ENTRY:%.*]] ], [ [[DEC_INT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[IV_INT_TRUNC:%.*]] = trunc i64 [[IV_INT]] to i32
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [16777219 x i32], [16777219 x i32]* @array, i64 0, i64 [[IV_INT]]
+; CHECK-NEXT:    store i32 [[IV_INT_TRUNC]], i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[DEC_INT]] = add nsw i64 [[IV_INT]], -1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i64 [[DEC_INT]], 0
 ; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[CLEANUP:%.*]]
 ; CHECK:       cleanup:
 ; CHECK-NEXT:    ret void
@@ -96,21 +93,164 @@ entry:
   br label %for.body
 
 for.body:                                         ; preds = %for.body, %entry
-  %iv.int = phi i32 [ 100, %entry ], [ %dec.int, %for.body ]
-  %indvar.conv = sitofp i32 %iv.int to float
-  %conv = fptosi float %indvar.conv to i16
-  %idxprom32 = sext i16 %conv to i32
-  %idxprom64 = sext i16 %conv to i64
+  %iv.int = phi i64 [ 100, %entry ], [ %dec.int, %for.body ]
+  %indvar.conv = sitofp i64 %iv.int to float
+  %idxprom32 = fptosi float %indvar.conv to i32
+  %idxprom64 = fptosi float %indvar.conv to i64
   %arrayidx = getelementptr inbounds [16777219 x i32], [16777219 x i32]* @array, i64 0, i64 %idxprom64
   store i32 %idxprom32, i32* %arrayidx, align 4
-  %dec.int = add nsw i32 %iv.int, -1
-  %cmp = icmp ugt i32 %dec.int, 0
+  %dec.int = add nsw i64 %iv.int, -1
+  %cmp = icmp ugt i64 %dec.int, 0
   br i1 %cmp, label %for.body, label %cleanup
 
 cleanup:                                          ; preds = %for.body
   ret void
 }
 
+define void @sitofp_fptosi_range_sext() {
+;
+; CHECK-LABEL: @sitofp_fptosi_range_sext(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[IV_INT:%.*]] = phi i16 [ 100, [[ENTRY:%.*]] ], [ [[DEC_INT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[IV_INT_SEXT1:%.*]] = sext i16 [[IV_INT]] to i32
+; CHECK-NEXT:    [[IV_INT_SEXT:%.*]] = sext i16 [[IV_INT]] to i64
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [16777219 x i32], [16777219 x i32]* @array, i64 0, i64 [[IV_INT_SEXT]]
+; CHECK-NEXT:    store i32 [[IV_INT_SEXT1]], i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[DEC_INT]] = add nsw i16 [[IV_INT]], -1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i16 [[DEC_INT]], -3
+; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[CLEANUP:%.*]]
+; CHECK:       cleanup:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %iv.int = phi i16 [ 100, %entry ], [ %dec.int, %for.body ]
+  %indvar.conv = sitofp i16 %iv.int to float
+  %idxprom32 = fptosi float %indvar.conv to i32
+  %idxprom64 = fptosi float %indvar.conv to i64
+  %arrayidx = getelementptr inbounds [16777219 x i32], [16777219 x i32]* @array, i64 0, i64 %idxprom64
+  store i32 %idxprom32, i32* %arrayidx, align 4
+  %dec.int = add nsw i16 %iv.int, -1
+  %cmp = icmp sgt i16 %dec.int, -3
+  br i1 %cmp, label %for.body, label %cleanup
+
+cleanup:                                          ; preds = %for.body
+  ret void
+}
+
+; If one of them is unsigned, then we can use zext.
+define void @sitofp_fptoui_range_zext() {
+;
+; CHECK-LABEL: @sitofp_fptoui_range_zext(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[IV_INT:%.*]] = phi i16 [ 100, [[ENTRY:%.*]] ], [ [[DEC_INT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[IV_INT_ZEXT1:%.*]] = zext i16 [[IV_INT]] to i32
+; CHECK-NEXT:    [[IV_INT_ZEXT:%.*]] = zext i16 [[IV_INT]] to i64
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [16777219 x i32], [16777219 x i32]* @array, i64 0, i64 [[IV_INT_ZEXT]]
+; CHECK-NEXT:    store i32 [[IV_INT_ZEXT1]], i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[DEC_INT]] = add nsw i16 [[IV_INT]], -1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i16 [[DEC_INT]], 0
+; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[CLEANUP:%.*]]
+; CHECK:       cleanup:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %iv.int = phi i16 [ 100, %entry ], [ %dec.int, %for.body ]
+  %indvar.conv = sitofp i16 %iv.int to float
+  %idxprom32 = fptoui float %indvar.conv to i32
+  %idxprom64 = fptoui float %indvar.conv to i64
+  %arrayidx = getelementptr inbounds [16777219 x i32], [16777219 x i32]* @array, i64 0, i64 %idxprom64
+  store i32 %idxprom32, i32* %arrayidx, align 4
+  %dec.int = add nsw i16 %iv.int, -1
+  %cmp = icmp ugt i16 %dec.int, 0
+  br i1 %cmp, label %for.body, label %cleanup
+
+cleanup:                                          ; preds = %for.body
+  ret void
+}
+
+; Post-increment IV: the new zext must be inserted at the fptoui, after the IV increment it uses.
+define void @sitofp_fptoui_range_zext_postinc() {
+;
+; CHECK-LABEL: @sitofp_fptoui_range_zext_postinc(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[IV_INT:%.*]] = phi i16 [ 100, [[ENTRY:%.*]] ], [ [[INC_INT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[INC_INT]] = add nuw nsw i16 [[IV_INT]], 2
+; CHECK-NEXT:    [[INC_INT_ZEXT1:%.*]] = zext i16 [[INC_INT]] to i32
+; CHECK-NEXT:    [[INC_INT_ZEXT:%.*]] = zext i16 [[INC_INT]] to i64
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [16777219 x i32], [16777219 x i32]* @array, i64 0, i64 [[INC_INT_ZEXT]]
+; CHECK-NEXT:    store i32 [[INC_INT_ZEXT1]], i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i16 [[INC_INT]], 200
+; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[CLEANUP:%.*]]
+; CHECK:       cleanup:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %iv.int = phi i16 [ 100, %entry ], [ %inc.int, %for.body ]
+  %inc.int = add nsw i16 %iv.int, 2
+  %indvar.conv = sitofp i16 %inc.int to float     ; The 'postinc IV' %inc.int passes to sitofp
+  %idxprom32 = fptoui float %indvar.conv to i32
+  %idxprom64 = fptoui float %indvar.conv to i64
+  %arrayidx = getelementptr inbounds [16777219 x i32], [16777219 x i32]* @array, i64 0, i64 %idxprom64
+  store i32 %idxprom32, i32* %arrayidx, align 4
+  %cmp = icmp ult i16 %inc.int, 200
+  br i1 %cmp, label %for.body, label %cleanup
+
+cleanup:                                          ; preds = %for.body
+  ret void
+}
+
+; If one of them is unsigned, then we can use zext.
+define void @uitofp_fptosi_range_zext() {
+;
+; CHECK-LABEL: @uitofp_fptosi_range_zext(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[IV_INT:%.*]] = phi i16 [ 100, [[ENTRY:%.*]] ], [ [[INC_INT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[IV_INT_ZEXT1:%.*]] = zext i16 [[IV_INT]] to i32
+; CHECK-NEXT:    [[IV_INT_ZEXT:%.*]] = zext i16 [[IV_INT]] to i64
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [16777219 x i32], [16777219 x i32]* @array, i64 0, i64 [[IV_INT_ZEXT]]
+; CHECK-NEXT:    store i32 [[IV_INT_ZEXT1]], i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[INC_INT]] = add nuw nsw i16 [[IV_INT]], 2
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i16 [[INC_INT]], 200
+; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[CLEANUP:%.*]]
+; CHECK:       cleanup:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %iv.int = phi i16 [ 100, %entry ], [ %inc.int, %for.body ]
+  %indvar.conv = uitofp i16 %iv.int to float
+  %idxprom32 = fptosi float %indvar.conv to i32
+  %idxprom64 = fptosi float %indvar.conv to i64
+  %arrayidx = getelementptr inbounds [16777219 x i32], [16777219 x i32]* @array, i64 0, i64 %idxprom64
+  store i32 %idxprom32, i32* %arrayidx, align 4
+  %inc.int = add nsw i16 %iv.int, 2
+  %cmp = icmp ult i16 %inc.int, 200
+  br i1 %cmp, label %for.body, label %cleanup
+
+cleanup:                                          ; preds = %for.body
+  ret void
+}
+
+
 define void @sitofp_fptoui_range() {
 ; CHECK-LABEL: @sitofp_fptoui_range(
 ; CHECK-NEXT:  entry: