[llvm] f7c0e2b - [ARM] Prevent constants from iCmp instruction from being hoisted if part of a min(max()) pattern

Meera Nakrani via llvm-commits llvm-commits at lists.llvm.org
Fri Oct 2 02:29:09 PDT 2020


Author: Meera Nakrani
Date: 2020-10-02T09:28:35Z
New Revision: f7c0e2b8f26fc6573f663f482aa64443ab6a6e71

URL: https://github.com/llvm/llvm-project/commit/f7c0e2b8f26fc6573f663f482aa64443ab6a6e71
DIFF: https://github.com/llvm/llvm-project/commit/f7c0e2b8f26fc6573f663f482aa64443ab6a6e71.diff

LOG: [ARM] Prevent constants from iCmp instruction from being hoisted if part of a min(max()) pattern

Marks constants of an ICmp instruction as free if its only user is a select
instruction that is part of a min(max()) pattern. This ensures that in loops, in
particular when loop unrolling is turned on, SSAT is still generated correctly.

Differential Revision: https://reviews.llvm.org/D88662

Added: 
    llvm/test/CodeGen/ARM/ssat-unroll-loops.ll

Modified: 
    llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
index 892008dce87b..3eb3bfb52d23 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -362,8 +362,12 @@ int ARMTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx,
   // Ensures negative constant of min(max()) or max(min()) patterns that
   // match to SSAT instructions don't get hoisted
   if (Inst && ((ST->hasV6Ops() && !ST->isThumb()) || ST->isThumb2()) &&
-      Ty->getIntegerBitWidth() <= 32 && isSSATMinMaxPattern(Inst, Imm))
-    return 0;
+      Ty->getIntegerBitWidth() <= 32) {
+    if (isSSATMinMaxPattern(Inst, Imm) ||
+        (isa<ICmpInst>(Inst) && Inst->hasOneUse() &&
+         isSSATMinMaxPattern(cast<Instruction>(*Inst->user_begin()), Imm)))
+      return 0;
+  }
 
   return getIntImmCost(Imm, Ty, CostKind);
 }

diff  --git a/llvm/test/CodeGen/ARM/ssat-unroll-loops.ll b/llvm/test/CodeGen/ARM/ssat-unroll-loops.ll
new file mode 100644
index 000000000000..f1b4ab2d937d
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/ssat-unroll-loops.ll
@@ -0,0 +1,123 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=armv6t2-eabi %s -o - | FileCheck %s --check-prefix=CHECK
+
+; Checks SSAT is still generated when loop unrolling is on
+
+define void @ssat_unroll(i16* %pSrcA, i16* %pSrcB, i16* %pDst, i32 %blockSize) {
+; CHECK-LABEL: ssat_unroll:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    .save {r11, lr}
+; CHECK-NEXT:    push {r11, lr}
+; CHECK-NEXT:    cmp r3, #0
+; CHECK-NEXT:    beq .LBB0_5
+; CHECK-NEXT:  @ %bb.1: @ %while.body.preheader
+; CHECK-NEXT:    sub r12, r3, #1
+; CHECK-NEXT:    tst r3, #1
+; CHECK-NEXT:    beq .LBB0_3
+; CHECK-NEXT:  @ %bb.2: @ %while.body.prol.preheader
+; CHECK-NEXT:    ldrsh lr, [r0], #2
+; CHECK-NEXT:    ldrsh r3, [r1], #2
+; CHECK-NEXT:    smulbb r3, r3, lr
+; CHECK-NEXT:    ssat r3, #16, r3, asr #14
+; CHECK-NEXT:    strh r3, [r2], #2
+; CHECK-NEXT:    mov r3, r12
+; CHECK-NEXT:  .LBB0_3: @ %while.body.prol.loopexit
+; CHECK-NEXT:    cmp r12, #0
+; CHECK-NEXT:    popeq {r11, pc}
+; CHECK-NEXT:  .LBB0_4: @ %while.body
+; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    ldrsh r12, [r0]
+; CHECK-NEXT:    subs r3, r3, #2
+; CHECK-NEXT:    ldrsh lr, [r1]
+; CHECK-NEXT:    smulbb r12, lr, r12
+; CHECK-NEXT:    ssat r12, #16, r12, asr #14
+; CHECK-NEXT:    strh r12, [r2]
+; CHECK-NEXT:    ldrsh r12, [r0, #2]
+; CHECK-NEXT:    add r0, r0, #4
+; CHECK-NEXT:    ldrsh lr, [r1, #2]
+; CHECK-NEXT:    add r1, r1, #4
+; CHECK-NEXT:    smulbb r12, lr, r12
+; CHECK-NEXT:    ssat r12, #16, r12, asr #14
+; CHECK-NEXT:    strh r12, [r2, #2]
+; CHECK-NEXT:    add r2, r2, #4
+; CHECK-NEXT:    bne .LBB0_4
+; CHECK-NEXT:  .LBB0_5: @ %while.end
+; CHECK-NEXT:    pop {r11, pc}
+entry:
+  %cmp.not7 = icmp eq i32 %blockSize, 0
+  br i1 %cmp.not7, label %while.end, label %while.body.preheader
+
+while.body.preheader:                             ; preds = %entry
+  %0 = add i32 %blockSize, -1
+  %xtraiter = and i32 %blockSize, 1
+  %lcmp.mod.not = icmp eq i32 %xtraiter, 0
+  br i1 %lcmp.mod.not, label %while.body.prol.loopexit, label %while.body.prol.preheader
+
+while.body.prol.preheader:                        ; preds = %while.body.preheader
+  %incdec.ptr.prol = getelementptr inbounds i16, i16* %pSrcA, i32 1
+  %1 = load i16, i16* %pSrcA
+  %conv.prol = sext i16 %1 to i32
+  %incdec.ptr1.prol = getelementptr inbounds i16, i16* %pSrcB, i32 1
+  %2 = load i16, i16* %pSrcB
+  %conv2.prol = sext i16 %2 to i32
+  %mul.prol = mul nsw i32 %conv2.prol, %conv.prol
+  %shr.prol = ashr i32 %mul.prol, 14
+  %3 = icmp sgt i32 %shr.prol, -32768
+  %4 = select i1 %3, i32 %shr.prol, i32 -32768
+  %5 = icmp slt i32 %4, 32767
+  %spec.select.i.prol = select i1 %5, i32 %4, i32 32767
+  %conv3.prol = trunc i32 %spec.select.i.prol to i16
+  %incdec.ptr4.prol = getelementptr inbounds i16, i16* %pDst, i32 1
+  store i16 %conv3.prol, i16* %pDst
+  br label %while.body.prol.loopexit
+
+while.body.prol.loopexit:                         ; preds = %while.body.prol.preheader, %while.body.preheader
+  %blkCnt.011.unr = phi i32 [ %blockSize, %while.body.preheader ], [ %0, %while.body.prol.preheader ]
+  %pSrcA.addr.010.unr = phi i16* [ %pSrcA, %while.body.preheader ], [ %incdec.ptr.prol, %while.body.prol.preheader ]
+  %pDst.addr.09.unr = phi i16* [ %pDst, %while.body.preheader ], [ %incdec.ptr4.prol, %while.body.prol.preheader ]
+  %pSrcB.addr.08.unr = phi i16* [ %pSrcB, %while.body.preheader ], [ %incdec.ptr1.prol, %while.body.prol.preheader ]
+  %6 = icmp eq i32 %0, 0
+  br i1 %6, label %while.end, label %while.body
+
+while.body:                                       ; preds = %while.body.prol.loopexit, %while.body
+  %blkCnt.011 = phi i32 [ %dec.1, %while.body ], [ %blkCnt.011.unr, %while.body.prol.loopexit ]
+  %pSrcA.addr.010 = phi i16* [ %incdec.ptr.1, %while.body ], [ %pSrcA.addr.010.unr, %while.body.prol.loopexit ]
+  %pDst.addr.09 = phi i16* [ %incdec.ptr4.1, %while.body ], [ %pDst.addr.09.unr, %while.body.prol.loopexit ]
+  %pSrcB.addr.08 = phi i16* [ %incdec.ptr1.1, %while.body ], [ %pSrcB.addr.08.unr, %while.body.prol.loopexit ]
+  %incdec.ptr = getelementptr inbounds i16, i16* %pSrcA.addr.010, i32 1
+  %7 = load i16, i16* %pSrcA.addr.010
+  %conv = sext i16 %7 to i32
+  %incdec.ptr1 = getelementptr inbounds i16, i16* %pSrcB.addr.08, i32 1
+  %8 = load i16, i16* %pSrcB.addr.08
+  %conv2 = sext i16 %8 to i32
+  %mul = mul nsw i32 %conv2, %conv
+  %shr = ashr i32 %mul, 14
+  %9 = icmp sgt i32 %shr, -32768
+  %10 = select i1 %9, i32 %shr, i32 -32768
+  %11 = icmp slt i32 %10, 32767
+  %spec.select.i = select i1 %11, i32 %10, i32 32767
+  %conv3 = trunc i32 %spec.select.i to i16
+  %incdec.ptr4 = getelementptr inbounds i16, i16* %pDst.addr.09, i32 1
+  store i16 %conv3, i16* %pDst.addr.09
+  %incdec.ptr.1 = getelementptr inbounds i16, i16* %pSrcA.addr.010, i32 2
+  %12 = load i16, i16* %incdec.ptr
+  %conv.1 = sext i16 %12 to i32
+  %incdec.ptr1.1 = getelementptr inbounds i16, i16* %pSrcB.addr.08, i32 2
+  %13 = load i16, i16* %incdec.ptr1
+  %conv2.1 = sext i16 %13 to i32
+  %mul.1 = mul nsw i32 %conv2.1, %conv.1
+  %shr.1 = ashr i32 %mul.1, 14
+  %14 = icmp sgt i32 %shr.1, -32768
+  %15 = select i1 %14, i32 %shr.1, i32 -32768
+  %16 = icmp slt i32 %15, 32767
+  %spec.select.i.1 = select i1 %16, i32 %15, i32 32767
+  %conv3.1 = trunc i32 %spec.select.i.1 to i16
+  %incdec.ptr4.1 = getelementptr inbounds i16, i16* %pDst.addr.09, i32 2
+  store i16 %conv3.1, i16* %incdec.ptr4
+  %dec.1 = add i32 %blkCnt.011, -2
+  %cmp.not.1 = icmp eq i32 %dec.1, 0
+  br i1 %cmp.not.1, label %while.end, label %while.body
+
+while.end:                                        ; preds = %while.body, %while.body.prol.loopexit, %entry
+  ret void
+}


        


More information about the llvm-commits mailing list