[llvm] [LV] fix logical error in trunc cost (PR #91136)

Florian Hahn via llvm-commits llvm-commits at lists.llvm.org
Tue May 14 05:02:22 PDT 2024


================
@@ -0,0 +1,148 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt -passes=loop-vectorize,dce,simplifycfg -mtriple=s390x -mcpu=z14 -S %s | FileCheck %s --check-prefix=NOVEC
+; RUN: opt -passes=loop-vectorize,dce,simplifycfg -force-vector-width=2 -mtriple=s390x -mcpu=z14 -S %s | FileCheck %s
+
+define void @test(ptr %p) {
+; NOVEC-LABEL: define void @test(
+; NOVEC-SAME: ptr [[P:%.*]]) #[[ATTR0:[0-9]+]] {
+; NOVEC-NEXT:  entry:
+; NOVEC-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 0, i64 0
+; NOVEC-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i32> [[BROADCAST_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer
+; NOVEC-NEXT:    [[VEC_IV:%.*]] = add <16 x i32> [[BROADCAST_SPLAT]], <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; NOVEC-NEXT:    [[TMP0:%.*]] = icmp ule <16 x i32> [[VEC_IV]], <i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9>
+; NOVEC-NEXT:    [[TMP1:%.*]] = extractelement <16 x i1> [[TMP0]], i32 0
+; NOVEC-NEXT:    [[TMP2:%.*]] = extractelement <16 x i1> [[TMP0]], i32 1
+; NOVEC-NEXT:    [[TMP3:%.*]] = xor i1 [[TMP1]], true
+; NOVEC-NEXT:    [[TMP4:%.*]] = xor i1 [[TMP2]], true
+; NOVEC-NEXT:    [[TMP5:%.*]] = xor i1 [[TMP3]], true
+; NOVEC-NEXT:    [[TMP6:%.*]] = xor i1 [[TMP4]], true
+; NOVEC-NEXT:    [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP6]]
+; NOVEC-NEXT:    [[TMP8:%.*]] = extractelement <16 x i1> [[TMP0]], i32 2
+; NOVEC-NEXT:    [[TMP9:%.*]] = xor i1 [[TMP7]], true
+; NOVEC-NEXT:    [[TMP10:%.*]] = xor i1 [[TMP8]], true
+; NOVEC-NEXT:    [[TMP11:%.*]] = xor i1 [[TMP9]], true
+; NOVEC-NEXT:    [[TMP12:%.*]] = xor i1 [[TMP10]], true
+; NOVEC-NEXT:    [[TMP13:%.*]] = or i1 [[TMP11]], [[TMP12]]
+; NOVEC-NEXT:    [[TMP14:%.*]] = extractelement <16 x i1> [[TMP0]], i32 3
+; NOVEC-NEXT:    [[TMP15:%.*]] = xor i1 [[TMP13]], true
+; NOVEC-NEXT:    [[TMP16:%.*]] = xor i1 [[TMP14]], true
+; NOVEC-NEXT:    [[TMP17:%.*]] = xor i1 [[TMP15]], true
+; NOVEC-NEXT:    [[TMP18:%.*]] = xor i1 [[TMP16]], true
+; NOVEC-NEXT:    [[TMP19:%.*]] = or i1 [[TMP17]], [[TMP18]]
+; NOVEC-NEXT:    [[TMP20:%.*]] = extractelement <16 x i1> [[TMP0]], i32 4
+; NOVEC-NEXT:    [[TMP21:%.*]] = xor i1 [[TMP19]], true
+; NOVEC-NEXT:    [[TMP22:%.*]] = xor i1 [[TMP20]], true
+; NOVEC-NEXT:    [[TMP23:%.*]] = xor i1 [[TMP21]], true
+; NOVEC-NEXT:    [[TMP24:%.*]] = xor i1 [[TMP22]], true
+; NOVEC-NEXT:    [[TMP25:%.*]] = or i1 [[TMP23]], [[TMP24]]
+; NOVEC-NEXT:    [[TMP26:%.*]] = extractelement <16 x i1> [[TMP0]], i32 5
+; NOVEC-NEXT:    [[TMP27:%.*]] = xor i1 [[TMP25]], true
+; NOVEC-NEXT:    [[TMP28:%.*]] = xor i1 [[TMP26]], true
+; NOVEC-NEXT:    [[TMP29:%.*]] = xor i1 [[TMP27]], true
+; NOVEC-NEXT:    [[TMP30:%.*]] = xor i1 [[TMP28]], true
+; NOVEC-NEXT:    [[TMP31:%.*]] = or i1 [[TMP29]], [[TMP30]]
+; NOVEC-NEXT:    [[TMP32:%.*]] = extractelement <16 x i1> [[TMP0]], i32 6
+; NOVEC-NEXT:    [[TMP33:%.*]] = xor i1 [[TMP31]], true
+; NOVEC-NEXT:    [[TMP34:%.*]] = xor i1 [[TMP32]], true
+; NOVEC-NEXT:    [[TMP35:%.*]] = xor i1 [[TMP33]], true
+; NOVEC-NEXT:    [[TMP36:%.*]] = xor i1 [[TMP34]], true
+; NOVEC-NEXT:    [[TMP37:%.*]] = or i1 [[TMP35]], [[TMP36]]
+; NOVEC-NEXT:    [[TMP38:%.*]] = extractelement <16 x i1> [[TMP0]], i32 7
+; NOVEC-NEXT:    [[TMP39:%.*]] = xor i1 [[TMP37]], true
+; NOVEC-NEXT:    [[TMP40:%.*]] = xor i1 [[TMP38]], true
+; NOVEC-NEXT:    [[TMP41:%.*]] = xor i1 [[TMP39]], true
+; NOVEC-NEXT:    [[TMP42:%.*]] = xor i1 [[TMP40]], true
+; NOVEC-NEXT:    [[TMP43:%.*]] = or i1 [[TMP41]], [[TMP42]]
+; NOVEC-NEXT:    [[TMP44:%.*]] = extractelement <16 x i1> [[TMP0]], i32 8
+; NOVEC-NEXT:    [[TMP45:%.*]] = xor i1 [[TMP43]], true
+; NOVEC-NEXT:    [[TMP46:%.*]] = xor i1 [[TMP44]], true
+; NOVEC-NEXT:    [[TMP47:%.*]] = xor i1 [[TMP45]], true
+; NOVEC-NEXT:    [[TMP48:%.*]] = xor i1 [[TMP46]], true
+; NOVEC-NEXT:    [[TMP49:%.*]] = or i1 [[TMP47]], [[TMP48]]
+; NOVEC-NEXT:    [[TMP50:%.*]] = extractelement <16 x i1> [[TMP0]], i32 9
+; NOVEC-NEXT:    [[TMP51:%.*]] = xor i1 [[TMP49]], true
+; NOVEC-NEXT:    [[TMP52:%.*]] = xor i1 [[TMP50]], true
+; NOVEC-NEXT:    [[TMP53:%.*]] = xor i1 [[TMP51]], true
+; NOVEC-NEXT:    [[TMP54:%.*]] = xor i1 [[TMP52]], true
+; NOVEC-NEXT:    [[TMP55:%.*]] = or i1 [[TMP53]], [[TMP54]]
+; NOVEC-NEXT:    [[TMP56:%.*]] = extractelement <16 x i1> [[TMP0]], i32 10
+; NOVEC-NEXT:    [[TMP57:%.*]] = xor i1 [[TMP55]], true
+; NOVEC-NEXT:    [[TMP58:%.*]] = xor i1 [[TMP56]], true
+; NOVEC-NEXT:    [[TMP59:%.*]] = xor i1 [[TMP57]], true
+; NOVEC-NEXT:    [[TMP60:%.*]] = xor i1 [[TMP58]], true
+; NOVEC-NEXT:    [[TMP61:%.*]] = or i1 [[TMP59]], [[TMP60]]
+; NOVEC-NEXT:    [[TMP62:%.*]] = extractelement <16 x i1> [[TMP0]], i32 11
+; NOVEC-NEXT:    [[TMP63:%.*]] = xor i1 [[TMP61]], true
+; NOVEC-NEXT:    [[TMP64:%.*]] = xor i1 [[TMP62]], true
+; NOVEC-NEXT:    [[TMP65:%.*]] = xor i1 [[TMP63]], true
+; NOVEC-NEXT:    [[TMP66:%.*]] = xor i1 [[TMP64]], true
+; NOVEC-NEXT:    [[TMP67:%.*]] = or i1 [[TMP65]], [[TMP66]]
+; NOVEC-NEXT:    [[TMP68:%.*]] = extractelement <16 x i1> [[TMP0]], i32 12
+; NOVEC-NEXT:    [[TMP69:%.*]] = xor i1 [[TMP67]], true
+; NOVEC-NEXT:    [[TMP70:%.*]] = xor i1 [[TMP68]], true
+; NOVEC-NEXT:    [[TMP71:%.*]] = xor i1 [[TMP69]], true
+; NOVEC-NEXT:    [[TMP72:%.*]] = xor i1 [[TMP70]], true
+; NOVEC-NEXT:    [[TMP73:%.*]] = or i1 [[TMP71]], [[TMP72]]
+; NOVEC-NEXT:    [[TMP74:%.*]] = extractelement <16 x i1> [[TMP0]], i32 13
+; NOVEC-NEXT:    [[TMP75:%.*]] = xor i1 [[TMP73]], true
+; NOVEC-NEXT:    [[TMP76:%.*]] = xor i1 [[TMP74]], true
+; NOVEC-NEXT:    [[TMP77:%.*]] = xor i1 [[TMP75]], true
+; NOVEC-NEXT:    [[TMP78:%.*]] = xor i1 [[TMP76]], true
+; NOVEC-NEXT:    [[TMP79:%.*]] = or i1 [[TMP77]], [[TMP78]]
+; NOVEC-NEXT:    [[TMP80:%.*]] = extractelement <16 x i1> [[TMP0]], i32 14
+; NOVEC-NEXT:    [[TMP81:%.*]] = xor i1 [[TMP79]], true
+; NOVEC-NEXT:    [[TMP82:%.*]] = xor i1 [[TMP80]], true
+; NOVEC-NEXT:    [[TMP83:%.*]] = xor i1 [[TMP81]], true
+; NOVEC-NEXT:    [[TMP84:%.*]] = xor i1 [[TMP82]], true
+; NOVEC-NEXT:    [[TMP85:%.*]] = or i1 [[TMP83]], [[TMP84]]
+; NOVEC-NEXT:    [[TMP86:%.*]] = extractelement <16 x i1> [[TMP0]], i32 15
+; NOVEC-NEXT:    [[TMP87:%.*]] = xor i1 [[TMP85]], true
+; NOVEC-NEXT:    [[TMP88:%.*]] = xor i1 [[TMP86]], true
+; NOVEC-NEXT:    [[TMP89:%.*]] = xor i1 [[TMP87]], true
+; NOVEC-NEXT:    [[TMP90:%.*]] = xor i1 [[TMP88]], true
+; NOVEC-NEXT:    [[TMP91:%.*]] = or i1 [[TMP89]], [[TMP90]]
+; NOVEC-NEXT:    br i1 [[TMP91]], label [[TMP92:%.*]], label [[MIDDLE_BLOCK:%.*]]
+; NOVEC:       92:
+; NOVEC-NEXT:    store i1 false, ptr [[P]], align 1
+; NOVEC-NEXT:    br label [[MIDDLE_BLOCK]]
+; NOVEC:       middle.block:
+; NOVEC-NEXT:    [[INDEX_NEXT:%.*]] = add i32 0, 16
+; NOVEC-NEXT:    ret void
+;
+; CHECK-LABEL: define void @test(
+; CHECK-SAME: ptr [[P:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE30:%.*]]
+; CHECK:       vector.body:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE30]] ]
+; CHECK-NEXT:    store i1 false, ptr [[P]], align 1
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp eq i32 [[INDEX_NEXT]], 10
+; CHECK-NEXT:    br i1 [[TMP0]], label [[EXIT:%.*]], label [[PRED_STORE_CONTINUE30]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %iv = phi i32 [ 0, %entry ], [ %iv.next, %for.body ]
+  %trunc = trunc i40 0 to i32
----------------
fhahn wrote:

Perhaps it would be possible to generate a more sensible reproducer by trying to reduce the original reproducer from https://github.com/llvm/llvm-project/issues/47665, but with `dce,instcombine` running before `loop-vectorize` with in the reduction script?

https://github.com/llvm/llvm-project/pull/91136


More information about the llvm-commits mailing list