[llvm] [VPlan] Use pointer to member 0 as VPInterleaveRecipe's pointer arg. (PR #106431)
Alexander Kornienko via llvm-commits
llvm-commits at lists.llvm.org
Tue Oct 15 09:57:47 PDT 2024
alexfh wrote:
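It actually looks like a miscompile. Below is the diff in the generated IR before and after this commit (and the follow-up). Note that the base offsets of the two wide loads change from 8 and 56 to 16 and 64, while the shufflevector masks that extract the individual members stay the same, so each wide load now starts 8 bytes later than before: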
```
--- /tmp/q-good.ll 2024-10-15 18:29:02.491364585 +0200
+++ /tmp/q-bad.ll 2024-10-15 18:28:29.535008374 +0200
@@ -1281,8 +1281,8 @@
%41 = mul i64 %40, 24
%42 = getelementptr i8, ptr %4, i64 %41
%43 = insertelement <2 x i64> <i64 poison, i64 0>, i64 %12, i64 0
- %44 = getelementptr i8, ptr %4, i64 8
- %45 = getelementptr i8, ptr %4, i64 56
+ %44 = getelementptr i8, ptr %4, i64 16
+ %45 = getelementptr i8, ptr %4, i64 64
br label %46
46: ; preds = %46, %36
@@ -1291,16 +1291,16 @@
%49 = phi <2 x i64> [ zeroinitializer, %36 ], [ %68, %46 ]
%50 = mul i64 %47, 24
%51 = getelementptr i8, ptr %44, i64 %50
- %52 = load <6 x ptr>, ptr %51, align 8
- %53 = shufflevector <6 x ptr> %52, <6 x ptr> poison, <2 x i32> <i32 0, i32 3>
- %54 = shufflevector <6 x ptr> %52, <6 x ptr> poison, <2 x i32> <i32 1, i32 4>
- %55 = getelementptr i8, ptr %45, i64 %50
- %56 = load <6 x ptr>, ptr %55, align 8
+ %52 = getelementptr i8, ptr %45, i64 %50
+ %53 = load <6 x ptr>, ptr %51, align 8
+ %54 = shufflevector <6 x ptr> %53, <6 x ptr> poison, <2 x i32> <i32 0, i32 3>
+ %55 = shufflevector <6 x ptr> %53, <6 x ptr> poison, <2 x i32> <i32 1, i32 4>
+ %56 = load <6 x ptr>, ptr %52, align 8
%57 = shufflevector <6 x ptr> %56, <6 x ptr> poison, <2 x i32> <i32 0, i32 3>
%58 = shufflevector <6 x ptr> %56, <6 x ptr> poison, <2 x i32> <i32 1, i32 4>
- %59 = ptrtoint <2 x ptr> %54 to <2 x i64>
+ %59 = ptrtoint <2 x ptr> %55 to <2 x i64>
%60 = ptrtoint <2 x ptr> %58 to <2 x i64>
- %61 = ptrtoint <2 x ptr> %53 to <2 x i64>
+ %61 = ptrtoint <2 x ptr> %54 to <2 x i64>
%62 = ptrtoint <2 x ptr> %57 to <2 x i64>
%63 = sub <2 x i64> %59, %61
%64 = sub <2 x i64> %60, %62
@@ -8549,7 +8549,7 @@
!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{i32 8, !"PIC Level", i32 2}
!2 = !{i32 7, !"frame-pointer", i32 1}
-!3 = !{!"clang version (00128a20eec27246719d73ba427bf821883b00b4)"}
+!3 = !{!"clang version (efcfa6e711689ada546c323316145ecd749d380a)"}
!4 = !{}
!5 = distinct !{!5, !6}
!6 = !{!"llvm.loop.mustprogress"}
```
I've reduced this to the following LLVM IR reproducer:
```
; Reduced reproducer. @_f contains a single loop that walks a pointer forward
; in 24-byte steps. On every iteration it loads two pointer values, at byte
; offsets 16 and 8 from the current position, converts them to i64 and ORs
; them into a running accumulator, which is returned when the loop exits.
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32"
target triple = "aarch64-unknown-linux-gnu"
; %0: pointer value that terminates the loop (compared against the current
;     position before it is advanced, so the element at %0 is still read).
; %1: starting position.
define i64 @_f(ptr %0, ptr %1) {
br label %4
3: ; preds = %4
; Loop exit: return the accumulated OR of all loaded values.
ret i64 %14
4: ; preds = %4, %2
; %5 is the accumulator (0 on entry); %6 is the current position (%1 on entry).
%5 = phi i64 [ %14, %4 ], [ 0, %2 ]
%6 = phi ptr [ %15, %4 ], [ %1, %2 ]
; Load the pointer stored at byte offset 16 and reinterpret it as an integer.
%7 = getelementptr i8, ptr %6, i64 16
%8 = load ptr, ptr %7, align 8
%9 = ptrtoint ptr %8 to i64
; Load the pointer stored at byte offset 8 and reinterpret it as an integer.
%10 = getelementptr i8, ptr %6, i64 8
%11 = load ptr, ptr %10, align 8
%12 = ptrtoint ptr %11 to i64
; Fold both loaded values into the accumulator.
%13 = or i64 %9, %12
%14 = or i64 %13, %5
; Advance the position by 24 bytes for the next iteration.
%15 = getelementptr nusw i8, ptr %6, i64 24
; Exit once the position just processed equals %0; otherwise iterate again.
%16 = icmp eq ptr %6, %0
br i1 %16, label %3, label %4
; uselistorder directives
uselistorder i64 %14, { 1, 0 }
}
```
```
$ diff -u100 <(./clang-good --target=aarch64-linux-gnu -O3 -emit-llvm -S reduced.ll -o -) <(./clang-bad --target=aarch64-linux-gnu -O3 -emit-llvm -S reduced.ll -o -)
--- /dev/fd/63 2024-10-15 18:55:25.820702792 +0200
+++ /dev/fd/62 2024-10-15 18:55:25.824702835 +0200
@@ -1,90 +1,90 @@
; ModuleID = 'reduced.ll'
source_filename = "reduced.ll"
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32"
target triple = "aarch64-unknown-linux-gnu"
; Function Attrs: nofree norecurse nosync nounwind memory(argmem: read)
define i64 @_f(ptr readnone %0, ptr readonly %1) local_unnamed_addr #0 {
%3 = ptrtoint ptr %1 to i64
%4 = ptrtoint ptr %0 to i64
%5 = sub i64 %4, %3
%min.iters.check = icmp ult i64 %5, 96
br i1 %min.iters.check, label %scalar.ph.preheader, label %vector.ph
scalar.ph.preheader: ; preds = %middle.block, %2
%.ph = phi i64 [ 0, %2 ], [ %20, %middle.block ]
%.ph7 = phi ptr [ %1, %2 ], [ %ind.end, %middle.block ]
br label %scalar.ph
vector.ph: ; preds = %2
%6 = udiv i64 %5, 24
%7 = add nuw nsw i64 %6, 1
%n.mod.vf = and i64 %7, 3
%8 = icmp eq i64 %n.mod.vf, 0
%9 = select i1 %8, i64 4, i64 %n.mod.vf
%n.vec = sub nsw i64 %7, %9
%10 = mul i64 %n.vec, 24
%ind.end = getelementptr i8, ptr %1, i64 %10
- %invariant.gep = getelementptr i8, ptr %1, i64 8
- %invariant.gep12 = getelementptr i8, ptr %1, i64 56
+ %invariant.gep = getelementptr i8, ptr %1, i64 16
+ %invariant.gep12 = getelementptr i8, ptr %1, i64 64
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
%vec.phi = phi <2 x i64> [ zeroinitializer, %vector.ph ], [ %17, %vector.body ]
%vec.phi1 = phi <2 x i64> [ zeroinitializer, %vector.ph ], [ %18, %vector.body ]
%offset.idx = mul i64 %index, 24
%gep = getelementptr i8, ptr %invariant.gep, i64 %offset.idx
+ %gep13 = getelementptr i8, ptr %invariant.gep12, i64 %offset.idx
%wide.vec = load <6 x ptr>, ptr %gep, align 8
%strided.vec = shufflevector <6 x ptr> %wide.vec, <6 x ptr> poison, <2 x i32> <i32 0, i32 3>
%strided.vec3 = shufflevector <6 x ptr> %wide.vec, <6 x ptr> poison, <2 x i32> <i32 1, i32 4>
- %gep13 = getelementptr i8, ptr %invariant.gep12, i64 %offset.idx
%wide.vec4 = load <6 x ptr>, ptr %gep13, align 8
%strided.vec5 = shufflevector <6 x ptr> %wide.vec4, <6 x ptr> poison, <2 x i32> <i32 0, i32 3>
%strided.vec6 = shufflevector <6 x ptr> %wide.vec4, <6 x ptr> poison, <2 x i32> <i32 1, i32 4>
%11 = ptrtoint <2 x ptr> %strided.vec3 to <2 x i64>
%12 = ptrtoint <2 x ptr> %strided.vec6 to <2 x i64>
%13 = ptrtoint <2 x ptr> %strided.vec to <2 x i64>
%14 = ptrtoint <2 x ptr> %strided.vec5 to <2 x i64>
%15 = or <2 x i64> %vec.phi, %11
%16 = or <2 x i64> %vec.phi1, %12
%17 = or <2 x i64> %15, %13
%18 = or <2 x i64> %16, %14
%index.next = add nuw i64 %index, 4
%19 = icmp eq i64 %index.next, %n.vec
br i1 %19, label %middle.block, label %vector.body, !llvm.loop !0
middle.block: ; preds = %vector.body
%bin.rdx = or <2 x i64> %18, %17
%20 = tail call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> %bin.rdx)
br label %scalar.ph.preheader
21: ; preds = %scalar.ph
ret i64 %31
scalar.ph: ; preds = %scalar.ph.preheader, %scalar.ph
%22 = phi i64 [ %31, %scalar.ph ], [ %.ph, %scalar.ph.preheader ]
%23 = phi ptr [ %32, %scalar.ph ], [ %.ph7, %scalar.ph.preheader ]
%24 = getelementptr i8, ptr %23, i64 16
%25 = load ptr, ptr %24, align 8
%26 = ptrtoint ptr %25 to i64
%27 = getelementptr i8, ptr %23, i64 8
%28 = load ptr, ptr %27, align 8
%29 = ptrtoint ptr %28 to i64
%30 = or i64 %22, %26
%31 = or i64 %30, %29
%32 = getelementptr nusw i8, ptr %23, i64 24
%33 = icmp eq ptr %23, %0
br i1 %33, label %21, label %scalar.ph, !llvm.loop !3
}
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare i64 @llvm.vector.reduce.or.v2i64(<2 x i64>) #1
attributes #0 = { nofree norecurse nosync nounwind memory(argmem: read) }
attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
!0 = distinct !{!0, !1, !2}
!1 = !{!"llvm.loop.isvectorized", i32 1}
!2 = !{!"llvm.loop.unroll.runtime.disable"}
!3 = distinct !{!3, !2, !1}
```
Hopefully, no important details were lost during reduction.
https://github.com/llvm/llvm-project/pull/106431
More information about the llvm-commits
mailing list