[llvm] [LSV] Insert casts to vectorize mismatched types (PR #134436)
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Thu May 1 07:03:01 PDT 2025
================
@@ -93,19 +131,217 @@ entry:
ret void
}
-; Ideally this would be merged
-; CHECK-LABEL: @merge_load_i32_v2i16(
-; CHECK: load i32,
-; CHECK: load <2 x i16>
define amdgpu_kernel void @merge_load_i32_v2i16(ptr addrspace(1) nocapture %a) #0 {
+; CHECK-LABEL: define amdgpu_kernel void @merge_load_i32_v2i16(
+; CHECK-SAME: ptr addrspace(1) captures(none) [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr addrspace(1) [[A]], align 4, !invariant.load [[META0:![0-9]+]], !nontemporal [[META1:![0-9]+]]
+; CHECK-NEXT: [[LD_01:%.*]] = extractelement <2 x i32> [[TMP0]], i32 0
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i32> [[TMP0]], i32 1
+; CHECK-NEXT: [[DOTCAST:%.*]] = bitcast i32 [[TMP1]] to <2 x i16>
+; CHECK-NEXT: ret void
+;
entry:
%a.1 = getelementptr inbounds i32, ptr addrspace(1) %a, i32 1
- %ld.0 = load i32, ptr addrspace(1) %a
- %ld.1 = load <2 x i16>, ptr addrspace(1) %a.1
+ %ld.0 = load i32, ptr addrspace(1) %a, align 4, !nontemporal !0, !invariant.load !1
+ %ld.1 = load <2 x i16>, ptr addrspace(1) %a.1, align 4, !nontemporal !0, !invariant.load !1
ret void
}
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }
+
+!0 = !{!"nontemporal"}
+!1 = !{!"invariant.load"}
+
+
+define void @merge_i32_2i16_float_4i8(ptr addrspace(1) %ptr1, ptr addrspace(2) %ptr2) {
----------------
arsenm wrote:
The new complex cases should probably be moved to a new test file.
https://github.com/llvm/llvm-project/pull/134436
More information about the llvm-commits mailing list