[llvm] [LSV] Insert casts to vectorize mismatched types (PR #134436)

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Thu May 1 07:03:01 PDT 2025


================
@@ -93,19 +131,217 @@ entry:
   ret void
 }
 
-; Ideally this would be merged
-; CHECK-LABEL: @merge_load_i32_v2i16(
-; CHECK: load i32,
-; CHECK: load <2 x i16>
 define amdgpu_kernel void @merge_load_i32_v2i16(ptr addrspace(1) nocapture %a) #0 {
+; CHECK-LABEL: define amdgpu_kernel void @merge_load_i32_v2i16(
+; CHECK-SAME: ptr addrspace(1) captures(none) [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = load <2 x i32>, ptr addrspace(1) [[A]], align 4, !invariant.load [[META0:![0-9]+]], !nontemporal [[META1:![0-9]+]]
+; CHECK-NEXT:    [[LD_01:%.*]] = extractelement <2 x i32> [[TMP0]], i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x i32> [[TMP0]], i32 1
+; CHECK-NEXT:    [[DOTCAST:%.*]] = bitcast i32 [[TMP1]] to <2 x i16>
+; CHECK-NEXT:    ret void
+;
 entry:
   %a.1 = getelementptr inbounds i32, ptr addrspace(1) %a, i32 1
 
-  %ld.0 = load i32, ptr addrspace(1) %a
-  %ld.1 = load <2 x i16>, ptr addrspace(1) %a.1
+  %ld.0 = load i32, ptr addrspace(1) %a, align 4, !nontemporal !0, !invariant.load !1
+  %ld.1 = load <2 x i16>, ptr addrspace(1) %a.1, align 4, !nontemporal !0, !invariant.load !1
 
   ret void
 }
 
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
+
+!0 = !{!"nontemporal"}
+!1 = !{!"invariant.load"}
+
+
+define void @merge_i32_2i16_float_4i8(ptr addrspace(1) %ptr1, ptr addrspace(2) %ptr2) {
----------------
arsenm wrote:

The new complex cases should probably be moved to a new test file.

https://github.com/llvm/llvm-project/pull/134436


More information about the llvm-commits mailing list