[llvm] [AArch64] - Improve costing for Identity shuffles for SVE targets. (PR #165375)

Pawan Nirpal via llvm-commits llvm-commits at lists.llvm.org
Tue Oct 28 05:03:11 PDT 2025


https://github.com/pawan-nirpal-031 updated https://github.com/llvm/llvm-project/pull/165375

>From 70bf624b5a1c129b027ed0892b567349c44d57fb Mon Sep 17 00:00:00 2001
From: Pawan Nirpal <pnirpal at qti.qualcomm.com>
Date: Tue, 28 Oct 2025 17:22:03 +0530
Subject: [PATCH] [AArch64] - Improve costing for Identity shuffles for SVE
 targets.

Identity shuffle masks (every lane is either poison or selects its own index)
on fixed-length vectors can be treated as free when SVE is available. This
enables more aggressive vector combines involving identity shuffles.
---
 .../AArch64/AArch64TargetTransformInfo.cpp    |   9 +
 .../AArch64/identity-shuffle-sve.ll           | 322 ++++++++++++++++++
 2 files changed, 331 insertions(+)
 create mode 100644 llvm/test/Transforms/VectorCombine/AArch64/identity-shuffle-sve.ll

diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 490f6391c15a0..2f0bf300f8a8f 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -5782,6 +5782,15 @@ AArch64TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy,
         VTy->getPrimitiveSizeInBits() / AArch64::SVEBitsPerBlock;
     unsigned SegmentElts = VTy->getNumElements() / Segments;
 
+    // Check for identity masks when SVE is available, which we can treat as
+    // free.
+    if (LT.second.isFixedLengthVector() && ST->isSVEorStreamingSVEAvailable() &&
+        (Kind == TTI::SK_PermuteTwoSrc || Kind == TTI::SK_PermuteSingleSrc) &&
+        all_of(enumerate(Mask), [](const auto &M) {
+          return M.value() < 0 || M.value() == (int)M.index();
+        }))
+      return 0;
+
     // dupq zd.t, zn.t[idx]
     if ((ST->hasSVE2p1() || ST->hasSME2p1()) &&
         ST->isSVEorStreamingSVEAvailable() &&
diff --git a/llvm/test/Transforms/VectorCombine/AArch64/identity-shuffle-sve.ll b/llvm/test/Transforms/VectorCombine/AArch64/identity-shuffle-sve.ll
new file mode 100644
index 0000000000000..f46a10906feb9
--- /dev/null
+++ b/llvm/test/Transforms/VectorCombine/AArch64/identity-shuffle-sve.ll
@@ -0,0 +1,322 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -passes=vector-combine -mtriple=aarch64-unknown-linux-gnu -S %s | FileCheck %s
+; RUN: opt < %s -mtriple=aarch64--linux-gnu -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output | FileCheck %s --check-prefix=COST
+target triple = "aarch64-unknown-linux-gnu"
+
+@global = external hidden global [64 x i8], align 1
+@global.1 = external local_unnamed_addr global ptr, align 8
+@global.2 = external hidden unnamed_addr constant [10 x i8], align 1
+@global.3 = external hidden unnamed_addr constant [30 x i8], align 1
+@global.4 = external hidden unnamed_addr constant [80 x i8], align 1
+@global.5 = external global i32, align 4
+
+; Function Attrs: nounwind uwtable vscale_range(1,16)
+define dso_local i32 @ham(ptr noundef %arg, i32 noundef %arg1, ptr noundef %arg2) local_unnamed_addr #0 {
+  ; CHECK-LABEL: define dso_local i32 @ham(
+  ; CHECK-SAME: ptr noundef [[ARG:%.*]], i32 noundef [[ARG1:%.*]], ptr noundef [[ARG2:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+  ; CHECK-NEXT:  [[BB:.*:]]
+  ; CHECK-NEXT:    [[ALLOCA:%.*]] = alloca i32, align 4
+  ; CHECK-NEXT:    [[ALLOCA3:%.*]] = alloca i32, align 4
+  ; CHECK-NEXT:    [[ALLOCA4:%.*]] = alloca i32, align 4
+  ; CHECK-NEXT:    [[ALLOCA5:%.*]] = alloca i32, align 4
+  ; CHECK-NEXT:    call void @llvm.lifetime.start.p0(ptr nonnull [[ALLOCA]]) #[[ATTR4:[0-9]+]]
+  ; CHECK-NEXT:    call void @llvm.lifetime.start.p0(ptr nonnull [[ALLOCA3]]) #[[ATTR4]]
+  ; CHECK-NEXT:    call void @llvm.lifetime.start.p0(ptr nonnull [[ALLOCA4]]) #[[ATTR4]]
+  ; CHECK-NEXT:    call void @llvm.lifetime.start.p0(ptr nonnull [[ALLOCA5]]) #[[ATTR4]]
+  ; CHECK-NEXT:    tail call void @zot() #[[ATTR4]]
+  ; CHECK-NEXT:    [[CALL:%.*]] = tail call i32 @pluto(i32 noundef [[ARG1]], ptr noundef [[ARG2]]) #[[ATTR4]]
+  ; CHECK-NEXT:    [[LOAD:%.*]] = load ptr, ptr @global.1, align 8, !tbaa [[TBAA5:![0-9]+]]
+  ; CHECK-NEXT:    [[ICMP:%.*]] = icmp eq ptr [[LOAD]], null
+  ; CHECK-NEXT:    br i1 [[ICMP]], label %[[BB9:.*]], label %[[BB6:.*]]
+  ; CHECK:       [[BB6]]:
+  ; CHECK-NEXT:    [[LOAD7:%.*]] = load i8, ptr [[LOAD]], align 1, !tbaa [[TBAA10:![0-9]+]]
+  ; CHECK-NEXT:    [[ICMP8:%.*]] = icmp eq i8 [[LOAD7]], 0
+  ; CHECK-NEXT:    br i1 [[ICMP8]], label %[[BB9]], label %[[BB11:.*]]
+  ; CHECK:       [[BB9]]:
+  ; CHECK-NEXT:    [[CALL10:%.*]] = tail call ptr @pluto.9(ptr noundef nonnull @global.2, ptr noundef nonnull @global.3, i32 noundef 218) #[[ATTR4]]
+  ; CHECK-NEXT:    store ptr [[CALL10]], ptr @global.1, align 8, !tbaa [[TBAA5]]
+  ; CHECK-NEXT:    br label %[[BB11]]
+  ; CHECK:       [[BB11]]:
+  ; CHECK-NEXT:    [[PHI:%.*]] = phi ptr [ [[CALL10]], %[[BB9]] ], [ [[LOAD]], %[[BB6]] ]
+  ; CHECK-NEXT:    [[CALL12:%.*]] = call ptr @baz(ptr noundef [[PHI]], ptr noundef nonnull [[ALLOCA]], ptr noundef nonnull [[ALLOCA3]], ptr noundef nonnull [[ALLOCA4]], ptr noundef nonnull [[ALLOCA5]]) #[[ATTR4]]
+  ; CHECK-NEXT:    [[LOAD13:%.*]] = load i32, ptr [[ALLOCA5]], align 4, !tbaa [[TBAA11:![0-9]+]]
+  ; CHECK-NEXT:    [[ICMP14:%.*]] = icmp eq i32 [[LOAD13]], 3
+  ; CHECK-NEXT:    br i1 [[ICMP14]], label %[[BB17:.*]], label %[[BB15:.*]]
+  ; CHECK:       [[BB15]]:
+  ; CHECK:          br label %[[BB17]]
+  ; CHECK:         [[BB17]]:
+  ; CHECK-NEXT:    [[LOAD18:%.*]] = load i32, ptr [[ALLOCA]], align 4, !tbaa [[TBAA11]]
+  ; CHECK-NEXT:    [[LOAD19:%.*]] = load i32, ptr [[ALLOCA3]], align 4, !tbaa [[TBAA11]]
+  ; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i32 [[LOAD19]], [[LOAD18]]
+  ; CHECK-NEXT:    [[MUL20:%.*]] = mul i32 [[MUL]], 3
+  ; CHECK-NEXT:    [[ZEXT:%.*]] = zext i32 [[MUL20]] to i64
+  ; CHECK-NEXT:    [[CALL21:%.*]] = call ptr @zot.11(i64 noundef [[ZEXT]], ptr noundef nonnull @global.3, i32 noundef 228) #[[ATTR4]]
+  ; CHECK-NEXT:    call void @wombat() #[[ATTR4]]
+  ; CHECK-NEXT:    [[GETELEMENTPTR:%.*]] = getelementptr inbounds nuw i8, ptr [[ARG]], i64 144
+  ; CHECK-NEXT:    [[LOAD22:%.*]] = load i32, ptr [[GETELEMENTPTR]], align 8, !tbaa [[TBAA13:![0-9]+]]
+  ; CHECK-NEXT:    [[ICMP23:%.*]] = icmp eq i32 [[LOAD22]], 0
+  ; CHECK-NEXT:    br i1 [[ICMP23]], label %[[BB224:.*]], label %[[BB24:.*]]
+  ; CHECK:       [[BB24]]:
+  ; CHECK-NEXT:    [[ICMP25:%.*]] = icmp eq i32 [[MUL]], 0
+  ; CHECK-NEXT:    br i1 [[ICMP25]], label %[[BB205:.*]], label %[[BB26:.*]]
+  ; CHECK:       [[BB26]]:
+  ; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[MUL]], -1
+  ; CHECK-NEXT:    [[ZEXT27:%.*]] = zext i32 [[ADD]] to i64
+  ; CHECK-NEXT:    [[MUL28:%.*]] = mul nuw nsw i64 [[ZEXT27]], 3
+  ; CHECK-NEXT:    [[ADD29:%.*]] = add nuw nsw i64 [[MUL28]], 3
+  ; CHECK-NEXT:    [[GETELEMENTPTR30:%.*]] = getelementptr i8, ptr [[CALL21]], i64 [[ADD29]]
+  ; CHECK-NEXT:    [[GETELEMENTPTR31:%.*]] = getelementptr i8, ptr [[CALL12]], i64 [[ADD29]]
+  ; CHECK-NEXT:    [[ZEXT32:%.*]] = zext i32 [[MUL]] to i64
+  ; CHECK-NEXT:    br label %[[BB33:.*]]
+  ; CHECK:       [[BB33]]:
+  ; CHECK-NEXT:    [[PHI34:%.*]] = phi i64 [ [[ADD201:%.*]], %[[BB191:.*]] ], [ 0, %[[BB26]] ]
+  ; CHECK-NEXT:    [[LOAD35:%.*]] = load volatile i32, ptr @global.5, align 4, !tbaa [[TBAA11]]
+  ; CHECK-NEXT:    [[TRUNC:%.*]] = trunc i32 [[LOAD35]] to i8
+  ; CHECK-NEXT:    [[LOAD36:%.*]] = load volatile i32, ptr @global.5, align 4, !tbaa [[TBAA11]]
+  ; CHECK-NEXT:    [[SEXT:%.*]] = sext i32 [[LOAD36]] to i64
+  ; CHECK-NEXT:    [[GETELEMENTPTR37:%.*]] = getelementptr inbounds i8, ptr [[CALL21]], i64 [[SEXT]]
+  ; CHECK-NEXT:    store i8 [[TRUNC]], ptr [[GETELEMENTPTR37]], align 1, !tbaa [[TBAA10]]
+  ; CHECK-NEXT:    [[ICMP38:%.*]] = icmp ult i32 [[MUL]], 8
+  ; CHECK-NEXT:    br i1 [[ICMP38]], label %[[BB149:.*]], label %[[BB39:.*]]
+  ; CHECK:       [[BB39]]:
+  ; CHECK-NEXT:    [[ICMP40:%.*]] = icmp ult ptr [[CALL21]], [[GETELEMENTPTR31]]
+  ; CHECK-NEXT:    [[ICMP41:%.*]] = icmp ult ptr [[CALL12]], [[GETELEMENTPTR30]]
+  ; CHECK-NEXT:    [[AND42:%.*]] = and i1 [[ICMP40]], [[ICMP41]]
+  ; CHECK-NEXT:    br i1 [[AND42]], label %[[BB149]], label %[[BB43:.*]]
+  ; CHECK:       [[BB43]]:
+  ; CHECK-NEXT:    [[ICMP44:%.*]] = icmp ult i32 [[MUL]], 16
+  ; CHECK-NEXT:    br i1 [[ICMP44]], label %[[BB97:.*]], label %[[BB45:.*]]
+  ; CHECK:       [[BB45]]:
+  ; CHECK:         br label %[[BB48:.*]]
+  ; CHECK:       [[BB48]]:
+  ; CHECK-NEXT:    [[PHI49:%.*]] = phi i64 [ 0, %[[BB45]] ], [ [[ADD86:%.*]], %[[BB48]] ]
+  ; CHECK-NEXT:    [[MUL50:%.*]] = mul i64 [[PHI49]], 3
+  ; CHECK-NEXT:    [[GETELEMENTPTR51:%.*]] = getelementptr i8, ptr [[CALL21]], i64 [[MUL50]]
+  ; CHECK-NEXT:    [[MUL52:%.*]] = mul i64 [[PHI49]], 3
+  ; CHECK-NEXT:    [[GETELEMENTPTR53:%.*]] = getelementptr i8, ptr [[CALL12]], i64 [[MUL52]]
+  ; CHECK-NEXT:    [[LOAD54:%.*]] = load <48 x i8>, ptr [[GETELEMENTPTR53]], align 1, !tbaa [[TBAA10]], !alias.scope [[META18:![0-9]+]]
+  ; CHECK-NEXT:    [[SHUFFLEVECTOR:%.*]] = shufflevector <48 x i8> [[LOAD54]], <48 x i8> poison, <16 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21, i32 24, i32 27, i32 30, i32 33, i32 36, i32 39, i32 42, i32 45>
+  ; CHECK-NEXT:    [[SHUFFLEVECTOR55:%.*]] = shufflevector <48 x i8> [[LOAD54]], <48 x i8> poison, <16 x i32> <i32 1, i32 4, i32 7, i32 10, i32 13, i32 16, i32 19, i32 22, i32 25, i32 28, i32 31, i32 34, i32 37, i32 40, i32 43, i32 46>
+  ; CHECK-NEXT:    [[SHUFFLEVECTOR56:%.*]] = shufflevector <48 x i8> [[LOAD54]], <48 x i8> poison, <16 x i32> <i32 2, i32 5, i32 8, i32 11, i32 14, i32 17, i32 20, i32 23, i32 26, i32 29, i32 32, i32 35, i32 38, i32 41, i32 44, i32 47>
+  ; CHECK-NEXT:    [[ZEXT57:%.*]] = zext <16 x i8> [[SHUFFLEVECTOR]] to <16 x i32>
+  ; CHECK-NEXT:    [[ZEXT59:%.*]] = zext <16 x i8> [[SHUFFLEVECTOR55]] to <16 x i32>
+  ; CHECK-NEXT:    [[ZEXT61:%.*]] = zext <16 x i8> [[SHUFFLEVECTOR56]] to <16 x i32>
+  ; CHECK-NEXT:    [[MUL75:%.*]] = mul nuw nsw <16 x i32> [[ZEXT57]], splat (i32 13282)
+  ; CHECK-NEXT:    [[MUL76:%.*]] = mul nuw <16 x i32> [[ZEXT59]], splat (i32 16744449)
+  ; CHECK-NEXT:    [[MUL77:%.*]] = mul nuw nsw <16 x i32> [[ZEXT61]], splat (i32 19485)
+  ; CHECK-NEXT:    [[ADD78:%.*]] = add nuw nsw <16 x i32> [[MUL75]], splat (i32 32768)
+  ; CHECK-NEXT:    [[ADD79:%.*]] = add nuw <16 x i32> [[ADD78]], [[MUL76]]
+  ; CHECK-NEXT:    [[ADD80:%.*]] = add nuw <16 x i32> [[ADD79]], [[MUL77]]
+  ; CHECK-NEXT:    [[LSHR81:%.*]] = lshr <16 x i32> [[ADD80]], splat (i32 16)
+  ; CHECK-NEXT:    [[TRUNC82:%.*]] = trunc <16 x i32> [[LSHR81]] to <16 x i8>
+  ; CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <16 x i32> [[ZEXT57]], <16 x i32> [[ZEXT57]], <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+  ; CHECK-NEXT:    [[TMP1:%.*]] = mul nuw nsw <32 x i32> [[TMP0]], <i32 19595, i32 19595, i32 19595, i32 19595, i32 19595, i32 19595, i32 19595, i32 19595, i32 19595, i32 19595, i32 19595, i32 19595, i32 19595, i32 19595, i32 19595, i32 19595, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
+  ; CHECK-NEXT:    [[TMP2:%.*]] = add nuw nsw <32 x i32> [[TMP1]], splat (i32 32768)
+  ; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <16 x i32> [[ZEXT59]], <16 x i32> [[ZEXT59]], <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+  ; CHECK-NEXT:    [[TMP4:%.*]] = mul nuw <32 x i32> [[TMP3]], <i32 38470, i32 38470, i32 38470, i32 38470, i32 38470, i32 38470, i32 38470, i32 38470, i32 38470, i32 38470, i32 38470, i32 38470, i32 38470, i32 38470, i32 38470, i32 38470, i32 16762097, i32 16762097, i32 16762097, i32 16762097, i32 16762097, i32 16762097, i32 16762097, i32 16762097, i32 16762097, i32 16762097, i32 16762097, i32 16762097, i32 16762097, i32 16762097, i32 16762097, i32 16762097>
+  ; CHECK-NEXT:    [[TMP5:%.*]] = add nuw <32 x i32> [[TMP2]], [[TMP4]]
+  ; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <16 x i32> [[ZEXT61]], <16 x i32> [[ZEXT61]], <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+  ; CHECK-NEXT:    [[TMP7:%.*]] = mul nuw <32 x i32> [[TMP6]], <i32 7471, i32 7471, i32 7471, i32 7471, i32 7471, i32 7471, i32 7471, i32 7471, i32 7471, i32 7471, i32 7471, i32 7471, i32 7471, i32 7471, i32 7471, i32 7471, i32 16759568, i32 16759568, i32 16759568, i32 16759568, i32 16759568, i32 16759568, i32 16759568, i32 16759568, i32 16759568, i32 16759568, i32 16759568, i32 16759568, i32 16759568, i32 16759568, i32 16759568, i32 16759568>
+  
+  ; COST: Cost Model: Found costs of 0 for:   %shufflevector84 = shufflevector <16 x i8> %trunc82, <16 x i8> poison, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+bb:
+  %alloca = alloca i32, align 4
+  %alloca3 = alloca i32, align 4
+  %alloca4 = alloca i32, align 4
+  %alloca5 = alloca i32, align 4
+  call void @llvm.lifetime.start.p0(ptr nonnull %alloca) #4
+  call void @llvm.lifetime.start.p0(ptr nonnull %alloca3) #4
+  call void @llvm.lifetime.start.p0(ptr nonnull %alloca4) #4
+  call void @llvm.lifetime.start.p0(ptr nonnull %alloca5) #4
+  tail call void @zot() #4
+  %call = tail call i32 @pluto(i32 noundef %arg1, ptr noundef %arg2) #4
+  %load = load ptr, ptr @global.1, align 8, !tbaa !5
+  %icmp = icmp eq ptr %load, null
+  br i1 %icmp, label %bb9, label %bb6
+
+bb6:                                              ; preds = %bb
+  %load7 = load i8, ptr %load, align 1, !tbaa !10
+  %icmp8 = icmp eq i8 %load7, 0
+  br i1 %icmp8, label %bb9, label %bb11
+
+bb9:                                              ; preds = %bb6, %bb
+  %call10 = tail call ptr @pluto.9(ptr noundef nonnull @global.2, ptr noundef nonnull @global.3, i32 noundef 218) #4
+  store ptr %call10, ptr @global.1, align 8, !tbaa !5
+  br label %bb11
+
+bb11:                                             ; preds = %bb9, %bb6
+  %phi = phi ptr [ %call10, %bb9 ], [ %load, %bb6 ]
+  %call12 = call ptr @baz(ptr noundef %phi, ptr noundef nonnull %alloca, ptr noundef nonnull %alloca3, ptr noundef nonnull %alloca4, ptr noundef nonnull %alloca5) #4
+  %load13 = load i32, ptr %alloca5, align 4, !tbaa !11
+  %icmp14 = icmp eq i32 %load13, 3
+  br i1 %icmp14, label %bb17, label %bb15
+
+bb15:                                             ; preds = %bb11
+  %load16 = load ptr, ptr @global.1, align 8, !tbaa !5
+  call void (i32, ptr, ...) @ham.10(i32 noundef 1, ptr noundef nonnull @global.4, ptr noundef %load16, i32 noundef %load13) #4
+  br label %bb17
+
+bb17:                                             ; preds = %bb15, %bb11
+  %load18 = load i32, ptr %alloca, align 4, !tbaa !11
+  %load19 = load i32, ptr %alloca3, align 4, !tbaa !11
+  %mul = mul nsw i32 %load19, %load18
+  %mul20 = mul i32 %mul, 3
+  %zext = zext i32 %mul20 to i64
+  %call21 = call ptr @zot.11(i64 noundef %zext, ptr noundef nonnull @global.3, i32 noundef 228) #4
+  call void @wombat() #4
+  %getelementptr = getelementptr inbounds nuw i8, ptr %arg, i64 144
+  %load22 = load i32, ptr %getelementptr, align 8, !tbaa !13
+  %icmp23 = icmp eq i32 %load22, 0
+  br i1 %icmp23, label %bb205, label %bb24
+
+bb24:                                             ; preds = %bb17
+  %icmp25 = icmp eq i32 %mul, 0
+  br i1 %icmp25, label %bb205, label %bb26
+
+bb26:                                             ; preds = %bb24
+  %add = add i32 %mul, -1
+  %zext27 = zext i32 %add to i64
+  %mul28 = mul nuw nsw i64 %zext27, 3
+  %add29 = add nuw nsw i64 %mul28, 3
+  %getelementptr30 = getelementptr i8, ptr %call21, i64 %add29
+  %getelementptr31 = getelementptr i8, ptr %call12, i64 %add29
+  %zext32 = zext i32 %mul to i64
+  br label %bb33
+
+bb33:                                             ; preds = %bb191, %bb26
+  %phi34 = phi i64 [ %add201, %bb191 ], [ 0, %bb26 ]
+  %load35 = load volatile i32, ptr @global.5, align 4, !tbaa !11
+  %trunc = trunc i32 %load35 to i8
+  %load36 = load volatile i32, ptr @global.5, align 4, !tbaa !11
+  %sext = sext i32 %load36 to i64
+  %getelementptr37 = getelementptr inbounds i8, ptr %call21, i64 %sext
+  store i8 %trunc, ptr %getelementptr37, align 1, !tbaa !10
+  %icmp38 = icmp ult i32 %mul, 8
+  br i1 %icmp38, label %bb191, label %bb39
+
+bb39:                                             ; preds = %bb33
+  %icmp40 = icmp ult ptr %call21, %getelementptr31
+  %icmp41 = icmp ult ptr %call12, %getelementptr30
+  %and42 = and i1 %icmp40, %icmp41
+  br i1 %and42, label %bb191, label %bb43
+
+bb43:                                             ; preds = %bb39
+  %icmp44 = icmp ult i32 %mul, 16
+  br i1 %icmp44, label %bb191, label %bb45
+
+bb45:                                             ; preds = %bb43
+  %and47 = and i64 %zext32, 4294967280
+  br label %bb48
+
+bb48:                                             ; preds = %bb48, %bb45
+  %phi49 = phi i64 [ 0, %bb45 ], [ %add86, %bb48 ]
+  %mul50 = mul i64 %phi49, 3
+  %getelementptr51 = getelementptr i8, ptr %call21, i64 %mul50
+  %mul52 = mul i64 %phi49, 3
+  %getelementptr53 = getelementptr i8, ptr %call12, i64 %mul52
+  %load54 = load <48 x i8>, ptr %getelementptr53, align 1, !tbaa !10, !alias.scope !18
+  %shufflevector = shufflevector <48 x i8> %load54, <48 x i8> poison, <16 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21, i32 24, i32 27, i32 30, i32 33, i32 36, i32 39, i32 42, i32 45>
+  %shufflevector55 = shufflevector <48 x i8> %load54, <48 x i8> poison, <16 x i32> <i32 1, i32 4, i32 7, i32 10, i32 13, i32 16, i32 19, i32 22, i32 25, i32 28, i32 31, i32 34, i32 37, i32 40, i32 43, i32 46>
+  %shufflevector56 = shufflevector <48 x i8> %load54, <48 x i8> poison, <16 x i32> <i32 2, i32 5, i32 8, i32 11, i32 14, i32 17, i32 20, i32 23, i32 26, i32 29, i32 32, i32 35, i32 38, i32 41, i32 44, i32 47>
+  %zext57 = zext <16 x i8> %shufflevector to <16 x i32>
+  %mul58 = mul nuw nsw <16 x i32> %zext57, splat (i32 19595)
+  %zext59 = zext <16 x i8> %shufflevector55 to <16 x i32>
+  %mul60 = mul nuw nsw <16 x i32> %zext59, splat (i32 38470)
+  %zext61 = zext <16 x i8> %shufflevector56 to <16 x i32>
+  %mul62 = mul nuw nsw <16 x i32> %zext61, splat (i32 7471)
+  %add63 = add nuw nsw <16 x i32> %mul58, splat (i32 32768)
+  %add64 = add nuw nsw <16 x i32> %add63, %mul60
+  %add65 = add nuw nsw <16 x i32> %add64, %mul62
+  %lshr = lshr <16 x i32> %add65, splat (i32 16)
+  %trunc66 = trunc nuw <16 x i32> %lshr to <16 x i8>
+  %mul67 = mul nuw nsw <16 x i32> %zext57, splat (i32 32767)
+  %mul68 = mul nuw <16 x i32> %zext59, splat (i32 16762097)
+  %mul69 = mul nuw <16 x i32> %zext61, splat (i32 16759568)
+  %add70 = add nuw nsw <16 x i32> %mul67, splat (i32 32768)
+  %add71 = add nuw <16 x i32> %add70, %mul68
+  %add72 = add <16 x i32> %add71, %mul69
+  %lshr73 = lshr <16 x i32> %add72, splat (i32 16)
+  %trunc74 = trunc <16 x i32> %lshr73 to <16 x i8>
+  %mul75 = mul nuw nsw <16 x i32> %zext57, splat (i32 13282)
+  %mul76 = mul nuw <16 x i32> %zext59, splat (i32 16744449)
+  %mul77 = mul nuw nsw <16 x i32> %zext61, splat (i32 19485)
+  %add78 = add nuw nsw <16 x i32> %mul75, splat (i32 32768)
+  %add79 = add nuw <16 x i32> %add78, %mul76
+  %add80 = add nuw <16 x i32> %add79, %mul77
+  %lshr81 = lshr <16 x i32> %add80, splat (i32 16)
+  %trunc82 = trunc <16 x i32> %lshr81 to <16 x i8>
+  %shufflevector83 = shufflevector <16 x i8> %trunc66, <16 x i8> %trunc74, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+  %shufflevector84 = shufflevector <16 x i8> %trunc82, <16 x i8> poison, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+  %shufflevector85 = shufflevector <32 x i8> %shufflevector83, <32 x i8> %shufflevector84, <48 x i32> <i32 0, i32 16, i32 32, i32 1, i32 17, i32 33, i32 2, i32 18, i32 34, i32 3, i32 19, i32 35, i32 4, i32 20, i32 36, i32 5, i32 21, i32 37, i32 6, i32 22, i32 38, i32 7, i32 23, i32 39, i32 8, i32 24, i32 40, i32 9, i32 25, i32 41, i32 10, i32 26, i32 42, i32 11, i32 27, i32 43, i32 12, i32 28, i32 44, i32 13, i32 29, i32 45, i32 14, i32 30, i32 46, i32 15, i32 31, i32 47>
+  store <48 x i8> %shufflevector85, ptr %getelementptr51, align 1, !tbaa !10, !alias.scope !21, !noalias !18
+  %add86 = add nuw i64 %phi49, 16
+  %icmp87 = icmp eq i64 %add86, %and47
+  br i1 %icmp87, label %bb88, label %bb48, !llvm.loop !23
+
+bb88:                                             ; preds = %bb48
+  %icmp89 = icmp eq i64 %and47, %zext32
+  br label %bb191
+
+bb191:                                            ; preds = %bb88, %bb43, %bb39, %bb33
+  %add201 = add nuw nsw i64 %phi34, 1
+  %load202 = load i32, ptr %getelementptr, align 8, !tbaa !13
+  %zext203 = zext i32 %load202 to i64
+  %icmp204 = icmp samesign ult i64 %add201, %zext203
+  br i1 %icmp204, label %bb33, label %bb205
+
+bb205:                                            ; preds = %bb191, %bb24, %bb17
+  ret i32 0
+}
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
+declare void @llvm.lifetime.start.p0(ptr captures(none)) #1
+
+declare void @zot() local_unnamed_addr #2
+
+declare i32 @pluto(i32 noundef, ptr noundef) local_unnamed_addr #2
+
+declare ptr @pluto.9(ptr noundef, ptr noundef, i32 noundef) local_unnamed_addr #2
+
+declare ptr @baz(ptr noundef, ptr noundef, ptr noundef, ptr noundef, ptr noundef) local_unnamed_addr #2
+
+declare void @ham.10(i32 noundef, ptr noundef, ...) local_unnamed_addr #2
+
+declare ptr @zot.11(i64 noundef, ptr noundef, i32 noundef) local_unnamed_addr #2
+
+declare void @wombat() local_unnamed_addr #2
+
+
+attributes #0 = { nounwind uwtable vscale_range(1,16) "target-cpu"="cortex-a57" "target-features"="+fp-armv8,+fullfp16,+neon,+outline-atomics,+sve,+v8a,-fmv"}
+attributes #2 = { "target-cpu"="cortex-a57" "target-features"="+fp-armv8,+fullfp16,+neon,+outline-atomics,+sve,+v8a,-fmv" }
+attributes #4 = { nounwind }
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 8, !"PIC Level", i32 2}
+!2 = !{i32 7, !"PIE Level", i32 2}
+!3 = !{i32 7, !"uwtable", i32 2}
+!4 = !{i32 7, !"frame-pointer", i32 1}
+!5 = !{!6, !6, i64 0}
+!6 = !{!"p1 omnipotent char", !7, i64 0}
+!7 = !{!"any pointer", !8, i64 0}
+!8 = !{!"omnipotent char", !9, i64 0}
+!9 = !{!"Simple C/C++ TBAA"}
+!10 = !{!8, !8, i64 0}
+!11 = !{!12, !12, i64 0}
+!12 = !{!"int", !8, i64 0}
+!13 = !{!14, !12, i64 144}
+!14 = !{!"TCDef", !8, i64 0, !8, i64 16, !8, i64 32, !8, i64 48, !8, i64 64, !15, i64 128, !16, i64 130, !16, i64 134, !16, i64 138, !12, i64 144, !12, i64 148, !12, i64 152, !15, i64 156, !17, i64 160, !17, i64 168, !17, i64 176, !17, i64 184}
+!15 = !{!"short", !8, i64 0}
+!16 = !{!"", !8, i64 0, !8, i64 1, !8, i64 2, !8, i64 3}
+!17 = !{!"long", !8, i64 0}
+!18 = !{!19}
+!19 = distinct !{!19, !20}
+!20 = distinct !{!20, !"LVerDomain"}
+!21 = !{!22}
+!22 = distinct !{!22, !20}
+!23 = distinct !{!23, !24, !25}
+!24 = !{!"llvm.loop.isvectorized", i32 1}
+!25 = !{!"llvm.loop.unroll.runtime.disable"}
+!26 = !{!"branch_weights", i32 8, i32 8}
+!27 = distinct !{!27, !24, !25}
+!28 = distinct !{!28, !24}
+



More information about the llvm-commits mailing list