[llvm] [VectorCombine] Fix scalarizeExtExtract for big-endian (PR #157962)

Wed Sep 10 14:59:25 PDT 2025

https://github.com/uyoyo0 created https://github.com/llvm/llvm-project/pull/157962

The scalarizeExtExtract transform assumed little-endian lane ordering,
causing miscompiles on big-endian targets such as AIX/PowerPC under -O3 -flto.

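This patch makes the shift amount depend on the target's endianness: on big-endian
targets, element Idx sits at bit offset TotalBits - (Idx + 1) * SrcEltSizeInBits of the
bitcast integer, rather than at Idx * SrcEltSizeInBits. No functional change for
little-endian targets.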

>From 1e1ef6c1987d957c65f78e68df41287aa8691830 Mon Sep 17 00:00:00 2001
From: Uyiosa Iyekekpolor <uyiosaben at gmail.com>
Date: Wed, 10 Sep 2025 17:56:01 -0400
Subject: [PATCH] [VectorCombine] Fix scalarizeExtExtract for big-endian

---
 .../Transforms/Vectorize/VectorCombine.cpp    | 11 +++++++--
 .../scalarize-ext-extract-endian.ll           | 23 +++++++++++++++++++
 2 files changed, 32 insertions(+), 2 deletions(-)
 create mode 100644 llvm/test/Transforms/VectorCombine/scalarize-ext-extract-endian.ll

diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 17cb18a22336a..ce785278b32a5 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -2011,12 +2011,19 @@ bool VectorCombine::scalarizeExtExtract(Instruction &I) {
       IntegerType::get(SrcTy->getContext(), DL->getTypeSizeInBits(SrcTy)));
   uint64_t SrcEltSizeInBits = DL->getTypeSizeInBits(SrcTy->getElementType());
   uint64_t EltBitMask = (1ull << SrcEltSizeInBits) - 1;
+  uint64_t TotalBits = DL->getTypeSizeInBits(SrcTy);
+  Type *PackedTy = IntegerType::get(SrcTy->getContext(), TotalBits);
+  Value *Mask = ConstantInt::get(PackedTy, EltBitMask);
   for (User *U : Ext->users()) {
     auto *Extract = cast<ExtractElementInst>(U);
     uint64_t Idx =
         cast<ConstantInt>(Extract->getIndexOperand())->getZExtValue();
-    Value *LShr = Builder.CreateLShr(ScalarV, Idx * SrcEltSizeInBits);
-    Value *And = Builder.CreateAnd(LShr, EltBitMask);
+    uint64_t ShiftAmt = DL->isBigEndian()
+        ? (TotalBits - SrcEltSizeInBits - Idx * SrcEltSizeInBits)
+        : (Idx * SrcEltSizeInBits);
+    Value *ShAmtVal = ConstantInt::get(PackedTy, ShiftAmt);
+    Value *LShr = Builder.CreateLShr(ScalarV, ShAmtVal);
+    Value *And = Builder.CreateAnd(LShr, Mask);
     U->replaceAllUsesWith(And);
   }
   return true;
diff --git a/llvm/test/Transforms/VectorCombine/scalarize-ext-extract-endian.ll b/llvm/test/Transforms/VectorCombine/scalarize-ext-extract-endian.ll
new file mode 100644
index 0000000000000..3a1fbaef3df59
--- /dev/null
+++ b/llvm/test/Transforms/VectorCombine/scalarize-ext-extract-endian.ll
@@ -0,0 +1,23 @@
+; RUN: opt -passes='vector-combine,dce' -S -mtriple=aarch64-unknown-linux-gnu %s -o - | FileCheck %s --check-prefix=LE
+; RUN: opt -passes='vector-combine,dce' -S -mtriple=powerpc64-ibm-aix-xcoff   %s -o - | FileCheck %s --check-prefix=BE
+
+define i64 @g(<8 x i8> %v) {  ; returns zext(lane 0) + zext(lane 7) of %v
+  %z  = zext <8 x i8> %v to <8 x i64>  ; zero-extend every i8 lane to i64
+  %e0 = extractelement <8 x i64> %z, i32 0  ; lowest-indexed lane
+  %e7 = extractelement <8 x i64> %z, i32 7  ; highest-indexed lane; lanes 0 and 7 sit at opposite ends of the packed i64, so which one needs the 56-bit shift depends on endianness
+  %sum = add i64 %e0, %e7  ; consumes both extracts so neither is removed by dce
+  ret i64 %sum
+}
+
+; LE-LABEL: @g(
+; LE: bitcast <8 x i8> %{{.*}} to i64
+; LE: lshr i64 %{{.*}}, 56
+; LE: and i64 %{{.*}}, 255
+; LE-NOT: extractelement
+
+; BE-LABEL: @g(
+; BE: bitcast <8 x i8> %{{.*}} to i64
+; BE: and i64 %{{.*}}, 255
+; BE: lshr i64 %{{.*}}, 56
+; BE-NOT: extractelement
+