[llvm-commits] [llvm] r132985 - in /llvm/trunk: lib/CodeGen/SelectionDAG/LegalizeDAG.cpp lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp test/CodeGen/X86/mem-promote-integers.ll

Nadav Rotem nadav.rotem at intel.com
Tue Jun 14 01:11:52 PDT 2011


Author: nadav
Date: Tue Jun 14 03:11:52 2011
New Revision: 132985

URL: http://llvm.org/viewvc/llvm-project?rev=132985&view=rev
Log:

Add a testcase for checking the integer promotion of many different vector
types (with power-of-two element widths such as 8, 16, 32, ..., 512 bits,
and vector lengths from 1 to 19 elements).

Fix a bug in the integer promotion of bitcast nodes: perform the integer
extension only if the target of the conversion is a scalar integer (when the
type action is scalarize-vector).

Add handling to the legalization of vector loads and stores for cases where
the stored vector is integer-promoted.
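
As an illustration of the load scalarization, here is a minimal standalone
sketch (plain C++, not the LLVM API; the constants and names such as Mem,
NumElem and Stride are invented for the example) of what the new expansion
computes when the promoted element type is legal: each element is read with
a scalar extending load at a byte stride and widened into the promoted
element type.

  #include <cstdint>
  #include <cstdio>
  #include <cstring>

  int main() {
    // Source in memory: <4 x i8>. In registers the elements are promoted,
    // e.g. to i32, so the expansion builds a vector of widened scalar loads.
    const unsigned NumElem = 4;
    const unsigned SrcBits = 8;            // SrcVT scalar width in bits
    const unsigned Stride  = SrcBits / 8;  // byte stride between elements
    uint8_t Mem[NumElem] = {0x11, 0x22, 0x33, 0x44};

    uint32_t Promoted[NumElem];            // stands in for the BUILD_VECTOR
    for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
      uint8_t Narrow;
      std::memcpy(&Narrow, Mem + Idx * Stride, Stride); // one scalar EXTLOAD
      Promoted[Idx] = Narrow;  // extend into the promoted element type
    }
    for (unsigned Idx = 0; Idx < NumElem; ++Idx)
      std::printf("elem %u = 0x%x\n", Idx, (unsigned)Promoted[Idx]);
    return 0;
  }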


Added:
    llvm/trunk/test/CodeGen/X86/mem-promote-integers.ll
Modified:
    llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
    llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp

Modified: llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp?rev=132985&r1=132984&r2=132985&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp Tue Jun 14 03:11:52 2011
@@ -1374,6 +1374,91 @@
           Tmp2 = LegalizeOp(Load.getValue(1));
           break;
         }
+
+        // If this is a promoted vector load and the vector element type is
+        // legal, scalarize it.
+        if (ExtType == ISD::EXTLOAD && SrcVT.isVector() &&
+          isTypeLegal(Node->getValueType(0).getScalarType())) {
+          SmallVector<SDValue, 8> LoadVals;
+          SmallVector<SDValue, 8> LoadChains;
+          unsigned NumElem = SrcVT.getVectorNumElements();
+          unsigned Stride = SrcVT.getScalarType().getSizeInBits()/8;
+
+          for (unsigned Idx=0; Idx<NumElem; Idx++) {
+            Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
+                                DAG.getIntPtrConstant(Stride));
+            SDValue ScalarLoad = DAG.getExtLoad(ISD::EXTLOAD, dl,
+                  Node->getValueType(0).getScalarType(),
+                  Tmp1, Tmp2, LD->getPointerInfo().getWithOffset(Idx * Stride),
+                  SrcVT.getScalarType(),
+                  LD->isVolatile(), LD->isNonTemporal(),
+                  LD->getAlignment());
+
+            LoadVals.push_back(ScalarLoad.getValue(0));
+            LoadChains.push_back(ScalarLoad.getValue(1));
+          }
+          Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+            &LoadChains[0], LoadChains.size());
+          SDValue ValRes = DAG.getNode(ISD::BUILD_VECTOR, dl,
+            Node->getValueType(0), &LoadVals[0], LoadVals.size());
+
+          Tmp1 = LegalizeOp(ValRes);  // Relegalize new nodes.
+          Tmp2 = LegalizeOp(Result.getValue(0));  // Relegalize new nodes.
+          break;
+        }
+
+        // If this is a promoted vector load and the vector element type is
+        // illegal, build the promoted vector from bitcast segments.
+        if (ExtType == ISD::EXTLOAD && SrcVT.isVector()) {
+          EVT MemElemTy = Node->getValueType(0).getScalarType();
+          EVT SrcSclrTy = SrcVT.getScalarType();
+          unsigned SizeRatio =
+            (MemElemTy.getSizeInBits() / SrcSclrTy.getSizeInBits());
+
+          SmallVector<SDValue, 8> LoadVals;
+          SmallVector<SDValue, 8> LoadChains;
+          unsigned NumElem = SrcVT.getVectorNumElements();
+          unsigned Stride = SrcVT.getScalarType().getSizeInBits()/8;
+
+          for (unsigned Idx=0; Idx<NumElem; Idx++) {
+            Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
+                                DAG.getIntPtrConstant(Stride));
+            SDValue ScalarLoad = DAG.getExtLoad(ISD::EXTLOAD, dl,
+                  SrcVT.getScalarType(),
+                  Tmp1, Tmp2, LD->getPointerInfo().getWithOffset(Idx * Stride),
+                  SrcVT.getScalarType(),
+                  LD->isVolatile(), LD->isNonTemporal(),
+                  LD->getAlignment());
+            if (TLI.isBigEndian()) {
+              // MSB (which is garbage) comes first.
+              LoadVals.push_back(ScalarLoad.getValue(0));
+              for (unsigned i = 0; i<SizeRatio-1; ++i)
+                LoadVals.push_back(DAG.getUNDEF(SrcVT.getScalarType()));
+            } else {
+              // LSB (which is the data) comes first.
+              for (unsigned i = 0; i<SizeRatio-1; ++i)
+                LoadVals.push_back(DAG.getUNDEF(SrcVT.getScalarType()));
+              LoadVals.push_back(ScalarLoad.getValue(0));
+            }
+            LoadChains.push_back(ScalarLoad.getValue(1));
+          }
+
+          Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+            &LoadChains[0], LoadChains.size());
+          EVT TempWideVector = EVT::getVectorVT(*DAG.getContext(),
+            SrcVT.getScalarType(), NumElem*SizeRatio);
+          SDValue ValRes = DAG.getNode(ISD::BUILD_VECTOR, dl, 
+            TempWideVector, &LoadVals[0], LoadVals.size());
+
+          // Cast to the correct type
+          ValRes = DAG.getNode(ISD::BITCAST, dl, Node->getValueType(0), ValRes);
+
+          Tmp1 = LegalizeOp(ValRes);  // Relegalize new nodes.
+          Tmp2 = LegalizeOp(Result.getValue(0));  // Relegalize new nodes.
+          break;
+
+        }
+
         // FIXME: This does not work for vectors on most targets.  Sign- and
         // zero-extend operations are currently folded into extending loads,
         // whether they are legal or not, and then we end up here without any
@@ -1549,6 +1634,88 @@
           Result = TLI.LowerOperation(Result, DAG);
           break;
         case Expand:
+
+          EVT WideScalarVT = Tmp3.getValueType().getScalarType();
+          EVT NarrowScalarVT = StVT.getScalarType();
+
+          // The store type is illegal; the vector store must be scalarized.
+          SmallVector<SDValue, 8> Stores;
+          bool ScalarLegal = isTypeLegal(WideScalarVT);
+          if (!isTypeLegal(StVT) && StVT.isVector() && ScalarLegal) {
+            unsigned NumElem = StVT.getVectorNumElements();
+
+            unsigned ScalarSize = StVT.getScalarType().getSizeInBits();
+            // Round odd types up to the next power of two.
+            if (!isPowerOf2_32(ScalarSize))
+              ScalarSize = NextPowerOf2(ScalarSize);
+            // Types smaller than 8 bits are promoted to 8 bits.
+            ScalarSize = std::max<unsigned>(ScalarSize, 8);
+            // Store stride in bytes.
+            unsigned Stride = ScalarSize/8;
+            assert(isPowerOf2_32(Stride) && "Stride must be a power of two");
+
+            for (unsigned Idx=0; Idx<NumElem; Idx++) {
+              SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
+                                       WideScalarVT, Tmp3, DAG.getIntPtrConstant(Idx));
+
+
+              EVT NVT = EVT::getIntegerVT(*DAG.getContext(), ScalarSize);
+
+              Ex = DAG.getNode(ISD::TRUNCATE, dl, NVT, Ex);
+              Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
+                                 DAG.getIntPtrConstant(Stride));
+              SDValue Store = DAG.getStore(Tmp1, dl, Ex, Tmp2,
+                                           ST->getPointerInfo().getWithOffset(Idx*Stride),
+                                           isVolatile, isNonTemporal, Alignment);
+              Stores.push_back(Store);
+            }
+            Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+                                 &Stores[0], Stores.size());
+            break;
+          }
+
+          // The store type is illegal, so the vector store must be
+          // scalarized. However, the scalar type is also illegal: bitcast
+          // the value and store it in smaller parts.
+          if (!isTypeLegal(StVT) && StVT.isVector()) {
+            unsigned WideNumElem = StVT.getVectorNumElements();
+            unsigned Stride = NarrowScalarVT.getSizeInBits()/8;
+
+            unsigned SizeRatio =
+              (WideScalarVT.getSizeInBits() / NarrowScalarVT.getSizeInBits());
+
+            EVT CastValueVT = EVT::getVectorVT(*DAG.getContext(), NarrowScalarVT,
+                                               SizeRatio*WideNumElem);
+
+            // Cast the wide-element vector to a wider vector with a smaller
+            // element type. Example: <2 x i64> -> <4 x i32>.
+            Tmp3 = DAG.getNode(ISD::BITCAST, dl, CastValueVT, Tmp3);
+
+            for (unsigned Idx=0; Idx<WideNumElem*SizeRatio; Idx++) {
+              // Extract element Idx.
+              SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
+                                       NarrowScalarVT, Tmp3, DAG.getIntPtrConstant(Idx));
+              // Bump the pointer.
+              Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
+                                 DAG.getIntPtrConstant(Stride));
+
+              // Store this element if it is:
+              //  - the first element on big-endian targets, or
+              //  - the last element on little-endian targets.
+              if (( TLI.isBigEndian() && (Idx%SizeRatio == 0)) ||
+                  ((!TLI.isBigEndian() && (Idx%SizeRatio == SizeRatio-1)))) {
+                SDValue Store = DAG.getStore(Tmp1, dl, Ex, Tmp2,
+                                             ST->getPointerInfo().getWithOffset(Idx*Stride),
+                                             isVolatile, isNonTemporal, Alignment);
+                Stores.push_back(Store);
+              }
+            }
+            Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+                                 &Stores[0], Stores.size());
+            break;
+          }
+
+
           // TRUNCSTORE:i16 i32 -> STORE i16
           assert(isTypeLegal(StVT) && "Do not know how to expand this store!");
           Tmp3 = DAG.getNode(ISD::TRUNCATE, dl, StVT, Tmp3);

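The two bitcast-based paths above share one endianness rule: when a wide
element is viewed as SizeRatio narrow segments, the data occupies the first
segment on big-endian targets and the last segment on little-endian targets,
while the remaining segments are undef padding. A minimal sketch of that
predicate (plain C++, not the LLVM API; segmentHoldsData is a name invented
for the example):

  #include <cstdio>

  // True if narrow segment Idx of the bitcast vector carries the data
  // (the other segments of each wide element are undef padding).
  static bool segmentHoldsData(unsigned Idx, unsigned SizeRatio,
                               bool IsBigEndian) {
    return IsBigEndian ? (Idx % SizeRatio == 0)
                       : (Idx % SizeRatio == SizeRatio - 1);
  }

  int main() {
    const unsigned SizeRatio = 2;  // e.g. <2 x i64> viewed as <4 x i32>
    for (unsigned Idx = 0; Idx < 4; ++Idx)
      std::printf("i32 segment %u: BE=%s LE=%s\n", Idx,
                  segmentHoldsData(Idx, SizeRatio, true)  ? "data" : "undef",
                  segmentHoldsData(Idx, SizeRatio, false) ? "data" : "undef");
    return 0;
  }

The store path writes only the segments for which this predicate holds,
which keeps the partial stores consistent with the segment layout that the
load path builds.
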
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp?rev=132985&r1=132984&r2=132985&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp Tue Jun 14 03:11:52 2011
@@ -204,8 +204,10 @@
     break;
   case TargetLowering::TypeScalarizeVector:
     // Convert the element to an integer and promote it by hand.
-    return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT,
-                       BitConvertToInteger(GetScalarizedVector(InOp)));
+    if (!NOutVT.isVector())
+      return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT,
+                         BitConvertToInteger(GetScalarizedVector(InOp)));
+    break;
   case TargetLowering::TypeSplitVector: {
     // For example, i32 = BITCAST v2i16 on alpha.  Convert the split
     // pieces of the input into integers and reassemble in the final type.

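The guard added here matters because, for the TypeScalarizeVector action,
any-extending the bit-converted scalar is only meaningful when the promoted
result type NOutVT is itself a scalar integer; when it is a vector, the code
now breaks out of the switch and takes the generic path after the switch
instead. A toy sketch of the fixed control flow (plain C++; the ToyEVT type
and the enum names are invented for the example):

  #include <cassert>

  struct ToyEVT { bool IsVector; };  // stand-in for llvm::EVT

  enum class Lowering { AnyExtendScalarizedElement, GenericFallThrough };

  // Mirrors the new guard: promote the scalarized element by hand only
  // when the promoted result type is a scalar integer.
  static Lowering promoteBitcastFromScalarizedVector(ToyEVT NOutVT) {
    if (!NOutVT.IsVector)
      return Lowering::AnyExtendScalarizedElement;
    return Lowering::GenericFallThrough;  // unreachable before this fix
  }

  int main() {
    assert(promoteBitcastFromScalarizedVector({false}) ==
           Lowering::AnyExtendScalarizedElement);
    assert(promoteBitcastFromScalarizedVector({true}) ==
           Lowering::GenericFallThrough);
    return 0;
  }
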
Added: llvm/trunk/test/CodeGen/X86/mem-promote-integers.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/mem-promote-integers.ll?rev=132985&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/mem-promote-integers.ll (added)
+++ llvm/trunk/test/CodeGen/X86/mem-promote-integers.ll Tue Jun 14 03:11:52 2011
@@ -0,0 +1,1467 @@
+; RUN: llc -march=x86 -promote-elements < %s
+; RUN: llc -march=x86                   < %s
+; RUN: llc -march=x86-64 -promote-elements < %s
+; RUN: llc -march=x86-64                   < %s
+
+define <1 x i8> @test_1xi8(<1 x i8> %x, <1 x i8>* %b) {
+  %bb = load <1 x i8>* %b
+  %tt = xor <1 x i8> %x, %bb
+  store <1 x i8> %tt, <1 x i8>* %b
+  br label %next
+
+next:
+  ret <1 x i8> %tt
+}
+
+
+define <1 x i16> @test_1xi16(<1 x i16> %x, <1 x i16>* %b) {
+  %bb = load <1 x i16>* %b
+  %tt = xor <1 x i16> %x, %bb
+  store <1 x i16> %tt, <1 x i16>* %b
+  br label %next
+
+next:
+  ret <1 x i16> %tt
+}
+
+
+define <1 x i32> @test_1xi32(<1 x i32> %x, <1 x i32>* %b) {
+  %bb = load <1 x i32>* %b
+  %tt = xor <1 x i32> %x, %bb
+  store <1 x i32> %tt, <1 x i32>* %b
+  br label %next
+
+next:
+  ret <1 x i32> %tt
+}
+
+
+define <1 x i64> @test_1xi64(<1 x i64> %x, <1 x i64>* %b) {
+  %bb = load <1 x i64>* %b
+  %tt = xor <1 x i64> %x, %bb
+  store <1 x i64> %tt, <1 x i64>* %b
+  br label %next
+
+next:
+  ret <1 x i64> %tt
+}
+
+
+define <1 x i128> @test_1xi128(<1 x i128> %x, <1 x i128>* %b) {
+  %bb = load <1 x i128>* %b
+  %tt = xor <1 x i128> %x, %bb
+  store <1 x i128> %tt, <1 x i128>* %b
+  br label %next
+
+next:
+  ret <1 x i128> %tt
+}
+
+
+define <1 x i256> @test_1xi256(<1 x i256> %x, <1 x i256>* %b) {
+  %bb = load <1 x i256>* %b
+  %tt = xor <1 x i256> %x, %bb
+  store <1 x i256> %tt, <1 x i256>* %b
+  br label %next
+
+next:
+  ret <1 x i256> %tt
+}
+
+
+define <1 x i512> @test_1xi512(<1 x i512> %x, <1 x i512>* %b) {
+  %bb = load <1 x i512>* %b
+  %tt = xor <1 x i512> %x, %bb
+  store <1 x i512> %tt, <1 x i512>* %b
+  br label %next
+
+next:
+  ret <1 x i512> %tt
+}
+
+
+define <2 x i8> @test_2xi8(<2 x i8> %x, <2 x i8>* %b) {
+  %bb = load <2 x i8>* %b
+  %tt = xor <2 x i8> %x, %bb
+  store <2 x i8> %tt, <2 x i8>* %b
+  br label %next
+
+next:
+  ret <2 x i8> %tt
+}
+
+
+define <2 x i16> @test_2xi16(<2 x i16> %x, <2 x i16>* %b) {
+  %bb = load <2 x i16>* %b
+  %tt = xor <2 x i16> %x, %bb
+  store <2 x i16> %tt, <2 x i16>* %b
+  br label %next
+
+next:
+  ret <2 x i16> %tt
+}
+
+
+define <2 x i32> @test_2xi32(<2 x i32> %x, <2 x i32>* %b) {
+  %bb = load <2 x i32>* %b
+  %tt = xor <2 x i32> %x, %bb
+  store <2 x i32> %tt, <2 x i32>* %b
+  br label %next
+
+next:
+  ret <2 x i32> %tt
+}
+
+
+define <2 x i64> @test_2xi64(<2 x i64> %x, <2 x i64>* %b) {
+  %bb = load <2 x i64>* %b
+  %tt = xor <2 x i64> %x, %bb
+  store <2 x i64> %tt, <2 x i64>* %b
+  br label %next
+
+next:
+  ret <2 x i64> %tt
+}
+
+
+define <2 x i128> @test_2xi128(<2 x i128> %x, <2 x i128>* %b) {
+  %bb = load <2 x i128>* %b
+  %tt = xor <2 x i128> %x, %bb
+  store <2 x i128> %tt, <2 x i128>* %b
+  br label %next
+
+next:
+  ret <2 x i128> %tt
+}
+
+
+define <2 x i256> @test_2xi256(<2 x i256> %x, <2 x i256>* %b) {
+  %bb = load <2 x i256>* %b
+  %tt = xor <2 x i256> %x, %bb
+  store <2 x i256> %tt, <2 x i256>* %b
+  br label %next
+
+next:
+  ret <2 x i256> %tt
+}
+
+
+define <2 x i512> @test_2xi512(<2 x i512> %x, <2 x i512>* %b) {
+  %bb = load <2 x i512>* %b
+  %tt = xor <2 x i512> %x, %bb
+  store <2 x i512> %tt, <2 x i512>* %b
+  br label %next
+
+next:
+  ret <2 x i512> %tt
+}
+
+
+define <3 x i8> @test_3xi8(<3 x i8> %x, <3 x i8>* %b) {
+  %bb = load <3 x i8>* %b
+  %tt = xor <3 x i8> %x, %bb
+  store <3 x i8> %tt, <3 x i8>* %b
+  br label %next
+
+next:
+  ret <3 x i8> %tt
+}
+
+
+define <3 x i16> @test_3xi16(<3 x i16> %x, <3 x i16>* %b) {
+  %bb = load <3 x i16>* %b
+  %tt = xor <3 x i16> %x, %bb
+  store <3 x i16> %tt, <3 x i16>* %b
+  br label %next
+
+next:
+  ret <3 x i16> %tt
+}
+
+
+define <3 x i32> @test_3xi32(<3 x i32> %x, <3 x i32>* %b) {
+  %bb = load <3 x i32>* %b
+  %tt = xor <3 x i32> %x, %bb
+  store <3 x i32> %tt, <3 x i32>* %b
+  br label %next
+
+next:
+  ret <3 x i32> %tt
+}
+
+
+define <3 x i64> @test_3xi64(<3 x i64> %x, <3 x i64>* %b) {
+  %bb = load <3 x i64>* %b
+  %tt = xor <3 x i64> %x, %bb
+  store <3 x i64> %tt, <3 x i64>* %b
+  br label %next
+
+next:
+  ret <3 x i64> %tt
+}
+
+
+define <3 x i128> @test_3xi128(<3 x i128> %x, <3 x i128>* %b) {
+  %bb = load <3 x i128>* %b
+  %tt = xor <3 x i128> %x, %bb
+  store <3 x i128> %tt, <3 x i128>* %b
+  br label %next
+
+next:
+  ret <3 x i128> %tt
+}
+
+
+define <3 x i256> @test_3xi256(<3 x i256> %x, <3 x i256>* %b) {
+  %bb = load <3 x i256>* %b
+  %tt = xor <3 x i256> %x, %bb
+  store <3 x i256> %tt, <3 x i256>* %b
+  br label %next
+
+next:
+  ret <3 x i256> %tt
+}
+
+
+define <3 x i512> @test_3xi512(<3 x i512> %x, <3 x i512>* %b) {
+  %bb = load <3 x i512>* %b
+  %tt = xor <3 x i512> %x, %bb
+  store <3 x i512> %tt, <3 x i512>* %b
+  br label %next
+
+next:
+  ret <3 x i512> %tt
+}
+
+
+define <4 x i8> @test_4xi8(<4 x i8> %x, <4 x i8>* %b) {
+  %bb = load <4 x i8>* %b
+  %tt = xor <4 x i8> %x, %bb
+  store <4 x i8> %tt, <4 x i8>* %b
+  br label %next
+
+next:
+  ret <4 x i8> %tt
+}
+
+
+define <4 x i16> @test_4xi16(<4 x i16> %x, <4 x i16>* %b) {
+  %bb = load <4 x i16>* %b
+  %tt = xor <4 x i16> %x, %bb
+  store <4 x i16> %tt, <4 x i16>* %b
+  br label %next
+
+next:
+  ret <4 x i16> %tt
+}
+
+
+define <4 x i32> @test_4xi32(<4 x i32> %x, <4 x i32>* %b) {
+  %bb = load <4 x i32>* %b
+  %tt = xor <4 x i32> %x, %bb
+  store <4 x i32> %tt, <4 x i32>* %b
+  br label %next
+
+next:
+  ret <4 x i32> %tt
+}
+
+
+define <4 x i64> @test_4xi64(<4 x i64> %x, <4 x i64>* %b) {
+  %bb = load <4 x i64>* %b
+  %tt = xor <4 x i64> %x, %bb
+  store <4 x i64> %tt, <4 x i64>* %b
+  br label %next
+
+next:
+  ret <4 x i64> %tt
+}
+
+
+define <4 x i128> @test_4xi128(<4 x i128> %x, <4 x i128>* %b) {
+  %bb = load <4 x i128>* %b
+  %tt = xor <4 x i128> %x, %bb
+  store <4 x i128> %tt, <4 x i128>* %b
+  br label %next
+
+next:
+  ret <4 x i128> %tt
+}
+
+
+define <4 x i256> @test_4xi256(<4 x i256> %x, <4 x i256>* %b) {
+  %bb = load <4 x i256>* %b
+  %tt = xor <4 x i256> %x, %bb
+  store <4 x i256> %tt, <4 x i256>* %b
+  br label %next
+
+next:
+  ret <4 x i256> %tt
+}
+
+
+define <4 x i512> @test_4xi512(<4 x i512> %x, <4 x i512>* %b) {
+  %bb = load <4 x i512>* %b
+  %tt = xor <4 x i512> %x, %bb
+  store <4 x i512> %tt, <4 x i512>* %b
+  br label %next
+
+next:
+  ret <4 x i512> %tt
+}
+
+
+define <5 x i8> @test_5xi8(<5 x i8> %x, <5 x i8>* %b) {
+  %bb = load <5 x i8>* %b
+  %tt = xor <5 x i8> %x, %bb
+  store <5 x i8> %tt, <5 x i8>* %b
+  br label %next
+
+next:
+  ret <5 x i8> %tt
+}
+
+
+define <5 x i16> @test_5xi16(<5 x i16> %x, <5 x i16>* %b) {
+  %bb = load <5 x i16>* %b
+  %tt = xor <5 x i16> %x, %bb
+  store <5 x i16> %tt, <5 x i16>* %b
+  br label %next
+
+next:
+  ret <5 x i16> %tt
+}
+
+
+define <5 x i32> @test_5xi32(<5 x i32> %x, <5 x i32>* %b) {
+  %bb = load <5 x i32>* %b
+  %tt = xor <5 x i32> %x, %bb
+  store <5 x i32> %tt, <5 x i32>* %b
+  br label %next
+
+next:
+  ret <5 x i32> %tt
+}
+
+
+define <5 x i64> @test_5xi64(<5 x i64> %x, <5 x i64>* %b) {
+  %bb = load <5 x i64>* %b
+  %tt = xor <5 x i64> %x, %bb
+  store <5 x i64> %tt, <5 x i64>* %b
+  br label %next
+
+next:
+  ret <5 x i64> %tt
+}
+
+
+define <5 x i128> @test_5xi128(<5 x i128> %x, <5 x i128>* %b) {
+  %bb = load <5 x i128>* %b
+  %tt = xor <5 x i128> %x, %bb
+  store <5 x i128> %tt, <5 x i128>* %b
+  br label %next
+
+next:
+  ret <5 x i128> %tt
+}
+
+
+define <5 x i256> @test_5xi256(<5 x i256> %x, <5 x i256>* %b) {
+  %bb = load <5 x i256>* %b
+  %tt = xor <5 x i256> %x, %bb
+  store <5 x i256> %tt, <5 x i256>* %b
+  br label %next
+
+next:
+  ret <5 x i256> %tt
+}
+
+
+define <5 x i512> @test_5xi512(<5 x i512> %x, <5 x i512>* %b) {
+  %bb = load <5 x i512>* %b
+  %tt = xor <5 x i512> %x, %bb
+  store <5 x i512> %tt, <5 x i512>* %b
+  br label %next
+
+next:
+  ret <5 x i512> %tt
+}
+
+
+define <6 x i8> @test_6xi8(<6 x i8> %x, <6 x i8>* %b) {
+  %bb = load <6 x i8>* %b
+  %tt = xor <6 x i8> %x, %bb
+  store <6 x i8> %tt, <6 x i8>* %b
+  br label %next
+
+next:
+  ret <6 x i8> %tt
+}
+
+
+define <6 x i16> @test_6xi16(<6 x i16> %x, <6 x i16>* %b) {
+  %bb = load <6 x i16>* %b
+  %tt = xor <6 x i16> %x, %bb
+  store <6 x i16> %tt, <6 x i16>* %b
+  br label %next
+
+next:
+  ret <6 x i16> %tt
+}
+
+
+define <6 x i32> @test_6xi32(<6 x i32> %x, <6 x i32>* %b) {
+  %bb = load <6 x i32>* %b
+  %tt = xor <6 x i32> %x, %bb
+  store <6 x i32> %tt, <6 x i32>* %b
+  br label %next
+
+next:
+  ret <6 x i32> %tt
+}
+
+
+define <6 x i64> @test_6xi64(<6 x i64> %x, <6 x i64>* %b) {
+  %bb = load <6 x i64>* %b
+  %tt = xor <6 x i64> %x, %bb
+  store <6 x i64> %tt, <6 x i64>* %b
+  br label %next
+
+next:
+  ret <6 x i64> %tt
+}
+
+
+define <6 x i128> @test_6xi128(<6 x i128> %x, <6 x i128>* %b) {
+  %bb = load <6 x i128>* %b
+  %tt = xor <6 x i128> %x, %bb
+  store <6 x i128> %tt, <6 x i128>* %b
+  br label %next
+
+next:
+  ret <6 x i128> %tt
+}
+
+
+define <6 x i256> @test_6xi256(<6 x i256> %x, <6 x i256>* %b) {
+  %bb = load <6 x i256>* %b
+  %tt = xor <6 x i256> %x, %bb
+  store <6 x i256> %tt, <6 x i256>* %b
+  br label %next
+
+next:
+  ret <6 x i256> %tt
+}
+
+
+define <6 x i512> @test_6xi512(<6 x i512> %x, <6 x i512>* %b) {
+  %bb = load <6 x i512>* %b
+  %tt = xor <6 x i512> %x, %bb
+  store <6 x i512> %tt, <6 x i512>* %b
+  br label %next
+
+next:
+  ret <6 x i512> %tt
+}
+
+
+define <7 x i8> @test_7xi8(<7 x i8> %x, <7 x i8>* %b) {
+  %bb = load <7 x i8>* %b
+  %tt = xor <7 x i8> %x, %bb
+  store <7 x i8> %tt, <7 x i8>* %b
+  br label %next
+
+next:
+  ret <7 x i8> %tt
+}
+
+
+define <7 x i16> @test_7xi16(<7 x i16> %x, <7 x i16>* %b) {
+  %bb = load <7 x i16>* %b
+  %tt = xor <7 x i16> %x, %bb
+  store <7 x i16> %tt, <7 x i16>* %b
+  br label %next
+
+next:
+  ret <7 x i16> %tt
+}
+
+
+define <7 x i32> @test_7xi32(<7 x i32> %x, <7 x i32>* %b) {
+  %bb = load <7 x i32>* %b
+  %tt = xor <7 x i32> %x, %bb
+  store <7 x i32> %tt, <7 x i32>* %b
+  br label %next
+
+next:
+  ret <7 x i32> %tt
+}
+
+
+define <7 x i64> @test_7xi64(<7 x i64> %x, <7 x i64>* %b) {
+  %bb = load <7 x i64>* %b
+  %tt = xor <7 x i64> %x, %bb
+  store <7 x i64> %tt, <7 x i64>* %b
+  br label %next
+
+next:
+  ret <7 x i64> %tt
+}
+
+
+define <7 x i128> @test_7xi128(<7 x i128> %x, <7 x i128>* %b) {
+  %bb = load <7 x i128>* %b
+  %tt = xor <7 x i128> %x, %bb
+  store <7 x i128> %tt, <7 x i128>* %b
+  br label %next
+
+next:
+  ret <7 x i128> %tt
+}
+
+
+define <7 x i256> @test_7xi256(<7 x i256> %x, <7 x i256>* %b) {
+  %bb = load <7 x i256>* %b
+  %tt = xor <7 x i256> %x, %bb
+  store <7 x i256> %tt, <7 x i256>* %b
+  br label %next
+
+next:
+  ret <7 x i256> %tt
+}
+
+
+define <7 x i512> @test_7xi512(<7 x i512> %x, <7 x i512>* %b) {
+  %bb = load <7 x i512>* %b
+  %tt = xor <7 x i512> %x, %bb
+  store <7 x i512> %tt, <7 x i512>* %b
+  br label %next
+
+next:
+  ret <7 x i512> %tt
+}
+
+
+define <8 x i8> @test_8xi8(<8 x i8> %x, <8 x i8>* %b) {
+  %bb = load <8 x i8>* %b
+  %tt = xor <8 x i8> %x, %bb
+  store <8 x i8> %tt, <8 x i8>* %b
+  br label %next
+
+next:
+  ret <8 x i8> %tt
+}
+
+
+define <8 x i16> @test_8xi16(<8 x i16> %x, <8 x i16>* %b) {
+  %bb = load <8 x i16>* %b
+  %tt = xor <8 x i16> %x, %bb
+  store <8 x i16> %tt, <8 x i16>* %b
+  br label %next
+
+next:
+  ret <8 x i16> %tt
+}
+
+
+define <8 x i32> @test_8xi32(<8 x i32> %x, <8 x i32>* %b) {
+  %bb = load <8 x i32>* %b
+  %tt = xor <8 x i32> %x, %bb
+  store <8 x i32> %tt, <8 x i32>* %b
+  br label %next
+
+next:
+  ret <8 x i32> %tt
+}
+
+
+define <8 x i64> @test_8xi64(<8 x i64> %x, <8 x i64>* %b) {
+  %bb = load <8 x i64>* %b
+  %tt = xor <8 x i64> %x, %bb
+  store <8 x i64> %tt, <8 x i64>* %b
+  br label %next
+
+next:
+  ret <8 x i64> %tt
+}
+
+
+define <8 x i128> @test_8xi128(<8 x i128> %x, <8 x i128>* %b) {
+  %bb = load <8 x i128>* %b
+  %tt = xor <8 x i128> %x, %bb
+  store <8 x i128> %tt, <8 x i128>* %b
+  br label %next
+
+next:
+  ret <8 x i128> %tt
+}
+
+
+define <8 x i256> @test_8xi256(<8 x i256> %x, <8 x i256>* %b) {
+  %bb = load <8 x i256>* %b
+  %tt = xor <8 x i256> %x, %bb
+  store <8 x i256> %tt, <8 x i256>* %b
+  br label %next
+
+next:
+  ret <8 x i256> %tt
+}
+
+
+define <8 x i512> @test_8xi512(<8 x i512> %x, <8 x i512>* %b) {
+  %bb = load <8 x i512>* %b
+  %tt = xor <8 x i512> %x, %bb
+  store <8 x i512> %tt, <8 x i512>* %b
+  br label %next
+
+next:
+  ret <8 x i512> %tt
+}
+
+
+define <9 x i8> @test_9xi8(<9 x i8> %x, <9 x i8>* %b) {
+  %bb = load <9 x i8>* %b
+  %tt = xor <9 x i8> %x, %bb
+  store <9 x i8> %tt, <9 x i8>* %b
+  br label %next
+
+next:
+  ret <9 x i8> %tt
+}
+
+
+define <9 x i16> @test_9xi16(<9 x i16> %x, <9 x i16>* %b) {
+  %bb = load <9 x i16>* %b
+  %tt = xor <9 x i16> %x, %bb
+  store <9 x i16> %tt, <9 x i16>* %b
+  br label %next
+
+next:
+  ret <9 x i16> %tt
+}
+
+
+define <9 x i32> @test_9xi32(<9 x i32> %x, <9 x i32>* %b) {
+  %bb = load <9 x i32>* %b
+  %tt = xor <9 x i32> %x, %bb
+  store <9 x i32> %tt, <9 x i32>* %b
+  br label %next
+
+next:
+  ret <9 x i32> %tt
+}
+
+
+define <9 x i64> @test_9xi64(<9 x i64> %x, <9 x i64>* %b) {
+  %bb = load <9 x i64>* %b
+  %tt = xor <9 x i64> %x, %bb
+  store <9 x i64> %tt, <9 x i64>* %b
+  br label %next
+
+next:
+  ret <9 x i64> %tt
+}
+
+
+define <9 x i128> @test_9xi128(<9 x i128> %x, <9 x i128>* %b) {
+  %bb = load <9 x i128>* %b
+  %tt = xor <9 x i128> %x, %bb
+  store <9 x i128> %tt, <9 x i128>* %b
+  br label %next
+
+next:
+  ret <9 x i128> %tt
+}
+
+
+define <9 x i256> @test_9xi256(<9 x i256> %x, <9 x i256>* %b) {
+  %bb = load <9 x i256>* %b
+  %tt = xor <9 x i256> %x, %bb
+  store <9 x i256> %tt, <9 x i256>* %b
+  br label %next
+
+next:
+  ret <9 x i256> %tt
+}
+
+
+define <9 x i512> @test_9xi512(<9 x i512> %x, <9 x i512>* %b) {
+  %bb = load <9 x i512>* %b
+  %tt = xor <9 x i512> %x, %bb
+  store <9 x i512> %tt, <9 x i512>* %b
+  br label %next
+
+next:
+  ret <9 x i512> %tt
+}
+
+
+define <10 x i8> @test_10xi8(<10 x i8> %x, <10 x i8>* %b) {
+  %bb = load <10 x i8>* %b
+  %tt = xor <10 x i8> %x, %bb
+  store <10 x i8> %tt, <10 x i8>* %b
+  br label %next
+
+next:
+  ret <10 x i8> %tt
+}
+
+
+define <10 x i16> @test_10xi16(<10 x i16> %x, <10 x i16>* %b) {
+  %bb = load <10 x i16>* %b
+  %tt = xor <10 x i16> %x, %bb
+  store <10 x i16> %tt, <10 x i16>* %b
+  br label %next
+
+next:
+  ret <10 x i16> %tt
+}
+
+
+define <10 x i32> @test_10xi32(<10 x i32> %x, <10 x i32>* %b) {
+  %bb = load <10 x i32>* %b
+  %tt = xor <10 x i32> %x, %bb
+  store <10 x i32> %tt, <10 x i32>* %b
+  br label %next
+
+next:
+  ret <10 x i32> %tt
+}
+
+
+define <10 x i64> @test_10xi64(<10 x i64> %x, <10 x i64>* %b) {
+  %bb = load <10 x i64>* %b
+  %tt = xor <10 x i64> %x, %bb
+  store <10 x i64> %tt, <10 x i64>* %b
+  br label %next
+
+next:
+  ret <10 x i64> %tt
+}
+
+
+define <10 x i128> @test_10xi128(<10 x i128> %x, <10 x i128>* %b) {
+  %bb = load <10 x i128>* %b
+  %tt = xor <10 x i128> %x, %bb
+  store <10 x i128> %tt, <10 x i128>* %b
+  br label %next
+
+next:
+  ret <10 x i128> %tt
+}
+
+
+define <10 x i256> @test_10xi256(<10 x i256> %x, <10 x i256>* %b) {
+  %bb = load <10 x i256>* %b
+  %tt = xor <10 x i256> %x, %bb
+  store <10 x i256> %tt, <10 x i256>* %b
+  br label %next
+
+next:
+  ret <10 x i256> %tt
+}
+
+
+define <10 x i512> @test_10xi512(<10 x i512> %x, <10 x i512>* %b) {
+  %bb = load <10 x i512>* %b
+  %tt = xor <10 x i512> %x, %bb
+  store <10 x i512> %tt, <10 x i512>* %b
+  br label %next
+
+next:
+  ret <10 x i512> %tt
+}
+
+
+define <11 x i8> @test_11xi8(<11 x i8> %x, <11 x i8>* %b) {
+  %bb = load <11 x i8>* %b
+  %tt = xor <11 x i8> %x, %bb
+  store <11 x i8> %tt, <11 x i8>* %b
+  br label %next
+
+next:
+  ret <11 x i8> %tt
+}
+
+
+define <11 x i16> @test_11xi16(<11 x i16> %x, <11 x i16>* %b) {
+  %bb = load <11 x i16>* %b
+  %tt = xor <11 x i16> %x, %bb
+  store <11 x i16> %tt, <11 x i16>* %b
+  br label %next
+
+next:
+  ret <11 x i16> %tt
+}
+
+
+define <11 x i32> @test_11xi32(<11 x i32> %x, <11 x i32>* %b) {
+  %bb = load <11 x i32>* %b
+  %tt = xor <11 x i32> %x, %bb
+  store <11 x i32> %tt, <11 x i32>* %b
+  br label %next
+
+next:
+  ret <11 x i32> %tt
+}
+
+
+define <11 x i64> @test_11xi64(<11 x i64> %x, <11 x i64>* %b) {
+  %bb = load <11 x i64>* %b
+  %tt = xor <11 x i64> %x, %bb
+  store <11 x i64> %tt, <11 x i64>* %b
+  br label %next
+
+next:
+  ret <11 x i64> %tt
+}
+
+
+define <11 x i128> @test_11xi128(<11 x i128> %x, <11 x i128>* %b) {
+  %bb = load <11 x i128>* %b
+  %tt = xor <11 x i128> %x, %bb
+  store <11 x i128> %tt, <11 x i128>* %b
+  br label %next
+
+next:
+  ret <11 x i128> %tt
+}
+
+
+define <11 x i256> @test_11xi256(<11 x i256> %x, <11 x i256>* %b) {
+  %bb = load <11 x i256>* %b
+  %tt = xor <11 x i256> %x, %bb
+  store <11 x i256> %tt, <11 x i256>* %b
+  br label %next
+
+next:
+  ret <11 x i256> %tt
+}
+
+
+define <11 x i512> @test_11xi512(<11 x i512> %x, <11 x i512>* %b) {
+  %bb = load <11 x i512>* %b
+  %tt = xor <11 x i512> %x, %bb
+  store <11 x i512> %tt, <11 x i512>* %b
+  br label %next
+
+next:
+  ret <11 x i512> %tt
+}
+
+
+define <12 x i8> @test_12xi8(<12 x i8> %x, <12 x i8>* %b) {
+  %bb = load <12 x i8>* %b
+  %tt = xor <12 x i8> %x, %bb
+  store <12 x i8> %tt, <12 x i8>* %b
+  br label %next
+
+next:
+  ret <12 x i8> %tt
+}
+
+
+define <12 x i16> @test_12xi16(<12 x i16> %x, <12 x i16>* %b) {
+  %bb = load <12 x i16>* %b
+  %tt = xor <12 x i16> %x, %bb
+  store <12 x i16> %tt, <12 x i16>* %b
+  br label %next
+
+next:
+  ret <12 x i16> %tt
+}
+
+
+define <12 x i32> @test_12xi32(<12 x i32> %x, <12 x i32>* %b) {
+  %bb = load <12 x i32>* %b
+  %tt = xor <12 x i32> %x, %bb
+  store <12 x i32> %tt, <12 x i32>* %b
+  br label %next
+
+next:
+  ret <12 x i32> %tt
+}
+
+
+define <12 x i64> @test_12xi64(<12 x i64> %x, <12 x i64>* %b) {
+  %bb = load <12 x i64>* %b
+  %tt = xor <12 x i64> %x, %bb
+  store <12 x i64> %tt, <12 x i64>* %b
+  br label %next
+
+next:
+  ret <12 x i64> %tt
+}
+
+
+define <12 x i128> @test_12xi128(<12 x i128> %x, <12 x i128>* %b) {
+  %bb = load <12 x i128>* %b
+  %tt = xor <12 x i128> %x, %bb
+  store <12 x i128> %tt, <12 x i128>* %b
+  br label %next
+
+next:
+  ret <12 x i128> %tt
+}
+
+
+define <12 x i256> @test_12xi256(<12 x i256> %x, <12 x i256>* %b) {
+  %bb = load <12 x i256>* %b
+  %tt = xor <12 x i256> %x, %bb
+  store <12 x i256> %tt, <12 x i256>* %b
+  br label %next
+
+next:
+  ret <12 x i256> %tt
+}
+
+
+define <12 x i512> @test_12xi512(<12 x i512> %x, <12 x i512>* %b) {
+  %bb = load <12 x i512>* %b
+  %tt = xor <12 x i512> %x, %bb
+  store <12 x i512> %tt, <12 x i512>* %b
+  br label %next
+
+next:
+  ret <12 x i512> %tt
+}
+
+
+define <13 x i8> @test_13xi8(<13 x i8> %x, <13 x i8>* %b) {
+  %bb = load <13 x i8>* %b
+  %tt = xor <13 x i8> %x, %bb
+  store <13 x i8> %tt, <13 x i8>* %b
+  br label %next
+
+next:
+  ret <13 x i8> %tt
+}
+
+
+define <13 x i16> @test_13xi16(<13 x i16> %x, <13 x i16>* %b) {
+  %bb = load <13 x i16>* %b
+  %tt = xor <13 x i16> %x, %bb
+  store <13 x i16> %tt, <13 x i16>* %b
+  br label %next
+
+next:
+  ret <13 x i16> %tt
+}
+
+
+define <13 x i32> @test_13xi32(<13 x i32> %x, <13 x i32>* %b) {
+  %bb = load <13 x i32>* %b
+  %tt = xor <13 x i32> %x, %bb
+  store <13 x i32> %tt, <13 x i32>* %b
+  br label %next
+
+next:
+  ret <13 x i32> %tt
+}
+
+
+define <13 x i64> @test_13xi64(<13 x i64> %x, <13 x i64>* %b) {
+  %bb = load <13 x i64>* %b
+  %tt = xor <13 x i64> %x, %bb
+  store <13 x i64> %tt, <13 x i64>* %b
+  br label %next
+
+next:
+  ret <13 x i64> %tt
+}
+
+
+define <13 x i128> @test_13xi128(<13 x i128> %x, <13 x i128>* %b) {
+  %bb = load <13 x i128>* %b
+  %tt = xor <13 x i128> %x, %bb
+  store <13 x i128> %tt, <13 x i128>* %b
+  br label %next
+
+next:
+  ret <13 x i128> %tt
+}
+
+
+define <13 x i256> @test_13xi256(<13 x i256> %x, <13 x i256>* %b) {
+  %bb = load <13 x i256>* %b
+  %tt = xor <13 x i256> %x, %bb
+  store <13 x i256> %tt, <13 x i256>* %b
+  br label %next
+
+next:
+  ret <13 x i256> %tt
+}
+
+
+define <13 x i512> @test_13xi512(<13 x i512> %x, <13 x i512>* %b) {
+  %bb = load <13 x i512>* %b
+  %tt = xor <13 x i512> %x, %bb
+  store <13 x i512> %tt, <13 x i512>* %b
+  br label %next
+
+next:
+  ret <13 x i512> %tt
+}
+
+
+define <14 x i8> @test_14xi8(<14 x i8> %x, <14 x i8>* %b) {
+  %bb = load <14 x i8>* %b
+  %tt = xor <14 x i8> %x, %bb
+  store <14 x i8> %tt, <14 x i8>* %b
+  br label %next
+
+next:
+  ret <14 x i8> %tt
+}
+
+
+define <14 x i16> @test_14xi16(<14 x i16> %x, <14 x i16>* %b) {
+  %bb = load <14 x i16>* %b
+  %tt = xor <14 x i16> %x, %bb
+  store <14 x i16> %tt, <14 x i16>* %b
+  br label %next
+
+next:
+  ret <14 x i16> %tt
+}
+
+
+define <14 x i32> @test_14xi32(<14 x i32> %x, <14 x i32>* %b) {
+  %bb = load <14 x i32>* %b
+  %tt = xor <14 x i32> %x, %bb
+  store <14 x i32> %tt, <14 x i32>* %b
+  br label %next
+
+next:
+  ret <14 x i32> %tt
+}
+
+
+define <14 x i64> @test_14xi64(<14 x i64> %x, <14 x i64>* %b) {
+  %bb = load <14 x i64>* %b
+  %tt = xor <14 x i64> %x, %bb
+  store <14 x i64> %tt, <14 x i64>* %b
+  br label %next
+
+next:
+  ret <14 x i64> %tt
+}
+
+
+define <14 x i128> @test_14xi128(<14 x i128> %x, <14 x i128>* %b) {
+  %bb = load <14 x i128>* %b
+  %tt = xor <14 x i128> %x, %bb
+  store <14 x i128> %tt, <14 x i128>* %b
+  br label %next
+
+next:
+  ret <14 x i128> %tt
+}
+
+
+define <14 x i256> @test_14xi256(<14 x i256> %x, <14 x i256>* %b) {
+  %bb = load <14 x i256>* %b
+  %tt = xor <14 x i256> %x, %bb
+  store <14 x i256> %tt, <14 x i256>* %b
+  br label %next
+
+next:
+  ret <14 x i256> %tt
+}
+
+
+define <14 x i512> @test_14xi512(<14 x i512> %x, <14 x i512>* %b) {
+  %bb = load <14 x i512>* %b
+  %tt = xor <14 x i512> %x, %bb
+  store <14 x i512> %tt, <14 x i512>* %b
+  br label %next
+
+next:
+  ret <14 x i512> %tt
+}
+
+
+define <15 x i8> @test_15xi8(<15 x i8> %x, <15 x i8>* %b) {
+  %bb = load <15 x i8>* %b
+  %tt = xor <15 x i8> %x, %bb
+  store <15 x i8> %tt, <15 x i8>* %b
+  br label %next
+
+next:
+  ret <15 x i8> %tt
+}
+
+
+define <15 x i16> @test_15xi16(<15 x i16> %x, <15 x i16>* %b) {
+  %bb = load <15 x i16>* %b
+  %tt = xor <15 x i16> %x, %bb
+  store <15 x i16> %tt, <15 x i16>* %b
+  br label %next
+
+next:
+  ret <15 x i16> %tt
+}
+
+
+define <15 x i32> @test_15xi32(<15 x i32> %x, <15 x i32>* %b) {
+  %bb = load <15 x i32>* %b
+  %tt = xor <15 x i32> %x, %bb
+  store <15 x i32> %tt, <15 x i32>* %b
+  br label %next
+
+next:
+  ret <15 x i32> %tt
+}
+
+
+define <15 x i64> @test_15xi64(<15 x i64> %x, <15 x i64>* %b) {
+  %bb = load <15 x i64>* %b
+  %tt = xor <15 x i64> %x, %bb
+  store <15 x i64> %tt, <15 x i64>* %b
+  br label %next
+
+next:
+  ret <15 x i64> %tt
+}
+
+
+define <15 x i128> @test_15xi128(<15 x i128> %x, <15 x i128>* %b) {
+  %bb = load <15 x i128>* %b
+  %tt = xor <15 x i128> %x, %bb
+  store <15 x i128> %tt, <15 x i128>* %b
+  br label %next
+
+next:
+  ret <15 x i128> %tt
+}
+
+
+define <15 x i256> @test_15xi256(<15 x i256> %x, <15 x i256>* %b) {
+  %bb = load <15 x i256>* %b
+  %tt = xor <15 x i256> %x, %bb
+  store <15 x i256> %tt, <15 x i256>* %b
+  br label %next
+
+next:
+  ret <15 x i256> %tt
+}
+
+
+define <15 x i512> @test_15xi512(<15 x i512> %x, <15 x i512>* %b) {
+  %bb = load <15 x i512>* %b
+  %tt = xor <15 x i512> %x, %bb
+  store <15 x i512> %tt, <15 x i512>* %b
+  br label %next
+
+next:
+  ret <15 x i512> %tt
+}
+
+
+define <16 x i8> @test_16xi8(<16 x i8> %x, <16 x i8>* %b) {
+  %bb = load <16 x i8>* %b
+  %tt = xor <16 x i8> %x, %bb
+  store <16 x i8> %tt, <16 x i8>* %b
+  br label %next
+
+next:
+  ret <16 x i8> %tt
+}
+
+
+define <16 x i16> @test_16xi16(<16 x i16> %x, <16 x i16>* %b) {
+  %bb = load <16 x i16>* %b
+  %tt = xor <16 x i16> %x, %bb
+  store <16 x i16> %tt, <16 x i16>* %b
+  br label %next
+
+next:
+  ret <16 x i16> %tt
+}
+
+
+define <16 x i32> @test_16xi32(<16 x i32> %x, <16 x i32>* %b) {
+  %bb = load <16 x i32>* %b
+  %tt = xor <16 x i32> %x, %bb
+  store <16 x i32> %tt, <16 x i32>* %b
+  br label %next
+
+next:
+  ret <16 x i32> %tt
+}
+
+
+define <16 x i64> @test_16xi64(<16 x i64> %x, <16 x i64>* %b) {
+  %bb = load <16 x i64>* %b
+  %tt = xor <16 x i64> %x, %bb
+  store <16 x i64> %tt, <16 x i64>* %b
+  br label %next
+
+next:
+  ret <16 x i64> %tt
+}
+
+
+define <16 x i128> @test_16xi128(<16 x i128> %x, <16 x i128>* %b) {
+  %bb = load <16 x i128>* %b
+  %tt = xor <16 x i128> %x, %bb
+  store <16 x i128> %tt, <16 x i128>* %b
+  br label %next
+
+next:
+  ret <16 x i128> %tt
+}
+
+
+define <16 x i256> @test_16xi256(<16 x i256> %x, <16 x i256>* %b) {
+  %bb = load <16 x i256>* %b
+  %tt = xor <16 x i256> %x, %bb
+  store <16 x i256> %tt, <16 x i256>* %b
+  br label %next
+
+next:
+  ret <16 x i256> %tt
+}
+
+
+define <16 x i512> @test_16xi512(<16 x i512> %x, <16 x i512>* %b) {
+  %bb = load <16 x i512>* %b
+  %tt = xor <16 x i512> %x, %bb
+  store <16 x i512> %tt, <16 x i512>* %b
+  br label %next
+
+next:
+  ret <16 x i512> %tt
+}
+
+
+define <17 x i8> @test_17xi8(<17 x i8> %x, <17 x i8>* %b) {
+  %bb = load <17 x i8>* %b
+  %tt = xor <17 x i8> %x, %bb
+  store <17 x i8> %tt, <17 x i8>* %b
+  br label %next
+
+next:
+  ret <17 x i8> %tt
+}
+
+
+define <17 x i16> @test_17xi16(<17 x i16> %x, <17 x i16>* %b) {
+  %bb = load <17 x i16>* %b
+  %tt = xor <17 x i16> %x, %bb
+  store <17 x i16> %tt, <17 x i16>* %b
+  br label %next
+
+next:
+  ret <17 x i16> %tt
+}
+
+
+define <17 x i32> @test_17xi32(<17 x i32> %x, <17 x i32>* %b) {
+  %bb = load <17 x i32>* %b
+  %tt = xor <17 x i32> %x, %bb
+  store <17 x i32> %tt, <17 x i32>* %b
+  br label %next
+
+next:
+  ret <17 x i32> %tt
+}
+
+
+define <17 x i64> @test_17xi64(<17 x i64> %x, <17 x i64>* %b) {
+  %bb = load <17 x i64>* %b
+  %tt = xor <17 x i64> %x, %bb
+  store <17 x i64> %tt, <17 x i64>* %b
+  br label %next
+
+next:
+  ret <17 x i64> %tt
+}
+
+
+define <17 x i128> @test_17xi128(<17 x i128> %x, <17 x i128>* %b) {
+  %bb = load <17 x i128>* %b
+  %tt = xor <17 x i128> %x, %bb
+  store <17 x i128> %tt, <17 x i128>* %b
+  br label %next
+
+next:
+  ret <17 x i128> %tt
+}
+
+
+define <17 x i256> @test_17xi256(<17 x i256> %x, <17 x i256>* %b) {
+  %bb = load <17 x i256>* %b
+  %tt = xor <17 x i256> %x, %bb
+  store <17 x i256> %tt, <17 x i256>* %b
+  br label %next
+
+next:
+  ret <17 x i256> %tt
+}
+
+
+define <17 x i512> @test_17xi512(<17 x i512> %x, <17 x i512>* %b) {
+  %bb = load <17 x i512>* %b
+  %tt = xor <17 x i512> %x, %bb
+  store <17 x i512> %tt, <17 x i512>* %b
+  br label %next
+
+next:
+  ret <17 x i512> %tt
+}
+
+
+define <18 x i8> @test_18xi8(<18 x i8> %x, <18 x i8>* %b) {
+  %bb = load <18 x i8>* %b
+  %tt = xor <18 x i8> %x, %bb
+  store <18 x i8> %tt, <18 x i8>* %b
+  br label %next
+
+next:
+  ret <18 x i8> %tt
+}
+
+
+define <18 x i16> @test_18xi16(<18 x i16> %x, <18 x i16>* %b) {
+  %bb = load <18 x i16>* %b
+  %tt = xor <18 x i16> %x, %bb
+  store <18 x i16> %tt, <18 x i16>* %b
+  br label %next
+
+next:
+  ret <18 x i16> %tt
+}
+
+
+define <18 x i32> @test_18xi32(<18 x i32> %x, <18 x i32>* %b) {
+  %bb = load <18 x i32>* %b
+  %tt = xor <18 x i32> %x, %bb
+  store <18 x i32> %tt, <18 x i32>* %b
+  br label %next
+
+next:
+  ret <18 x i32> %tt
+}
+
+
+define <18 x i64> @test_18xi64(<18 x i64> %x, <18 x i64>* %b) {
+  %bb = load <18 x i64>* %b
+  %tt = xor <18 x i64> %x, %bb
+  store <18 x i64> %tt, <18 x i64>* %b
+  br label %next
+
+next:
+  ret <18 x i64> %tt
+}
+
+
+define <18 x i128> @test_18xi128(<18 x i128> %x, <18 x i128>* %b) {
+  %bb = load <18 x i128>* %b
+  %tt = xor <18 x i128> %x, %bb
+  store <18 x i128> %tt, <18 x i128>* %b
+  br label %next
+
+next:
+  ret <18 x i128> %tt
+}
+
+
+define <18 x i256> @test_18xi256(<18 x i256> %x, <18 x i256>* %b) {
+  %bb = load <18 x i256>* %b
+  %tt = xor <18 x i256> %x, %bb
+  store <18 x i256> %tt, <18 x i256>* %b
+  br label %next
+
+next:
+  ret <18 x i256> %tt
+}
+
+
+define <18 x i512> @test_18xi512(<18 x i512> %x, <18 x i512>* %b) {
+  %bb = load <18 x i512>* %b
+  %tt = xor <18 x i512> %x, %bb
+  store <18 x i512> %tt, <18 x i512>* %b
+  br label %next
+
+next:
+  ret <18 x i512> %tt
+}
+
+
+define <19 x i8> @test_19xi8(<19 x i8> %x, <19 x i8>* %b) {
+  %bb = load <19 x i8>* %b
+  %tt = xor <19 x i8> %x, %bb
+  store <19 x i8> %tt, <19 x i8>* %b
+  br label %next
+
+next:
+  ret <19 x i8> %tt
+}
+
+
+define <19 x i16> @test_19xi16(<19 x i16> %x, <19 x i16>* %b) {
+  %bb = load <19 x i16>* %b
+  %tt = xor <19 x i16> %x, %bb
+  store <19 x i16> %tt, <19 x i16>* %b
+  br label %next
+
+next:
+  ret <19 x i16> %tt
+}
+
+
+define <19 x i32> @test_19xi32(<19 x i32> %x, <19 x i32>* %b) {
+  %bb = load <19 x i32>* %b
+  %tt = xor <19 x i32> %x, %bb
+  store <19 x i32> %tt, <19 x i32>* %b
+  br label %next
+
+next:
+  ret <19 x i32> %tt
+}
+
+
+define <19 x i64> @test_19xi64(<19 x i64> %x, <19 x i64>* %b) {
+  %bb = load <19 x i64>* %b
+  %tt = xor <19 x i64> %x, %bb
+  store <19 x i64> %tt, <19 x i64>* %b
+  br label %next
+
+next:
+  ret <19 x i64> %tt
+}
+
+
+define <19 x i128> @test_19xi128(<19 x i128> %x, <19 x i128>* %b) {
+  %bb = load <19 x i128>* %b
+  %tt = xor <19 x i128> %x, %bb
+  store <19 x i128> %tt, <19 x i128>* %b
+  br label %next
+
+next:
+  ret <19 x i128> %tt
+}
+
+
+define <19 x i256> @test_19xi256(<19 x i256> %x, <19 x i256>* %b) {
+  %bb = load <19 x i256>* %b
+  %tt = xor <19 x i256> %x, %bb
+  store <19 x i256> %tt, <19 x i256>* %b
+  br label %next
+
+next:
+  ret <19 x i256> %tt
+}
+
+
+define <19 x i512> @test_19xi512(<19 x i512> %x, <19 x i512>* %b) {
+  %bb = load <19 x i512>* %b
+  %tt = xor <19 x i512> %x, %bb
+  store <19 x i512> %tt, <19 x i512>* %b
+  br label %next
+
+next:
+  ret <19 x i512> %tt
+}
+




