<table border="1" cellspacing="0" cellpadding="8">
<tr>
<th>Issue</th>
<td>
<a href="https://github.com/llvm/llvm-project/issues/62527">62527</a>
</td>
</tr>
<tr>
<th>Summary</th>
<td>
arm64: suboptimal register allocation with spillage causing poor performance in matrix multiplication kernel
</td>
</tr>
<tr>
<th>Labels</th>
<td>
backend:ARM,
performance
</td>
</tr>
<tr>
<th>Assignees</th>
<td>
</td>
</tr>
<tr>
<th>Reporter</th>
<td>
bjacob
</td>
</tr>
</table>
<pre>
Compiler Explorer link: https://godbolt.org/z/6cTKvnTE9
Testcase inlined below for reference (at bottom of this comment).
To reproduce: compile to arm64 with these flags: `-O3 -march=armv8.2-a+i8mm` (the latter enables `i8mm` ISA extension, which the NEON intrinsics in the testcase require).
Expected result
========
The compiler should find a register allocation that doesn't require any spills. Here is GCC's output for the main loop:
```
.L4:
smmla v28.4s, v7.16b, v4.16b
smmla v23.4s, v7.16b, v3.16b
smmla v27.4s, v7.16b, v2.16b
smmla v22.4s, v7.16b, v1.16b
smmla v26.4s, v6.16b, v4.16b
smmla v21.4s, v6.16b, v3.16b
smmla v25.4s, v6.16b, v2.16b
smmla v20.4s, v6.16b, v1.16b
ldp q7, q6, [x4]
smmla v24.4s, v5.16b, v4.16b
smmla v31.4s, v5.16b, v3.16b
smmla v19.4s, v5.16b, v2.16b
smmla v30.4s, v5.16b, v1.16b
smmla v18.4s, v0.16b, v4.16b
smmla v29.4s, v0.16b, v3.16b
smmla v17.4s, v0.16b, v2.16b
smmla v16.4s, v0.16b, v1.16b
ldp q5, q0, [x4, 32]
add x4, x4, 64
ldp q4, q3, [x5]
ldp q2, q1, [x5, 32]
add x5, x5, 64
cmp x4, x1
bne .L4
```
Actual result
========
Clang fails to find a good register allocation and introduces spills, resulting in ~ 2x lower performance on ARM Cortex-X2. Here is Clang's output for the main loop:
```
.LBB0_4: // =>This Inner Loop Header: Depth=1
smmla v1.4s, v10.16b, v11.16b
add x11, sp, #96
smmla v31.4s, v0.16b, v11.16b
subs w8, w8, #1
smmla v27.4s, v15.16b, v11.16b
smmla v7.4s, v20.16b, v11.16b
mov v11.16b, v1.16b
ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x11] // 64-byte Folded Reload
smmla v9.4s, v0.16b, v14.16b
smmla v8.4s, v0.16b, v13.16b
smmla v30.4s, v0.16b, v12.16b
smmla v29.4s, v15.16b, v14.16b
smmla v28.4s, v15.16b, v13.16b
smmla v24.4s, v15.16b, v12.16b
smmla v26.4s, v20.16b, v14.16b
ldp q0, q15, [x10, #-48]
smmla v25.4s, v20.16b, v13.16b
smmla v19.4s, v20.16b, v12.16b
smmla v4.4s, v10.16b, v12.16b
smmla v6.4s, v10.16b, v14.16b
smmla v5.4s, v10.16b, v13.16b
ldp q20, q10, [x10, #-16]
st1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x11] // 64-byte Folded Spill
add x10, x10, #64
mov v1.16b, v11.16b
ldp q14, q13, [x9, #-48]
ldp q11, q12, [x9, #-16]
add x9, x9, #64
b.ne .LBB0_4
```
Finding:
========
It seems that part of the register pressure causing the spills is coming from the local variable inside the `if (accumulate) {...}` block: disabling this block by changing that to `if (0) {...}` removes the spills.
As (by definition) these local variables inside the `if` branch are out of scope by the time we enter the main loop, they shouldn't have an impact on the register allocation of the code in the main loop.
Testcase:
========
```c
#include <stdint.h>
#include <stdbool.h>
#include <arm_neon.h>
/* Promise the optimizer that condition `x` always holds: if it were false,
 * control would reach __builtin_unreachable(), which is undefined behavior,
 * so the compiler may assume `x` and drop the corresponding checks. */
#define ASSUME(x) \
do { \
if (!(x)) __builtin_unreachable(); \
} while (0)
// Interleave the LOW 64-bit halves of `a` and `b` (ZIP1 on 64-bit lanes),
// treating each int32x4_t as two adjacent pairs of 32-bit lanes.
// Result (as s32 lanes): { a0, a1, b0, b1 }.
static inline int32x4_t neon_zip1_s32_as_s64(int32x4_t a, int32x4_t b) {
return vreinterpretq_s32_s64(
vzip1q_s64(vreinterpretq_s64_s32(a), vreinterpretq_s64_s32(b)));
}
// Interleave the HIGH 64-bit halves of `a` and `b` (ZIP2 on 64-bit lanes).
// Result (as s32 lanes): { a2, a3, b2, b3 }.
static inline int32x4_t neon_zip2_s32_as_s64(int32x4_t a, int32x4_t b) {
return vreinterpretq_s32_s64(
vzip2q_s64(vreinterpretq_s64_s32(a), vreinterpretq_s64_s32(b)));
}
// Concatenate the even-indexed 64-bit lanes of `a` and `b` (UZP1 on 64-bit
// lanes), i.e. take the low 64-bit half of each input.
// Result (as s32 lanes): { a0, a1, b0, b1 }.
static inline int32x4_t neon_uzp1_s32_as_s64(int32x4_t a, int32x4_t b) {
return vreinterpretq_s32_s64(
vuzp1q_s64(vreinterpretq_s64_s32(a), vreinterpretq_s64_s32(b)));
}
// Concatenate the odd-indexed 64-bit lanes of `a` and `b` (UZP2 on 64-bit
// lanes), i.e. take the high 64-bit half of each input.
// Result (as s32 lanes): { a2, a3, b2, b3 }.
static inline int32x4_t neon_uzp2_s32_as_s64(int32x4_t a, int32x4_t b) {
return vreinterpretq_s32_s64(
vuzp2q_s64(vreinterpretq_s64_s32(a), vreinterpretq_s64_s32(b)));
}
// 8x8 int8 -> int32 matrix-multiply tile kernel using the Arm i8mm SMMLA
// intrinsic (vmmlaq_s32).
//
// Parameters:
//   out_tile   - 8x8 row-major int32 output tile (read too when `accumulate`).
//   lhs_panel  - packed int8 LHS data; 64 bytes consumed per K step.
//   rhs_panel  - packed int8 RHS data; 64 bytes consumed per K step.
//   K          - number of depth steps; must be >= 1 (see ASSUME below).
//   accumulate - if true, add onto the existing contents of out_tile;
//                otherwise start from zero accumulators.
//
// NOTE(review): the exact statement ordering here is what reproduces the
// Clang spill issue described above — do not restructure this testcase.
void mmt4d_tile_i8i8i32_8x8x8_arm_64_i8mm(
void* restrict out_tile, const void* restrict lhs_panel,
const void* restrict rhs_panel, int32_t K, bool accumulate) {
const int8_t* restrict lhs_ptr = lhs_panel;
const int8_t* restrict rhs_ptr = rhs_panel;
int32_t* restrict out_ptr = out_tile;
// 16 accumulators in the 2x2-tile layout produced by vmmlaq_s32 (SMMLA):
// acc_AB_CD holds the 2x2 int32 block covering output rows A,B and
// columns C,D.
int32x4_t acc_01_01, acc_01_23, acc_01_45, acc_01_67;
int32x4_t acc_23_01, acc_23_23, acc_23_45, acc_23_67;
int32x4_t acc_45_01, acc_45_23, acc_45_45, acc_45_67;
int32x4_t acc_67_01, acc_67_23, acc_67_45, acc_67_67;
// If `accumulate` is true, load accumulator registers from memory and swizzle
// into 2x2 tiles as expected by vmmlaq_s32.
// Note: disabling this if-block removes the Clang spilling issue, indicating
// that the register pressure causing the spill is partly caused by the
// variables scoped inside this if branch.
if (accumulate) {
// acc_R_0123 / acc_R_4567 are row R of the output in linear layout.
int32x4_t acc_0_0123 = vld1q_s32(out_ptr + 8 * 0 + 0);
int32x4_t acc_0_4567 = vld1q_s32(out_ptr + 8 * 0 + 4);
int32x4_t acc_1_0123 = vld1q_s32(out_ptr + 8 * 1 + 0);
int32x4_t acc_1_4567 = vld1q_s32(out_ptr + 8 * 1 + 4);
int32x4_t acc_2_0123 = vld1q_s32(out_ptr + 8 * 2 + 0);
int32x4_t acc_2_4567 = vld1q_s32(out_ptr + 8 * 2 + 4);
int32x4_t acc_3_0123 = vld1q_s32(out_ptr + 8 * 3 + 0);
int32x4_t acc_3_4567 = vld1q_s32(out_ptr + 8 * 3 + 4);
int32x4_t acc_4_0123 = vld1q_s32(out_ptr + 8 * 4 + 0);
int32x4_t acc_4_4567 = vld1q_s32(out_ptr + 8 * 4 + 4);
int32x4_t acc_5_0123 = vld1q_s32(out_ptr + 8 * 5 + 0);
int32x4_t acc_5_4567 = vld1q_s32(out_ptr + 8 * 5 + 4);
int32x4_t acc_6_0123 = vld1q_s32(out_ptr + 8 * 6 + 0);
int32x4_t acc_6_4567 = vld1q_s32(out_ptr + 8 * 6 + 4);
int32x4_t acc_7_0123 = vld1q_s32(out_ptr + 8 * 7 + 0);
int32x4_t acc_7_4567 = vld1q_s32(out_ptr + 8 * 7 + 4);
// Linear rows -> 2x2-tile layout (inverse of the uzp swizzle at the end).
acc_01_01 = neon_zip1_s32_as_s64(acc_0_0123, acc_1_0123);
acc_01_23 = neon_zip2_s32_as_s64(acc_0_0123, acc_1_0123);
acc_01_45 = neon_zip1_s32_as_s64(acc_0_4567, acc_1_4567);
acc_01_67 = neon_zip2_s32_as_s64(acc_0_4567, acc_1_4567);
acc_23_01 = neon_zip1_s32_as_s64(acc_2_0123, acc_3_0123);
acc_23_23 = neon_zip2_s32_as_s64(acc_2_0123, acc_3_0123);
acc_23_45 = neon_zip1_s32_as_s64(acc_2_4567, acc_3_4567);
acc_23_67 = neon_zip2_s32_as_s64(acc_2_4567, acc_3_4567);
acc_45_01 = neon_zip1_s32_as_s64(acc_4_0123, acc_5_0123);
acc_45_23 = neon_zip2_s32_as_s64(acc_4_0123, acc_5_0123);
acc_45_45 = neon_zip1_s32_as_s64(acc_4_4567, acc_5_4567);
acc_45_67 = neon_zip2_s32_as_s64(acc_4_4567, acc_5_4567);
acc_67_01 = neon_zip1_s32_as_s64(acc_6_0123, acc_7_0123);
acc_67_23 = neon_zip2_s32_as_s64(acc_6_0123, acc_7_0123);
acc_67_45 = neon_zip1_s32_as_s64(acc_6_4567, acc_7_4567);
acc_67_67 = neon_zip2_s32_as_s64(acc_6_4567, acc_7_4567);
} else {
acc_01_01 = vdupq_n_s32(0);
acc_01_23 = vdupq_n_s32(0);
acc_01_45 = vdupq_n_s32(0);
acc_01_67 = vdupq_n_s32(0);
acc_23_01 = vdupq_n_s32(0);
acc_23_23 = vdupq_n_s32(0);
acc_23_45 = vdupq_n_s32(0);
acc_23_67 = vdupq_n_s32(0);
acc_45_01 = vdupq_n_s32(0);
acc_45_23 = vdupq_n_s32(0);
acc_45_45 = vdupq_n_s32(0);
acc_45_67 = vdupq_n_s32(0);
acc_67_01 = vdupq_n_s32(0);
acc_67_23 = vdupq_n_s32(0);
acc_67_45 = vdupq_n_s32(0);
acc_67_67 = vdupq_n_s32(0);
}
// The number of loop iterations, K, is known to be at least 1.
ASSUME(K >= 1);
// Handle the initial loads out of the loop so that the loop body can start
// with arithmetic (loop pipelining optimization).
int8x16_t lhs01 = vld1q_s8(lhs_ptr + 0);
int8x16_t lhs23 = vld1q_s8(lhs_ptr + 16);
int8x16_t lhs45 = vld1q_s8(lhs_ptr + 32);
int8x16_t lhs67 = vld1q_s8(lhs_ptr + 48);
lhs_ptr += 64;
int8x16_t rhs01 = vld1q_s8(rhs_ptr + 0);
int8x16_t rhs23 = vld1q_s8(rhs_ptr + 16);
int8x16_t rhs45 = vld1q_s8(rhs_ptr + 32);
int8x16_t rhs67 = vld1q_s8(rhs_ptr + 48);
rhs_ptr += 64;
// Main loop. Runs K-1 times; the last depth step is peeled off below so the
// final iteration performs no (out-of-bounds) loads.
for (int k = 0; k < K - 1; ++k) {
// 16 independent SMMLA ops: every 2-row LHS slice times every 2-col RHS
// slice.
acc_01_01 = vmmlaq_s32(acc_01_01, lhs01, rhs01);
acc_01_23 = vmmlaq_s32(acc_01_23, lhs01, rhs23);
acc_01_45 = vmmlaq_s32(acc_01_45, lhs01, rhs45);
acc_01_67 = vmmlaq_s32(acc_01_67, lhs01, rhs67);
acc_23_01 = vmmlaq_s32(acc_23_01, lhs23, rhs01);
acc_23_23 = vmmlaq_s32(acc_23_23, lhs23, rhs23);
acc_23_45 = vmmlaq_s32(acc_23_45, lhs23, rhs45);
acc_23_67 = vmmlaq_s32(acc_23_67, lhs23, rhs67);
acc_45_01 = vmmlaq_s32(acc_45_01, lhs45, rhs01);
acc_45_23 = vmmlaq_s32(acc_45_23, lhs45, rhs23);
acc_45_45 = vmmlaq_s32(acc_45_45, lhs45, rhs45);
acc_45_67 = vmmlaq_s32(acc_45_67, lhs45, rhs67);
acc_67_01 = vmmlaq_s32(acc_67_01, lhs67, rhs01);
acc_67_23 = vmmlaq_s32(acc_67_23, lhs67, rhs23);
acc_67_45 = vmmlaq_s32(acc_67_45, lhs67, rhs45);
acc_67_67 = vmmlaq_s32(acc_67_67, lhs67, rhs67);
// Pre-load the next 64 bytes of each panel for the following iteration.
lhs01 = vld1q_s8(lhs_ptr + 0);
lhs23 = vld1q_s8(lhs_ptr + 16);
lhs45 = vld1q_s8(lhs_ptr + 32);
lhs67 = vld1q_s8(lhs_ptr + 48);
lhs_ptr += 64;
rhs01 = vld1q_s8(rhs_ptr + 0);
rhs23 = vld1q_s8(rhs_ptr + 16);
rhs45 = vld1q_s8(rhs_ptr + 32);
rhs67 = vld1q_s8(rhs_ptr + 48);
rhs_ptr += 64;
}
// Final arithmetic after the main loop.
acc_01_01 = vmmlaq_s32(acc_01_01, lhs01, rhs01);
acc_01_23 = vmmlaq_s32(acc_01_23, lhs01, rhs23);
acc_01_45 = vmmlaq_s32(acc_01_45, lhs01, rhs45);
acc_01_67 = vmmlaq_s32(acc_01_67, lhs01, rhs67);
acc_23_01 = vmmlaq_s32(acc_23_01, lhs23, rhs01);
acc_23_23 = vmmlaq_s32(acc_23_23, lhs23, rhs23);
acc_23_45 = vmmlaq_s32(acc_23_45, lhs23, rhs45);
acc_23_67 = vmmlaq_s32(acc_23_67, lhs23, rhs67);
acc_45_01 = vmmlaq_s32(acc_45_01, lhs45, rhs01);
acc_45_23 = vmmlaq_s32(acc_45_23, lhs45, rhs23);
acc_45_45 = vmmlaq_s32(acc_45_45, lhs45, rhs45);
acc_45_67 = vmmlaq_s32(acc_45_67, lhs45, rhs67);
acc_67_01 = vmmlaq_s32(acc_67_01, lhs67, rhs01);
acc_67_23 = vmmlaq_s32(acc_67_23, lhs67, rhs23);
acc_67_45 = vmmlaq_s32(acc_67_45, lhs67, rhs45);
acc_67_67 = vmmlaq_s32(acc_67_67, lhs67, rhs67);
// Swizzle back to linear layout and store to destination.
int32x4_t acc_0_0123 = neon_uzp1_s32_as_s64(acc_01_01, acc_01_23);
int32x4_t acc_0_4567 = neon_uzp1_s32_as_s64(acc_01_45, acc_01_67);
int32x4_t acc_1_0123 = neon_uzp2_s32_as_s64(acc_01_01, acc_01_23);
int32x4_t acc_1_4567 = neon_uzp2_s32_as_s64(acc_01_45, acc_01_67);
int32x4_t acc_2_0123 = neon_uzp1_s32_as_s64(acc_23_01, acc_23_23);
int32x4_t acc_2_4567 = neon_uzp1_s32_as_s64(acc_23_45, acc_23_67);
int32x4_t acc_3_0123 = neon_uzp2_s32_as_s64(acc_23_01, acc_23_23);
int32x4_t acc_3_4567 = neon_uzp2_s32_as_s64(acc_23_45, acc_23_67);
int32x4_t acc_4_0123 = neon_uzp1_s32_as_s64(acc_45_01, acc_45_23);
int32x4_t acc_4_4567 = neon_uzp1_s32_as_s64(acc_45_45, acc_45_67);
int32x4_t acc_5_0123 = neon_uzp2_s32_as_s64(acc_45_01, acc_45_23);
int32x4_t acc_5_4567 = neon_uzp2_s32_as_s64(acc_45_45, acc_45_67);
int32x4_t acc_6_0123 = neon_uzp1_s32_as_s64(acc_67_01, acc_67_23);
int32x4_t acc_6_4567 = neon_uzp1_s32_as_s64(acc_67_45, acc_67_67);
int32x4_t acc_7_0123 = neon_uzp2_s32_as_s64(acc_67_01, acc_67_23);
int32x4_t acc_7_4567 = neon_uzp2_s32_as_s64(acc_67_45, acc_67_67);
vst1q_s32(out_ptr + 8 * 0 + 0, acc_0_0123);
vst1q_s32(out_ptr + 8 * 0 + 4, acc_0_4567);
vst1q_s32(out_ptr + 8 * 1 + 0, acc_1_0123);
vst1q_s32(out_ptr + 8 * 1 + 4, acc_1_4567);
vst1q_s32(out_ptr + 8 * 2 + 0, acc_2_0123);
vst1q_s32(out_ptr + 8 * 2 + 4, acc_2_4567);
vst1q_s32(out_ptr + 8 * 3 + 0, acc_3_0123);
vst1q_s32(out_ptr + 8 * 3 + 4, acc_3_4567);
vst1q_s32(out_ptr + 8 * 4 + 0, acc_4_0123);
vst1q_s32(out_ptr + 8 * 4 + 4, acc_4_4567);
vst1q_s32(out_ptr + 8 * 5 + 0, acc_5_0123);
vst1q_s32(out_ptr + 8 * 5 + 4, acc_5_4567);
vst1q_s32(out_ptr + 8 * 6 + 0, acc_6_0123);
vst1q_s32(out_ptr + 8 * 6 + 4, acc_6_4567);
vst1q_s32(out_ptr + 8 * 7 + 0, acc_7_0123);
vst1q_s32(out_ptr + 8 * 7 + 4, acc_7_4567);
}
```
</pre>
<img width="1px" height="1px" alt="" src="http://email.email.llvm.org/o/eJzEW0tv47qS_jXKhoghUS97kUWecxp9-1zgdA8wO4OS6Ji3KVIhaSfpxfz2Aam3Rcl0w2dOEPghVX36qkhWFR9GUpJXhvGdFz948dMNOqg9F3fZf1DOs5uMF593j7ysCMUCPH9UlAssACXspxfeg71SlfTCew--ePDllRcZp2rFxasHX3558CXJf3w9sh_PG89_8vz7-vUHlipHEgPCKGG4ABmm_B3suAAC77DALMfAg2ukQMaV4iXgO6D2RIKclyVmyoOb1QiQA4ErwYtDjjWrvOYLFAdIlEkE3onaA7XHEoMdRa-aMfAS__bfIbgtkcj3XviERHlcr-At8uADWZell_gAaBpqjwFFSmEBMEMZxVLrtiJfvt8D_KEwk4QzDz6C9z3JzcPAn8___hMQpgRhkuQSEGYuq9Z8gd8OROBTa54_KpwrXACB5YGq5lb4ZP8fumGPW9MFkHt-oAXYEVYABAR-JVJbgCjlOVKEay5IgYJjyTyYqpYNQOwTyIpQKlfgDywwIBL81-OjB1MJ-EFVB2VaSltSIsIA5bzSPaAmkvjNv_m6-lfU3QLNnyxLigAAR7heRVJ77JiugiQznyLzaUY-nMqHS_LpVB4uycOpfDAnf4RJK524sQ-m8kP2vWA8FVyk7U_lh7RpURmFt1Tfekv0qxc_fERe_DQHGbWQsdW2TjAMpoJLTRJspvJLtoX-VH62STR-16V8t0bZTOVn-R-DdCq9xD5IpvLWlolNy_h9y8BHEMJJ-6CiMO-1QP2aRGOZDtPcfQtbzHiC1klCIxn0kmeebgTq19On52U1ZBiM72YMm3cdFWzRon69z9UB0YuD3yNF7BXsEKFSx_0m7r1yXliDH2KFCc0mZ8gm4GnS9XMJe9Xh-n8B_ACUv2MBKix2XJRI5ybOwP1f38AjFwp_3P4P7AOlYfGbofLhwd9GJjOZZAqMjc8_dN77whgW4F-cV-APjAostNgTrpROXMFc92t7XzDofpbR0zVsYPqArExPgOEmmRuVgaVf24blIZP6_X1t8uK6wZ3j28frIF7GbTU6BbhMpOTH7sZMCKFFYN699EHfh0UzupsPYfshWsECeOlTO1qCwIuf2hZLotvsU2HwwmmBC_AXphwVM9wtkSewxlhLSAuWYmwfMwcK0JppNjaHL8bLtU1jMQlHNo3ZqDlIq3DGMafhy6_DV9y1iN_0s9to3QcxS3qFjg7tkxac8eipRmQbeUsKiU1hqSFim4LFCO2lN9i4yJ-6KEimVYC6_kj4rsPrXNTx61zRcjrNKfXgXRzfXV8I6qwXdGlvY-sLQ9_UMe8tgBMNi2s6zkaoEz1lnK0YBm08n090L4QVhL32CcEhy31RQGJcyrp4r5BQ9dwI90muEljKg8AgRwep85i-W-c3UE-h9MWd4KW5ozMiBUckiJ7aAD1VKbC5oyc5OzMPy_NDeaBI6amK7her1Uo3fOKDjPLcTAQLIlFG68cRWV8H2SfI94i91peR0om5Q_UnYAKX_IjlgPBoXnQvtVr2CQq8I4woM9_aNNO6sRlyYochKxDL9wAJrLOzdpzMeYU1TTMvIyUG7xhgpt04TtrwUV_4bGZV9YRpj456tgRIWaFcAc7G7TAoNpomynmB20lgB72yzYwv6hNd58qb7zAkLKeHAgMvfJSqIEyt9l74PHM745zO3kei3DLM2VCgFTPtgMH99-___e3Zg-sP3RyTPy9-7IdHwU1cGV3Tf3WX8GDQwGik7TY7EF2KbQ9MYJTvdcsaqY0XnmB46ZOeeFPc9qwhVamQInmz2qCrvhB-RFsFtF3bX6QKtjKEWyS3Mok8uO4FkG73_mvW9Nj-sQKrg2DgKDDRnaYSWL0ZsBqpETzqZ7w1105kk0jL60FmrH48xeruZ7VXauMb69KnS8yEf7OZzZ-2Fv7D1h5-
_e2N2lqrH_XPW_v_1bb6UX-_tUdOClCWKiq2ilC8JWuyJiHcrj_WH-utDklJtDULcEOCWsuD93oGpwTREfmgjL4mk3Mm1VSE7uW2QgxTDw6iyYywGAjXPt0q8FV_0SEUTNLkKSBhar1V0-croed6Ay6tW-b1xEBPTPV0RK35TRzSanXOmSjV_SbPt36w9U151HyB4eBLFA--JGkPM8aA4QADhgMMGA4wYDjCmFCJ4gFMFA9gongAE8XLMEk6gEnSAUySDmCS9BSmKWu_7HQ1MWjoxNc1lRIH08n0dK_vBmY5uy4GZF1vlbjk4tOsPch38usXxZMnEKY4gB8Q6LaRAEmA2-Xg7BMcdf1vhudqovknV9hSiZHdbV2MDaurerHE1FhmpUPK2gBdkeqahb1O4Ov6za3Q1D7RtSn9NPdq7mo_tbav10wlVvRlm2HeVGwDW2cK0pNiYtyLt34AQ9Pnj7QI3poo1I0F-ADWQA8T33z2h3FpDjGKk9QZMTqPGLhyDFw5Bq4cA1eO0JUjdOUIXTlCV46hK8fQlWPoyjF05Ri5coxcOUauHCNXjrErx9iVY-zKMXblmLhyTFw5Jq4cE1eOqSvH1JVj6soxnefY5XYDMzML6kNnmxWD5tssYGPpzHzjNwCj-DxD7Y4esP42C9h4bpGhO6Cpbc4xhCOTw0WTTX10juGFgOd9CEcmh-dMPu_DSwBNYXeOYTQyOV402RSH5xheCHjeh9HI5Picyed9eAmgqWrPMUxGJqeLJpvK-BzDCwHP-zAZmZyeM_m8D10AvfQJYCrxoIoch8djcajetqyJs9YIPQ5_SwrjuOYO3Ub8M9B9QHKE7gOOA_RlrPtIcQ66DwGO0P0Qd4C-jHU_Ns9B94POEbofVA7Ql7HuR8OSQrfIAvoJ2I89BuxQZlgAvjPr0YAoLMyytdlgMssbRIKfjL8zoDjIMEAKUIykAkE7M-uWgL8CL3zWRILJQKsf-AdiBa3X5c0aPqJm1izbFfl6S4JXQPJ-ymkuZLzQ00kGpEJCTYDNES8kiNqXWJFcTxONWkUqTAnT81NeKVKSX6jZOViNVgnWH0GyNQsybbPW5dVaA7WrLZMabaQ4rvROFYPk1CUj5bbF7cq6OReUxxXhqXK0HikPbmmlJJrBFTZXCAdXCJsrhKMrhM0VwtEVwuYKMe8KseiKpmd9O9mq0bd2XCutCVPgp3me74UP5uMj-ApuQWB2KOCDBx9-WtcoTpJMt67TlqHt0pvpj-ZcSv1hmkxaV1sg6vQ8hHApt21I9drYEElfOZuvLEh1Qh4iuRTYE6RuXZHWRtkcZE90FqTOTx2SS0ltQ-r81CHN-2mQIS1InZ86JJcieoLULZzSmsoZPw1SqwWp81OH5FI225A6P3VI834a5GQLUuenDsmlUJ4gdSvDtAY446dBMrcgdX7qkFxKYxtS56cOad5PgyrAgtT5qUOy--nCBNhrXZr9GrVL816jdsWMp_8uzHW91qWJrlG7KMVdmtX6xywlN0s1-EIYosMiCu0mRyEGSfAKOWw6e_rNNHaFFHad3HWdvHWdnHWFfHWdRHWdJHWdBHWF5HSdrHSdjHSdbHSFTHSdFNREou_19ijIUP5TzzopYRgJQNGnnieaDVTFhfmFT4GlIsxM6lazO9n9OvzM-RA0s989mWnY9wGXUU_3yhdRgylX2zr1hVyDKVc76kVcoZNfbWcAllFd_Go5P7CIGjr59VKuoZNfL-UaOfnVdihiGdXFr5YDFYuosZNfL-UaO_n1Uq6Jk19tp0SWUV38ajlhsoiaOvn1Uq6pk1_Pcz1K5XKU4nEQfn8HIuohJqv3y_rBmIJ9r9EFIlreDFyGgGMWcMrCRT_q9S-nEI4p2DcMXSCihR29Zf1oTCH6DQrRmEL0G46Ixywsu34u-tHyntwyRDKmkPyGI5Ixi-TStkjHFOz7di4Q0fzGWn-csz2aflPchcUm3KAbfBck6zAIYbLe3OzvUJJitPHz
dL1Z4wL7CcRFjtf-roiCGPrRDbmDPgz92A-DJA7gZhUH8W6N1sku3IVFCiMv8nGJCF1ReixXXLzemONsdwmMYXpDUYapNL9th1BXk5gVXnh__9c3D0IPPnoQDn7Zp6_FTzfiTmPdZodX6UU-JVLJHl0RRfGd-V25F94DecjMvoL5yeL06L_ZljCH4tBrf1Ku4nz8i0LCQImUIB-gPFBFKkoa_Z9YMExvDoLenfzWnqj9IVvlvPTgiybXvN1Wgv8H58qDL8YP0oMvxhX_FwAA__-qympz">