[clang] [clang][X86] Add constexpr support for mpsadbw128/256 intrinsics (PR #202257)

via cfe-commits cfe-commits at lists.llvm.org
Mon Jun 22 00:26:58 PDT 2026


================
@@ -1009,6 +1009,23 @@ __m256i test_mm256_mpsadbw_epu8(__m256i x, __m256i y) {
   // CHECK: call <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8> %{{.*}}, <32 x i8> %{{.*}}, i8 3)
   return _mm256_mpsadbw_epu8(x, y, 3);
 }
+// imm=0 both lanes.  Lane0 A=4,B=1 -> 12 each ; Lane1 A=8,B=1 -> |8-1|*4=28 each
+TEST_CONSTEXPR(match_v16hu(_mm256_mpsadbw_epu8(((__m256i)(__v32qu){2,3,5,7,11,13,17,19,23,29,31,37,41,43,47,53,59,61,67,71,73,79,83,89,97,101,103,107,109,113,127,131}), ((__m256i)(__v32qu){83,89,97,101,103,107,109,113,127,131,137,139,149,151,157,163,167,173,179,181,191,193,197,199,211,223,227,229,233,239,241,251}), 0),    353,344,334,322,310,298,282,268,442,428,410,394,376,352,330,310));
----------------
adream307 wrote:

OK, I added more tests and checked them on actual AVX-512 hardware:
```cpp
#include <stdio.h>
#include <immintrin.h>

/* Print the eight elements of v in index order, each followed by a comma,
 * then end the line. */
void printv8hu(__v8hu v) {
  enum { LANES = 8 };
  int lane = 0;

  while (lane < LANES) {
    /* Each element is passed through the default promotion and shown as %d. */
    printf("%d,", v[lane]);
    lane++;
  }
  printf("\n");
}

/* Print the sixteen elements of v in index order, each followed by a comma,
 * then end the line. */
void printv16hu(__v16hu v) {
  enum { LANES = 16 };
  int lane = 0;

  while (lane < LANES) {
    /* Each element is passed through the default promotion and shown as %d. */
    printf("%d,", v[lane]);
    lane++;
  }
  printf("\n");
}

/* Run mpsadbw on extreme byte inputs (all 255 / all 0) and print each result
 * so it can be compared against constant-evaluated expectations. */
int main() {
  /* Operand vectors shared by the calls below: every byte 255, or every byte 0. */
  const __m128i full128 = (__m128i)(__v16qu){255,255,255,255,255,255,255,255,
                                             255,255,255,255,255,255,255,255};
  const __m128i zero128 = (__m128i)(__v16qu){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
  const __m256i full256 = (__m256i)(__v32qu){255,255,255,255,255,255,255,255,
                                             255,255,255,255,255,255,255,255,
                                             255,255,255,255,255,255,255,255,
                                             255,255,255,255,255,255,255,255};
  const __m256i zero256 = (__m256i)(__v32qu){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
                                             0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};

  /* 128-bit form, immediate 7: equal operands, then 255-vs-0 in both orders. */
  __v8hu same128 = (__v8hu)_mm_mpsadbw_epu8(full128, full128, 7);
  __v8hu high_low128 = (__v8hu)_mm_mpsadbw_epu8(full128, zero128, 7);
  __v8hu low_high128 = (__v8hu)_mm_mpsadbw_epu8(zero128, full128, 7);
  printv8hu(same128);
  printv8hu(high_low128);
  printv8hu(low_high128);

  /* 256-bit form with immediates 0, 1 and 2 on the same three operand pairings. */
  __v16hu same256 = (__v16hu)_mm256_mpsadbw_epu8(full256, full256, 0);
  __v16hu high_low256 = (__v16hu)_mm256_mpsadbw_epu8(full256, zero256, 1);
  __v16hu low_high256 = (__v16hu)_mm256_mpsadbw_epu8(zero256, full256, 2);
  printv16hu(same256);
  printv16hu(high_low256);
  printv16hu(low_high256);

  return 0;
}
```
and the output is:
```txt
0,0,0,0,0,0,0,0,
1020,1020,1020,1020,1020,1020,1020,1020,
1020,1020,1020,1020,1020,1020,1020,1020,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1020,1020,1020,1020,1020,1020,1020,1020,1020,1020,1020,1020,1020,1020,1020,1020,
1020,1020,1020,1020,1020,1020,1020,1020,1020,1020,1020,1020,1020,1020,1020,1020,
```

https://github.com/llvm/llvm-project/pull/202257


More information about the cfe-commits mailing list