summaryrefslogtreecommitdiffstats
path: root/thirdparty/basis_universal/encoder/basisu_kernels_imp.h
diff options
context:
space:
mode:
authorRémi Verschelde <remi@verschelde.fr>2021-05-13 14:55:49 +0200
committerGitHub <noreply@github.com>2021-05-13 14:55:49 +0200
commita40a08827c88a054e0891b2d791c1368b06e7aa9 (patch)
tree4ef44094d79b4392284b72c21af10fefd166fa9d /thirdparty/basis_universal/encoder/basisu_kernels_imp.h
parenta7e5b99a9effaf84d67733e3413a7f945072a3cb (diff)
parent2d133177e979bd68eba1a241cae84a8215087993 (diff)
downloadredot-engine-a40a08827c88a054e0891b2d791c1368b06e7aa9.tar.gz
Merge pull request #46568 from akien-mga/basisu_20210130
basis_universal: Update to upstream commit from Apr 16, 2021
Diffstat (limited to 'thirdparty/basis_universal/encoder/basisu_kernels_imp.h')
-rw-r--r--thirdparty/basis_universal/encoder/basisu_kernels_imp.h584
1 files changed, 584 insertions, 0 deletions
diff --git a/thirdparty/basis_universal/encoder/basisu_kernels_imp.h b/thirdparty/basis_universal/encoder/basisu_kernels_imp.h
new file mode 100644
index 0000000000..046880517b
--- /dev/null
+++ b/thirdparty/basis_universal/encoder/basisu_kernels_imp.h
@@ -0,0 +1,584 @@
+// basisu_kernels_imp.h - Do not directly include
+// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+using namespace CPPSPMD;
+
+namespace CPPSPMD_NAME(basisu_kernels_namespace)
+{
+ struct perceptual_distance_rgb_4_N : spmd_kernel // Kernel: totals a weighted RGB error between each source pixel and the block color its selector picks.
+ {
+ void _call(int64_t* pDistance, // out: error total; zeroed on entry, holds a partial sum after an early return
+ const uint8_t* pSelectors, // in: one selector per pixel; used to index 4-entry arrays, so each must be 0..3
+ const color_rgba* pBlock_colors, // in: the 4 candidate colors (entries 0..3 are read)
+ const color_rgba* pSrc_pixels, uint32_t n, // in: n source pixels to score
+ int64_t early_out_err) // in: stop once the running total is >= this value; asserted non-negative
+ {
+ assert(early_out_err >= 0);
+
+ *pDistance = 0;
+
+ __m128i block_colors[4]; // packed copies of the block colors, used when the 4 selectors differ
+ vint block_colors_r[4], block_colors_g[4], block_colors_b[4]; // per-channel copies, used when all 4 selectors match
+ for (uint32_t i = 0; i < 4; i++)
+ {
+ block_colors[i] = load_rgba32(&pBlock_colors[i]);
+ store_all(block_colors_r[i], (int)pBlock_colors[i].r); // presumably broadcasts the scalar to every lane - confirm in cppspmd
+ store_all(block_colors_g[i], (int)pBlock_colors[i].g);
+ store_all(block_colors_b[i], (int)pBlock_colors[i].b);
+ }
+
+ uint32_t i; // declared outside the loops so the scalar tail resumes where the vector loop stopped
+ for (i = 0; (i + 4) <= n; i += 4) // main loop: 4 pixels per iteration
+ {
+ __m128i c0 = load_rgba32(&pSrc_pixels[i + 0]), c1 = load_rgba32(&pSrc_pixels[i + 1]), c2 = load_rgba32(&pSrc_pixels[i + 2]), c3 = load_rgba32(&pSrc_pixels[i + 3]);
+
+ vint r, g, b, a; // a is filled by the transpose but never read
+ transpose4x4(r.m_value, g.m_value, b.m_value, a.m_value, c0, c1, c2, c3); // presumably turns 4 packed pixels into one vector per channel - confirm
+
+ int s0 = pSelectors[i], s1 = pSelectors[i + 1], s2 = pSelectors[i + 2], s3 = pSelectors[i + 3];
+
+ vint base_r, base_g, base_b, base_a; // base_a is only written on the mixed-selector path and never read
+ if ((s0 == s1) && (s0 == s2) && (s0 == s3)) // all 4 pixels share one selector: reuse the pre-broadcast channels
+ {
+ store_all(base_r, block_colors_r[s0]);
+ store_all(base_g, block_colors_g[s0]);
+ store_all(base_b, block_colors_b[s0]);
+ }
+ else // selectors differ: fetch each pixel's own block color and split it into channels
+ {
+ __m128i k0 = block_colors[s0], k1 = block_colors[s1], k2 = block_colors[s2], k3 = block_colors[s3];
+ transpose4x4(base_r.m_value, base_g.m_value, base_b.m_value, base_a.m_value, k0, k1, k2, k3);
+ }
+
+ vint dr = base_r - r;
+ vint dg = base_g - g;
+ vint db = base_b - b;
+
+ vint delta_l = dr * 27 + dg * 92 + db * 9; // weights sum to 128, so this is a blend of the channel deltas scaled by 128
+ vint delta_cr = dr * 128 - delta_l; // red delta at the same 128 scale, minus the blended term
+ vint delta_cb = db * 128 - delta_l; // blue delta at the same 128 scale, minus the blended term
+
+ vint id = ((delta_l * delta_l) >> 7) + // NOTE(review): if channels are 8-bit, |delta_cr| can reach 255*202 and |delta_cb| 255*238; their squares exceed INT32_MAX - confirm wraparound is acceptable
+ ((((delta_cr * delta_cr) >> 7) * 26) >> 7) + // delta_cr term weighted by 26/128
+ ((((delta_cb * delta_cb) >> 7) * 3) >> 7); // delta_cb term weighted by 3/128
+
+ *pDistance += reduce_add(id); // presumably the sum of the lanes of id - confirm in cppspmd
+ if (*pDistance >= early_out_err) // early out is tested once per group of 4 pixels
+ return;
+ }
+
+ for (; i < n; i++) // scalar tail: the remaining (n % 4) pixels, same expressions as the vector loop
+ {
+ int r = pSrc_pixels[i].r, g = pSrc_pixels[i].g, b = pSrc_pixels[i].b;
+
+ int sel = pSelectors[i];
+ int base_r = pBlock_colors[sel].r, base_g = pBlock_colors[sel].g, base_b = pBlock_colors[sel].b;
+
+ int dr = base_r - r;
+ int dg = base_g - g;
+ int db = base_b - b;
+
+ int delta_l = dr * 27 + dg * 92 + db * 9;
+ int delta_cr = dr * 128 - delta_l;
+ int delta_cb = db * 128 - delta_l;
+
+ int id = ((delta_l * delta_l) >> 7) + // NOTE(review): same overflow concern as the vector loop; here it is signed int overflow
+ ((((delta_cr * delta_cr) >> 7) * 26) >> 7) +
+ ((((delta_cb * delta_cb) >> 7) * 3) >> 7);
+
+ *pDistance += id;
+ if (*pDistance >= early_out_err) // early out is tested after every pixel in the tail
+ return;
+ }
+ }
+ };
+
+ struct linear_distance_rgb_4_N : spmd_kernel // Kernel: totals the squared RGB difference between each source pixel and the block color its selector picks.
+ {
+ void _call(int64_t* pDistance, // out: error total; zeroed on entry, holds a partial sum after an early return
+ const uint8_t* pSelectors, // in: one selector per pixel; used to index 4-entry arrays, so each must be 0..3
+ const color_rgba* pBlock_colors, // in: the 4 candidate colors (entries 0..3 are read)
+ const color_rgba* pSrc_pixels, uint32_t n, // in: n source pixels to score
+ int64_t early_out_err) // in: stop once the running total is >= this value; asserted non-negative
+ {
+ assert(early_out_err >= 0);
+
+ *pDistance = 0;
+
+ __m128i block_colors[4]; // packed copies of the block colors, used when the 4 selectors differ
+ vint block_colors_r[4], block_colors_g[4], block_colors_b[4]; // per-channel copies, used when all 4 selectors match
+ for (uint32_t i = 0; i < 4; i++)
+ {
+ block_colors[i] = load_rgba32(&pBlock_colors[i]);
+ store_all(block_colors_r[i], (int)pBlock_colors[i].r); // presumably broadcasts the scalar to every lane - confirm in cppspmd
+ store_all(block_colors_g[i], (int)pBlock_colors[i].g);
+ store_all(block_colors_b[i], (int)pBlock_colors[i].b);
+ }
+
+ uint32_t i; // declared outside the loops so the scalar tail resumes where the vector loop stopped
+ for (i = 0; (i + 4) <= n; i += 4) // main loop: 4 pixels per iteration
+ {
+ __m128i c0 = load_rgba32(&pSrc_pixels[i + 0]), c1 = load_rgba32(&pSrc_pixels[i + 1]), c2 = load_rgba32(&pSrc_pixels[i + 2]), c3 = load_rgba32(&pSrc_pixels[i + 3]);
+
+ vint r, g, b, a; // a is filled by the transpose but never read
+ transpose4x4(r.m_value, g.m_value, b.m_value, a.m_value, c0, c1, c2, c3); // presumably turns 4 packed pixels into one vector per channel - confirm
+
+ int s0 = pSelectors[i], s1 = pSelectors[i + 1], s2 = pSelectors[i + 2], s3 = pSelectors[i + 3];
+
+ vint base_r, base_g, base_b, base_a; // base_a is only written on the mixed-selector path and never read
+ if ((s0 == s1) && (s0 == s2) && (s0 == s3)) // all 4 pixels share one selector: reuse the pre-broadcast channels
+ {
+ store_all(base_r, block_colors_r[s0]);
+ store_all(base_g, block_colors_g[s0]);
+ store_all(base_b, block_colors_b[s0]);
+ }
+ else // selectors differ: fetch each pixel's own block color and split it into channels
+ {
+ __m128i k0 = block_colors[s0], k1 = block_colors[s1], k2 = block_colors[s2], k3 = block_colors[s3];
+ transpose4x4(base_r.m_value, base_g.m_value, base_b.m_value, base_a.m_value, k0, k1, k2, k3);
+ }
+
+ vint dr = base_r - r;
+ vint dg = base_g - g;
+ vint db = base_b - b;
+
+ vint id = dr * dr + dg * dg + db * db; // unweighted sum of squared channel differences
+
+ *pDistance += reduce_add(id); // presumably the sum of the lanes of id - confirm in cppspmd
+ if (*pDistance >= early_out_err) // early out is tested once per group of 4 pixels
+ return;
+ }
+
+ for (; i < n; i++) // scalar tail: the remaining (n % 4) pixels, same expression as the vector loop
+ {
+ int r = pSrc_pixels[i].r, g = pSrc_pixels[i].g, b = pSrc_pixels[i].b;
+
+ int sel = pSelectors[i];
+ int base_r = pBlock_colors[sel].r, base_g = pBlock_colors[sel].g, base_b = pBlock_colors[sel].b;
+
+ int dr = base_r - r;
+ int dg = base_g - g;
+ int db = base_b - b;
+
+ int id = dr * dr + dg * dg + db * db;
+
+ *pDistance += id;
+ if (*pDistance >= early_out_err) // early out is tested after every pixel in the tail
+ return;
+ }
+ }
+ };
+
+ struct find_selectors_perceptual_rgb_4_N : spmd_kernel // Kernel: for each source pixel, picks the lowest-error of 4 block colors (weighted metric), stores its index and totals the error.
+ {
+ inline vint compute_dist( // per-lane weighted error between one block color and the pixels in r/g/b
+ const vint& base_r, const vint& base_g, const vint& base_b, // block color channels
+ const vint& r, const vint& g, const vint& b) // source pixel channels
+ {
+ vint dr = base_r - r;
+ vint dg = base_g - g;
+ vint db = base_b - b;
+
+ vint delta_l = dr * 27 + dg * 92 + db * 9; // weights sum to 128, so this is a blend of the channel deltas scaled by 128
+ vint delta_cr = dr * 128 - delta_l; // red delta at the same 128 scale, minus the blended term
+ vint delta_cb = db * 128 - delta_l; // blue delta at the same 128 scale, minus the blended term
+
+ vint id = VINT_SHIFT_RIGHT(delta_l * delta_l, 7) + // NOTE(review): if channels are 8-bit, |delta_cr| can reach 255*202 and |delta_cb| 255*238; their squares exceed INT32_MAX - confirm wraparound is acceptable
+ VINT_SHIFT_RIGHT(VINT_SHIFT_RIGHT(delta_cr * delta_cr, 7) * 26, 7) + // delta_cr term weighted by 26/128
+ VINT_SHIFT_RIGHT(VINT_SHIFT_RIGHT(delta_cb * delta_cb, 7) * 3, 7); // delta_cb term weighted by 3/128
+
+ return id;
+ }
+
+ void _call(int64_t* pDistance, // out: error total; zeroed on entry, holds a partial sum after an early return
+ uint8_t* pSelectors, // out: chosen index (0..3) per pixel; only written for pixels processed before any early return
+ const color_rgba* pBlock_colors, // in: the 4 candidate colors (entries 0..3 are read)
+ const color_rgba* pSrc_pixels, uint32_t n, // in: n source pixels to classify
+ int64_t early_out_err) // in: stop once the running total is >= this value; asserted non-negative
+ {
+ assert(early_out_err >= 0);
+
+ *pDistance = 0;
+
+ vint block_colors_r[4], block_colors_g[4], block_colors_b[4];
+ for (uint32_t i = 0; i < 4; i++)
+ {
+ store_all(block_colors_r[i], (int)pBlock_colors[i].r); // presumably broadcasts the scalar to every lane - confirm in cppspmd
+ store_all(block_colors_g[i], (int)pBlock_colors[i].g);
+ store_all(block_colors_b[i], (int)pBlock_colors[i].b);
+ }
+
+ const __m128i shuf = _mm_set_epi8(-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, 12, 8, 4, 0); // byte-shuffle mask: source bytes 0,4,8,12 go to output bytes 0..3; -128 entries presumably yield zero (pshufb semantics) - confirm
+
+ uint32_t i; // declared outside the loops so the scalar tail resumes where the vector loop stopped
+
+ for (i = 0; (i + 4) <= n; i += 4) // main loop: 4 pixels per iteration
+ {
+ __m128i c0 = load_rgba32(&pSrc_pixels[i + 0]), c1 = load_rgba32(&pSrc_pixels[i + 1]), c2 = load_rgba32(&pSrc_pixels[i + 2]), c3 = load_rgba32(&pSrc_pixels[i + 3]);
+
+ vint r, g, b, a; // a is filled by the transpose but never read
+ transpose4x4(r.m_value, g.m_value, b.m_value, a.m_value, c0, c1, c2, c3); // presumably turns 4 packed pixels into one vector per channel - confirm
+
+ vint dist0 = compute_dist(block_colors_r[0], block_colors_g[0], block_colors_b[0], r, g, b);
+ vint dist1 = compute_dist(block_colors_r[1], block_colors_g[1], block_colors_b[1], r, g, b);
+ vint dist2 = compute_dist(block_colors_r[2], block_colors_g[2], block_colors_b[2], r, g, b);
+ vint dist3 = compute_dist(block_colors_r[3], block_colors_g[3], block_colors_b[3], r, g, b);
+
+ vint min_dist = min(min(min(dist0, dist1), dist2), dist3);
+
+ vint sels = spmd_ternaryi(min_dist == dist0, 0, spmd_ternaryi(min_dist == dist1, 1, spmd_ternaryi(min_dist == dist2, 2, 3))); // dist0 is tested first, so ties resolve to the lowest index
+
+ __m128i vsels = shuffle_epi8(sels.m_value, shuf); // pack the low byte of each 32-bit selector into the low 4 bytes
+ storeu_si32((void *)(pSelectors + i), vsels); // presumably an unaligned 4-byte store covering pSelectors[i..i+3] - confirm
+
+ *pDistance += reduce_add(min_dist); // presumably the sum of the lanes of min_dist - confirm in cppspmd
+ if (*pDistance >= early_out_err) // selectors for this group are already stored when the early out fires
+ return;
+ }
+
+ for (; i < n; i++) // scalar tail: the remaining (n % 4) pixels
+ {
+ int r = pSrc_pixels[i].r, g = pSrc_pixels[i].g, b = pSrc_pixels[i].b;
+
+ int best_err = INT_MAX, best_sel = 0;
+ for (int sel = 0; sel < 4; sel++)
+ {
+ int base_r = pBlock_colors[sel].r, base_g = pBlock_colors[sel].g, base_b = pBlock_colors[sel].b;
+
+ int dr = base_r - r;
+ int dg = base_g - g;
+ int db = base_b - b;
+
+ int delta_l = dr * 27 + dg * 92 + db * 9;
+ int delta_cr = dr * 128 - delta_l;
+ int delta_cb = db * 128 - delta_l;
+
+ int id = ((delta_l * delta_l) >> 7) + // NOTE(review): same overflow concern as compute_dist; here it is signed int overflow
+ ((((delta_cr * delta_cr) >> 7) * 26) >> 7) +
+ ((((delta_cb * delta_cb) >> 7) * 3) >> 7);
+ if (id < best_err) // strict '<' keeps the lowest index on ties, matching the vector loop
+ {
+ best_err = id;
+ best_sel = sel;
+ }
+ }
+
+ pSelectors[i] = (uint8_t)best_sel;
+
+ *pDistance += best_err;
+ if (*pDistance >= early_out_err) // early out is tested after every pixel in the tail
+ return;
+ }
+ }
+ };
+
+ struct find_selectors_linear_rgb_4_N : spmd_kernel // Kernel: for each source pixel, picks the lowest squared-RGB-error of 4 block colors, stores its index and totals the error.
+ {
+ inline vint compute_dist( // per-lane squared RGB difference between one block color and the pixels in r/g/b
+ const vint& base_r, const vint& base_g, const vint& base_b, // block color channels
+ const vint& r, const vint& g, const vint& b) // source pixel channels
+ {
+ vint dr = base_r - r;
+ vint dg = base_g - g;
+ vint db = base_b - b;
+
+ vint id = dr * dr + dg * dg + db * db; // unweighted sum of squared channel differences
+ return id;
+ }
+
+ void _call(int64_t* pDistance, // out: error total; zeroed on entry, holds a partial sum after an early return
+ uint8_t* pSelectors, // out: chosen index (0..3) per pixel; only written for pixels processed before any early return
+ const color_rgba* pBlock_colors, // in: the 4 candidate colors (entries 0..3 are read)
+ const color_rgba* pSrc_pixels, uint32_t n, // in: n source pixels to classify
+ int64_t early_out_err) // in: stop once the running total is >= this value; asserted non-negative
+ {
+ assert(early_out_err >= 0);
+
+ *pDistance = 0;
+
+ vint block_colors_r[4], block_colors_g[4], block_colors_b[4];
+ for (uint32_t i = 0; i < 4; i++)
+ {
+ store_all(block_colors_r[i], (int)pBlock_colors[i].r); // presumably broadcasts the scalar to every lane - confirm in cppspmd
+ store_all(block_colors_g[i], (int)pBlock_colors[i].g);
+ store_all(block_colors_b[i], (int)pBlock_colors[i].b);
+ }
+
+ const __m128i shuf = _mm_set_epi8(-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, 12, 8, 4, 0); // byte-shuffle mask: source bytes 0,4,8,12 go to output bytes 0..3; -128 entries presumably yield zero (pshufb semantics) - confirm
+
+ uint32_t i; // declared outside the loops so the scalar tail resumes where the vector loop stopped
+
+ for (i = 0; (i + 4) <= n; i += 4) // main loop: 4 pixels per iteration
+ {
+ __m128i c0 = load_rgba32(&pSrc_pixels[i + 0]), c1 = load_rgba32(&pSrc_pixels[i + 1]), c2 = load_rgba32(&pSrc_pixels[i + 2]), c3 = load_rgba32(&pSrc_pixels[i + 3]);
+
+ vint r, g, b, a; // a is filled by the transpose but never read
+ transpose4x4(r.m_value, g.m_value, b.m_value, a.m_value, c0, c1, c2, c3); // presumably turns 4 packed pixels into one vector per channel - confirm
+
+ vint dist0 = compute_dist(block_colors_r[0], block_colors_g[0], block_colors_b[0], r, g, b);
+ vint dist1 = compute_dist(block_colors_r[1], block_colors_g[1], block_colors_b[1], r, g, b);
+ vint dist2 = compute_dist(block_colors_r[2], block_colors_g[2], block_colors_b[2], r, g, b);
+ vint dist3 = compute_dist(block_colors_r[3], block_colors_g[3], block_colors_b[3], r, g, b);
+
+ vint min_dist = min(min(min(dist0, dist1), dist2), dist3);
+
+ vint sels = spmd_ternaryi(min_dist == dist0, 0, spmd_ternaryi(min_dist == dist1, 1, spmd_ternaryi(min_dist == dist2, 2, 3))); // dist0 is tested first, so ties resolve to the lowest index
+
+ __m128i vsels = shuffle_epi8(sels.m_value, shuf); // pack the low byte of each 32-bit selector into the low 4 bytes
+ storeu_si32((void *)(pSelectors + i), vsels); // presumably an unaligned 4-byte store covering pSelectors[i..i+3] - confirm
+
+ *pDistance += reduce_add(min_dist); // presumably the sum of the lanes of min_dist - confirm in cppspmd
+ if (*pDistance >= early_out_err) // selectors for this group are already stored when the early out fires
+ return;
+ }
+
+ for (; i < n; i++) // scalar tail: the remaining (n % 4) pixels
+ {
+ int r = pSrc_pixels[i].r, g = pSrc_pixels[i].g, b = pSrc_pixels[i].b;
+
+ int best_err = INT_MAX, best_sel = 0;
+ for (int sel = 0; sel < 4; sel++)
+ {
+ int base_r = pBlock_colors[sel].r, base_g = pBlock_colors[sel].g, base_b = pBlock_colors[sel].b;
+
+ int dr = base_r - r;
+ int dg = base_g - g;
+ int db = base_b - b;
+
+ int id = dr * dr + dg * dg + db * db;
+ if (id < best_err) // strict '<' keeps the lowest index on ties, matching the vector loop
+ {
+ best_err = id;
+ best_sel = sel;
+ }
+ }
+
+ pSelectors[i] = (uint8_t)best_sel;
+
+ *pDistance += best_err;
+ if (*pDistance >= early_out_err) // early out is tested after every pixel in the tail
+ return;
+ }
+ }
+ };
+
+ struct find_lowest_error_perceptual_rgb_4_N : spmd_kernel // Kernel: totals, over all source pixels, the smallest weighted error against any of 4 block colors; no selectors are stored.
+ {
+ inline vint compute_dist( // per-lane weighted error between one block color and the pixels in r/g/b
+ const vint& base_r, const vint& base_g, const vint& base_b, // block color channels
+ const vint& r, const vint& g, const vint& b) // source pixel channels
+ {
+ vint dr = base_r - r;
+ vint dg = base_g - g;
+ vint db = base_b - b;
+
+ vint delta_l = dr * 27 + dg * 92 + db * 9; // weights sum to 128, so this is a blend of the channel deltas scaled by 128
+ vint delta_cr = dr * 128 - delta_l; // red delta at the same 128 scale, minus the blended term
+ vint delta_cb = db * 128 - delta_l; // blue delta at the same 128 scale, minus the blended term
+
+ vint id = VINT_SHIFT_RIGHT(delta_l * delta_l, 7) + // NOTE(review): if channels are 8-bit, |delta_cr| can reach 255*202 and |delta_cb| 255*238; their squares exceed INT32_MAX - confirm wraparound is acceptable
+ VINT_SHIFT_RIGHT(VINT_SHIFT_RIGHT(delta_cr * delta_cr, 7) * 26, 7) + // delta_cr term weighted by 26/128
+ VINT_SHIFT_RIGHT(VINT_SHIFT_RIGHT(delta_cb * delta_cb, 7) * 3, 7); // delta_cb term weighted by 3/128
+
+ return id;
+ }
+
+ void _call(int64_t* pDistance, // out: error total; zeroed on entry, holds a partial sum after an early return
+ const color_rgba* pBlock_colors, // in: the 4 candidate colors (entries 0..3 are read)
+ const color_rgba* pSrc_pixels, uint32_t n, // in: n source pixels to score
+ int64_t early_out_error) // in: stop once the running total is strictly greater than this value; asserted non-negative
+ {
+ assert(early_out_error >= 0);
+
+ *pDistance = 0;
+
+ vint block_colors_r[4], block_colors_g[4], block_colors_b[4];
+ for (uint32_t i = 0; i < 4; i++)
+ {
+ store_all(block_colors_r[i], (int)pBlock_colors[i].r); // presumably broadcasts the scalar to every lane - confirm in cppspmd
+ store_all(block_colors_g[i], (int)pBlock_colors[i].g);
+ store_all(block_colors_b[i], (int)pBlock_colors[i].b);
+ }
+
+ uint32_t i; // declared outside the loops so the scalar tail resumes where the vector loop stopped
+
+ for (i = 0; (i + 4) <= n; i += 4) // main loop: 4 pixels per iteration
+ {
+ __m128i c0 = load_rgba32(&pSrc_pixels[i + 0]), c1 = load_rgba32(&pSrc_pixels[i + 1]), c2 = load_rgba32(&pSrc_pixels[i + 2]), c3 = load_rgba32(&pSrc_pixels[i + 3]);
+
+ vint r, g, b, a; // a is filled by the transpose but never read
+ transpose4x4(r.m_value, g.m_value, b.m_value, a.m_value, c0, c1, c2, c3); // presumably turns 4 packed pixels into one vector per channel - confirm
+
+ vint dist0 = compute_dist(block_colors_r[0], block_colors_g[0], block_colors_b[0], r, g, b);
+ vint dist1 = compute_dist(block_colors_r[1], block_colors_g[1], block_colors_b[1], r, g, b);
+ vint dist2 = compute_dist(block_colors_r[2], block_colors_g[2], block_colors_b[2], r, g, b);
+ vint dist3 = compute_dist(block_colors_r[3], block_colors_g[3], block_colors_b[3], r, g, b);
+
+ vint min_dist = min(min(min(dist0, dist1), dist2), dist3); // per-lane smallest of the 4 candidate errors
+
+ *pDistance += reduce_add(min_dist); // presumably the sum of the lanes of min_dist - confirm in cppspmd
+ if (*pDistance > early_out_error) // note: strict '>' here, whereas the distance/selector kernels in this file use '>='
+ return;
+ }
+
+ for (; i < n; i++) // scalar tail: the remaining (n % 4) pixels
+ {
+ int r = pSrc_pixels[i].r, g = pSrc_pixels[i].g, b = pSrc_pixels[i].b;
+
+ int best_err = INT_MAX;
+ for (int sel = 0; sel < 4; sel++)
+ {
+ int base_r = pBlock_colors[sel].r, base_g = pBlock_colors[sel].g, base_b = pBlock_colors[sel].b;
+
+ int dr = base_r - r;
+ int dg = base_g - g;
+ int db = base_b - b;
+
+ int delta_l = dr * 27 + dg * 92 + db * 9;
+ int delta_cr = dr * 128 - delta_l;
+ int delta_cb = db * 128 - delta_l;
+
+ int id = ((delta_l * delta_l) >> 7) + // NOTE(review): same overflow concern as compute_dist; here it is signed int overflow
+ ((((delta_cr * delta_cr) >> 7) * 26) >> 7) +
+ ((((delta_cb * delta_cb) >> 7) * 3) >> 7);
+
+ if (id < best_err) // keep the smallest error over the 4 candidates
+ {
+ best_err = id;
+ }
+ }
+
+ *pDistance += best_err;
+ if (*pDistance > early_out_error) // strict '>' as in the vector loop; tested after every pixel
+ return;
+ }
+ }
+ };
+
+ struct find_lowest_error_linear_rgb_4_N : spmd_kernel // Kernel: totals, over all source pixels, the smallest squared RGB error against any of 4 block colors; no selectors are stored.
+ {
+ inline vint compute_dist( // per-lane squared RGB difference between one block color and the pixels in r/g/b
+ const vint& base_r, const vint& base_g, const vint& base_b, // block color channels
+ const vint& r, const vint& g, const vint& b) // source pixel channels
+ {
+ vint dr = base_r - r;
+ vint dg = base_g - g;
+ vint db = base_b - b;
+
+ vint id = dr * dr + dg * dg + db * db; // unweighted sum of squared channel differences
+
+ return id;
+ }
+
+ void _call(int64_t* pDistance, // out: error total; zeroed on entry, holds a partial sum after an early return
+ const color_rgba* pBlock_colors, // in: the 4 candidate colors (entries 0..3 are read)
+ const color_rgba* pSrc_pixels, uint32_t n, // in: n source pixels to score
+ int64_t early_out_error) // in: stop once the running total is strictly greater than this value; asserted non-negative
+ {
+ assert(early_out_error >= 0);
+
+ *pDistance = 0;
+
+ vint block_colors_r[4], block_colors_g[4], block_colors_b[4];
+ for (uint32_t i = 0; i < 4; i++)
+ {
+ store_all(block_colors_r[i], (int)pBlock_colors[i].r); // presumably broadcasts the scalar to every lane - confirm in cppspmd
+ store_all(block_colors_g[i], (int)pBlock_colors[i].g);
+ store_all(block_colors_b[i], (int)pBlock_colors[i].b);
+ }
+
+ uint32_t i; // declared outside the loops so the scalar tail resumes where the vector loop stopped
+
+ for (i = 0; (i + 4) <= n; i += 4) // main loop: 4 pixels per iteration
+ {
+ __m128i c0 = load_rgba32(&pSrc_pixels[i + 0]), c1 = load_rgba32(&pSrc_pixels[i + 1]), c2 = load_rgba32(&pSrc_pixels[i + 2]), c3 = load_rgba32(&pSrc_pixels[i + 3]);
+
+ vint r, g, b, a; // a is filled by the transpose but never read
+ transpose4x4(r.m_value, g.m_value, b.m_value, a.m_value, c0, c1, c2, c3); // presumably turns 4 packed pixels into one vector per channel - confirm
+
+ vint dist0 = compute_dist(block_colors_r[0], block_colors_g[0], block_colors_b[0], r, g, b);
+ vint dist1 = compute_dist(block_colors_r[1], block_colors_g[1], block_colors_b[1], r, g, b);
+ vint dist2 = compute_dist(block_colors_r[2], block_colors_g[2], block_colors_b[2], r, g, b);
+ vint dist3 = compute_dist(block_colors_r[3], block_colors_g[3], block_colors_b[3], r, g, b);
+
+ vint min_dist = min(min(min(dist0, dist1), dist2), dist3); // per-lane smallest of the 4 candidate errors
+
+ *pDistance += reduce_add(min_dist); // presumably the sum of the lanes of min_dist - confirm in cppspmd
+ if (*pDistance > early_out_error) // note: strict '>' here, whereas the distance/selector kernels in this file use '>='
+ return;
+ }
+
+ for (; i < n; i++) // scalar tail: the remaining (n % 4) pixels
+ {
+ int r = pSrc_pixels[i].r, g = pSrc_pixels[i].g, b = pSrc_pixels[i].b;
+
+ int best_err = INT_MAX;
+ for (int sel = 0; sel < 4; sel++)
+ {
+ int base_r = pBlock_colors[sel].r, base_g = pBlock_colors[sel].g, base_b = pBlock_colors[sel].b;
+
+ int dr = base_r - r;
+ int dg = base_g - g;
+ int db = base_b - b;
+
+ int id = dr * dr + dg * dg + db * db;
+
+ if (id < best_err) // keep the smallest error over the 4 candidates
+ {
+ best_err = id;
+ }
+ }
+
+ *pDistance += best_err;
+ if (*pDistance > early_out_error) // strict '>' as in the vector loop; tested after every pixel
+ return;
+ }
+ }
+ };
+
+} // namespace
+
+using namespace CPPSPMD_NAME(basisu_kernels_namespace);
+
+void CPPSPMD_NAME(perceptual_distance_rgb_4_N)(int64_t* pDistance, const uint8_t* pSelectors, const color_rgba* pBlock_colors, const color_rgba* pSrc_pixels, uint32_t n, int64_t early_out_err) // Free-function entry point; the name is produced by the CPPSPMD_NAME macro.
+{
+ spmd_call< perceptual_distance_rgb_4_N >(pDistance, pSelectors, pBlock_colors, pSrc_pixels, n, early_out_err); // forwards every argument unchanged to the kernel struct of the same name
+}
+
+void CPPSPMD_NAME(linear_distance_rgb_4_N)(int64_t* pDistance, const uint8_t* pSelectors, const color_rgba* pBlock_colors, const color_rgba* pSrc_pixels, uint32_t n, int64_t early_out_err) // Free-function entry point; the name is produced by the CPPSPMD_NAME macro.
+{
+ spmd_call< linear_distance_rgb_4_N >(pDistance, pSelectors, pBlock_colors, pSrc_pixels, n, early_out_err); // forwards every argument unchanged to the kernel struct of the same name
+}
+
+void CPPSPMD_NAME(find_selectors_perceptual_rgb_4_N)(int64_t *pDistance, uint8_t* pSelectors, const color_rgba* pBlock_colors, const color_rgba* pSrc_pixels, uint32_t n, int64_t early_out_err) // Free-function entry point; the name is produced by the CPPSPMD_NAME macro. pSelectors is an output here.
+{
+ spmd_call< find_selectors_perceptual_rgb_4_N >(pDistance, pSelectors, pBlock_colors, pSrc_pixels, n, early_out_err); // forwards every argument unchanged to the kernel struct of the same name
+}
+
+void CPPSPMD_NAME(find_selectors_linear_rgb_4_N)(int64_t* pDistance, uint8_t* pSelectors, const color_rgba* pBlock_colors, const color_rgba* pSrc_pixels, uint32_t n, int64_t early_out_err) // Free-function entry point; the name is produced by the CPPSPMD_NAME macro. pSelectors is an output here.
+{
+ spmd_call< find_selectors_linear_rgb_4_N >(pDistance, pSelectors, pBlock_colors, pSrc_pixels, n, early_out_err); // forwards every argument unchanged to the kernel struct of the same name
+}
+
+void CPPSPMD_NAME(find_lowest_error_perceptual_rgb_4_N)(int64_t* pDistance, const color_rgba* pBlock_colors, const color_rgba* pSrc_pixels, uint32_t n, int64_t early_out_error) // Free-function entry point; the name is produced by the CPPSPMD_NAME macro. Takes no selector array.
+{
+ spmd_call< find_lowest_error_perceptual_rgb_4_N >(pDistance, pBlock_colors, pSrc_pixels, n, early_out_error); // forwards every argument unchanged to the kernel struct of the same name
+}
+
+void CPPSPMD_NAME(find_lowest_error_linear_rgb_4_N)(int64_t* pDistance, const color_rgba* pBlock_colors, const color_rgba* pSrc_pixels, uint32_t n, int64_t early_out_error) // Free-function entry point; the name is produced by the CPPSPMD_NAME macro. Takes no selector array.
+{
+ spmd_call< find_lowest_error_linear_rgb_4_N >(pDistance, pBlock_colors, pSrc_pixels, n, early_out_error); // forwards every argument unchanged to the kernel struct of the same name
+}
+