Diffstat (limited to 'thirdparty/oidn/mkl-dnn/src/cpu/cpu_batch_normalization_utils.cpp')
-rw-r--r--  thirdparty/oidn/mkl-dnn/src/cpu/cpu_batch_normalization_utils.cpp  140
1 file changed, 0 insertions, 140 deletions
diff --git a/thirdparty/oidn/mkl-dnn/src/cpu/cpu_batch_normalization_utils.cpp b/thirdparty/oidn/mkl-dnn/src/cpu/cpu_batch_normalization_utils.cpp
deleted file mode 100644
index b8d5c4fcaf..0000000000
--- a/thirdparty/oidn/mkl-dnn/src/cpu/cpu_batch_normalization_utils.cpp
+++ /dev/null
@@ -1,140 +0,0 @@
-/*******************************************************************************
-* Copyright 2018 Intel Corporation
-*
-* Licensed under the Apache License, Version 2.0 (the "License");
-* you may not use this file except in compliance with the License.
-* You may obtain a copy of the License at
-*
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing, software
-* distributed under the License is distributed on an "AS IS" BASIS,
-* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-* See the License for the specific language governing permissions and
-* limitations under the License.
-*******************************************************************************/
-
-#include "c_types_map.hpp"
-#include "utils.hpp"
-
-#include "jit_generator.hpp"
-
-#include "cpu_batch_normalization_utils.hpp"
-
-namespace mkldnn {
-namespace impl {
-namespace cpu {
-namespace bnorm_utils {
-
-void cache_balance(size_t working_set_size, dim_t C_blks,
- dim_t &C_blks_per_iter, int64_t &iters) {
- int nthrs = mkldnn_get_max_threads();
- int l3_size = get_cache_size(3, true) * nthrs / 2;
-
- C_blks_per_iter = l3_size / working_set_size;
-
- if (C_blks_per_iter == 0)
- C_blks_per_iter = 1;
- if (C_blks_per_iter > C_blks)
- C_blks_per_iter = C_blks;
-
- iters = (C_blks + C_blks_per_iter - 1) / C_blks_per_iter;
-}
-
-bool thread_balance(bool do_blocking, bool spatial_thr_allowed, int ithr,
- int nthr, dim_t N, dim_t C_blks, dim_t SP, int &C_ithr, int &C_nthr,
- dim_t &C_blk_s, dim_t &C_blk_e, int &N_ithr, int &N_nthr, dim_t &N_s,
- dim_t &N_e, int &S_ithr, int &S_nthr, dim_t &S_s, dim_t &S_e) {
- if (nthr <= C_blks || !mkldnn_thr_syncable()) {
- C_ithr = ithr; C_nthr = nthr;
- N_ithr = 0; N_nthr = 1;
- S_ithr = 0; S_nthr = 1;
- N_s = 0; N_e = N; S_s = 0; S_e = SP;
- balance211(C_blks, C_nthr, C_ithr, C_blk_s, C_blk_e);
- } else {
- if (do_blocking) {
- N_nthr = (int)nstl::min<dim_t>(N, nthr);
- C_nthr = (int)nstl::min<dim_t>(C_blks, nthr / N_nthr);
- S_nthr = (int)nstl::min<dim_t>(SP, nthr / (C_nthr * N_nthr));
- } else {
- C_nthr = (int)math::gcd((dim_t)nthr, C_blks);
- N_nthr = (int)nstl::min<dim_t>(N, nthr / C_nthr);
- S_nthr = (int)nstl::min<dim_t>(SP, nthr / (C_nthr * N_nthr));
- }
-
- if (!spatial_thr_allowed)
- S_nthr = 1;
-
- if (S_nthr < 1) S_nthr = 1;
- if (ithr < C_nthr * N_nthr * S_nthr) {
- N_ithr = (ithr / S_nthr) % N_nthr;
- C_ithr = ithr / (N_nthr * S_nthr);
- S_ithr = ithr % S_nthr;
- balance211(C_blks, C_nthr, C_ithr, C_blk_s, C_blk_e);
- balance211(N, N_nthr, N_ithr, N_s, N_e);
- balance211(SP, S_nthr, S_ithr, S_s, S_e);
- } else {
- S_ithr = N_ithr = C_ithr = -ithr;
- S_s = S_e = N_s = N_e = C_blk_s = C_blk_e = -1;
- }
- }
-
- // spatial_thr_allowed is meant to help maintain
- // consistent decisions about spatial threading
- // between multiple invocations of this routine.
- // It is the caller's responsibility to check the
- // return value and pass it as a flag to the
- // next call if needed.
- if (S_nthr == 1)
- spatial_thr_allowed = false;
-
- return spatial_thr_allowed;
-}
-
-bool is_spatial_thr(const batch_normalization_pd_t *bdesc, int simd_w,
- int data_size) {
- if (!mkldnn_thr_syncable()) return false;
-
- dim_t nthr = mkldnn_get_max_threads();
- dim_t SP = bdesc->W() * bdesc->D() * bdesc->H();
- dim_t C_PADDED = memory_desc_wrapper(bdesc->src_md())
- .padded_dims()[1];
- assert(C_PADDED % simd_w == 0);
-
- size_t data = bdesc->MB() * C_PADDED * SP * data_size;
- size_t l3_size_ = get_cache_size(3, true) * nthr / 2;
- bool do_blocking = (data >= l3_size_ / 2 && l3_size_ > 0);
- dim_t C_blks_per_iter{ 1 }, iters{ 1 };
- dim_t C_blks = C_PADDED / simd_w;
-
- if (do_blocking) {
- int num_tensors = bdesc->is_fwd() ? 1 : 2;
- size_t working_set_size
- = (bdesc->MB() * SP * simd_w * data_size) * num_tensors;
- cache_balance(working_set_size, C_blks, C_blks_per_iter, iters);
- }
-
- // The spatial threading decision made in this function must be
- // consistent with thread_balance() behavior.
- C_blks = do_blocking ? C_blks_per_iter : C_blks;
-
- if (nthr <= C_blks) return false;
-
- dim_t S_nthr = 1;
- if (do_blocking) {
- dim_t N_nthr = nstl::min(bdesc->MB(), nthr);
- dim_t C_nthr = nstl::min(C_blks, nthr / N_nthr);
- S_nthr = nstl::min(SP, nthr / (C_nthr * N_nthr));
- } else {
- dim_t C_nthr = math::gcd(nthr, C_blks);
- dim_t N_nthr = nstl::min(bdesc->MB(), nthr / C_nthr);
- S_nthr = nstl::min(SP, nthr / (C_nthr * N_nthr));
- }
-
- return S_nthr > 1;
-}
-
-}
-}
-}
-}
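Note: the arithmetic in the deleted cache_balance() is easy to misread in diff form. It sizes C_blks_per_iter so that one iteration's working set fits in an estimate of half the aggregate L3, clamps the result to [1, C_blks], and derives the iteration count by ceiling division. A minimal standalone C++ sketch with made-up numbers (the real code queries get_cache_size() and mkldnn_get_max_threads() at runtime instead):

    #include <cstdint>
    #include <cstdio>

    int main() {
        // Made-up stand-ins for the values the real code queries at runtime.
        const size_t l3_size = 8u * 1024 * 1024;          // assumed usable L3 budget
        const size_t working_set_size = 3u * 1024 * 1024; // one C block's data
        const int64_t C_blks = 16;

        int64_t C_blks_per_iter = l3_size / working_set_size; // blocks that fit
        if (C_blks_per_iter == 0) C_blks_per_iter = 1;         // at least one block
        if (C_blks_per_iter > C_blks) C_blks_per_iter = C_blks;

        // Ceiling division: number of passes over the channel blocks.
        int64_t iters = (C_blks + C_blks_per_iter - 1) / C_blks_per_iter;
        printf("C_blks_per_iter=%lld iters=%lld\n",
                (long long)C_blks_per_iter, (long long)iters);
        return 0;
    }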
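thread_balance() partitions each dimension with balance211(n, team, tid, start, end), which hands the first n % team threads one extra item each. Below is a minimal sketch of that split, assuming the usual ceil/floor semantics rather than quoting the mkl-dnn implementation (hence the _sketch suffix):

    #include <cstdio>

    // Sketch of a balance211-style split: n jobs over `team` threads,
    // thread `tid` receives the half-open range [n_start, n_end).
    // Assumed semantics; not the verbatim mkl-dnn implementation.
    template <typename T, typename U>
    static void balance211_sketch(T n, U team, U tid, T &n_start, T &n_end) {
        T base = n / team;   // minimum jobs per thread
        T extra = n % team;  // the first `extra` threads get one extra job
        T t = (T)tid;
        n_start = t * base + (t < extra ? t : extra);
        n_end = n_start + base + (t < extra ? 1 : 0);
    }

    int main() {
        long s = 0, e = 0;
        for (int tid = 0; tid < 4; ++tid) {
            balance211_sketch(10L, 4, tid, s, e);
            printf("tid=%d -> [%ld, %ld)\n", tid, s, e); // 3, 3, 2, 2 jobs
        }
        return 0;
    }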
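Finally, thread_balance() lays the threads out as a C_nthr x N_nthr x S_nthr grid with the spatial index varying fastest (the C_ithr/N_ithr/S_ithr lines above). The index arithmetic is clearer in isolation; a hypothetical standalone demo with an arbitrary 2x2x2 grid:

    #include <cstdio>

    int main() {
        // Hypothetical 2x2x2 grid; in thread_balance() these counts come
        // from N, C_blks, SP and the available thread count.
        const int C_nthr = 2, N_nthr = 2, S_nthr = 2;
        for (int ithr = 0; ithr < C_nthr * N_nthr * S_nthr; ++ithr) {
            int C_ithr = ithr / (N_nthr * S_nthr); // slowest-varying
            int N_ithr = (ithr / S_nthr) % N_nthr;
            int S_ithr = ithr % S_nthr;            // fastest-varying
            printf("ithr=%d -> C=%d N=%d S=%d\n", ithr, C_ithr, N_ithr, S_ithr);
        }
        return 0;
    }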