diff options
Diffstat (limited to 'thirdparty/oidn/mkl-dnn/src/cpu/cpu_batch_normalization_utils.cpp')
-rw-r--r-- | thirdparty/oidn/mkl-dnn/src/cpu/cpu_batch_normalization_utils.cpp | 140 |
1 files changed, 0 insertions, 140 deletions
diff --git a/thirdparty/oidn/mkl-dnn/src/cpu/cpu_batch_normalization_utils.cpp b/thirdparty/oidn/mkl-dnn/src/cpu/cpu_batch_normalization_utils.cpp deleted file mode 100644 index b8d5c4fcaf..0000000000 --- a/thirdparty/oidn/mkl-dnn/src/cpu/cpu_batch_normalization_utils.cpp +++ /dev/null @@ -1,140 +0,0 @@ -/******************************************************************************* -* Copyright 2018 Intel Corporation -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -*******************************************************************************/ - -#include "c_types_map.hpp" -#include "utils.hpp" - -#include "jit_generator.hpp" - -#include "cpu_batch_normalization_utils.hpp" - -namespace mkldnn { -namespace impl { -namespace cpu { -namespace bnorm_utils { - -void cache_balance(size_t working_set_size, dim_t C_blks, - dim_t &C_blks_per_iter, int64_t &iters) { - int nthrs = mkldnn_get_max_threads(); - int l3_size = get_cache_size(3, true) * nthrs / 2; - - C_blks_per_iter = l3_size / working_set_size; - - if (C_blks_per_iter == 0) - C_blks_per_iter = 1; - if (C_blks_per_iter > C_blks) - C_blks_per_iter = C_blks; - - iters = (C_blks + C_blks_per_iter - 1) / C_blks_per_iter; -} - -bool thread_balance(bool do_blocking, bool spatial_thr_allowed, int ithr, - int nthr, dim_t N, dim_t C_blks, dim_t SP, int &C_ithr, int &C_nthr, - dim_t &C_blk_s, dim_t &C_blk_e, int &N_ithr, int &N_nthr, dim_t &N_s, - dim_t &N_e, int &S_ithr, int &S_nthr, dim_t &S_s, dim_t &S_e) { - if (nthr <= C_blks || !mkldnn_thr_syncable()) { - C_ithr = ithr; C_nthr = nthr; - N_ithr = 0; N_nthr = 1; - S_ithr = 0; S_nthr = 1; - N_s = 0; N_e = N; S_s = 0; S_e = SP; - balance211(C_blks, C_nthr, C_ithr, C_blk_s, C_blk_e); - } else { - if (do_blocking) { - N_nthr = (int)nstl::min<dim_t>(N, nthr); - C_nthr = (int)nstl::min<dim_t>(C_blks, nthr / N_nthr); - S_nthr = (int)nstl::min<dim_t>(SP, nthr / (C_nthr * N_nthr)); - } else { - C_nthr = (int)math::gcd((dim_t)nthr, C_blks); - N_nthr = (int)nstl::min<dim_t>(N, nthr / C_nthr); - S_nthr = (int)nstl::min<dim_t>(SP, nthr / (C_nthr * N_nthr)); - } - - if (!spatial_thr_allowed) - S_nthr = 1; - - if (S_nthr < 1) S_nthr = 1; - if (ithr < C_nthr * N_nthr * S_nthr) { - N_ithr = (ithr / S_nthr) % N_nthr ; - C_ithr = ithr / (N_nthr * S_nthr); - S_ithr = ithr % S_nthr; - balance211(C_blks, C_nthr, C_ithr, C_blk_s, C_blk_e); - balance211(N, N_nthr, N_ithr, N_s, N_e); - balance211(SP, S_nthr, S_ithr, S_s, S_e); - } else { - S_ithr = N_ithr = C_ithr = -ithr; - S_s = S_e = N_s = N_e = C_blk_s = C_blk_e = -1; - } - } - - // spatial_thr_allowed is meant to help maintain - // consistent decisions about spatial threading - // between mutiple invocations of this routine. - // It is caller's responsibility to check the - // return value and pass it as a flag to the - // next call if needed. - if (S_nthr == 1) - spatial_thr_allowed = false; - - return spatial_thr_allowed; -} - -bool is_spatial_thr(const batch_normalization_pd_t *bdesc, int simd_w, - int data_size) { - if (!mkldnn_thr_syncable()) return false; - - dim_t nthr = mkldnn_get_max_threads(); - dim_t SP = bdesc->W() * bdesc->D() * bdesc->H(); - dim_t C_PADDED = memory_desc_wrapper(bdesc->src_md()) - .padded_dims()[1]; - assert(C_PADDED % simd_w == 0); - - size_t data = bdesc->MB() * C_PADDED * SP * data_size; - size_t l3_size_ = get_cache_size(3, true) * nthr / 2; - bool do_blocking = (data >= l3_size_ / 2 && l3_size_ > 0); - dim_t C_blks_per_iter{ 1 }, iters{ 1 }; - dim_t C_blks = C_PADDED / simd_w; - - if (do_blocking) { - int num_tensors = bdesc->is_fwd() ? 1 : 2; - size_t working_set_size - = (bdesc->MB() * SP * simd_w * data_size) * num_tensors; - cache_balance(working_set_size, C_blks, C_blks_per_iter, iters); - } - - // Spatial threading decision made in this function shall be consistent - // with thread_balance() behavior. - C_blks = do_blocking ? C_blks_per_iter : C_blks; - - if (nthr <= C_blks) return false; - - dim_t S_nthr = 1; - if (do_blocking) { - dim_t N_nthr = nstl::min(bdesc->MB(), nthr); - dim_t C_nthr = nstl::min(C_blks, nthr / N_nthr); - S_nthr = nstl::min(SP, nthr / (C_nthr * N_nthr)); - } else { - dim_t C_nthr = math::gcd(nthr, C_blks); - dim_t N_nthr = nstl::min(bdesc->MB(), nthr / C_nthr); - S_nthr = nstl::min(SP, nthr / (C_nthr * N_nthr)); - } - - return S_nthr > 1; -} - -} -} -} -} |