diff options
author | Arseny Kapoulkine <arseny.kapoulkine@gmail.com> | 2024-11-03 11:57:07 -0800 |
---|---|---|
committer | Arseny Kapoulkine <arseny.kapoulkine@gmail.com> | 2024-11-04 06:58:06 -0800 |
commit | 260287b3a14ac17227ef1987d5410f4f2c6ef275 (patch) | |
tree | c43571e910cc9db5300bbb9151fec570d1331bd5 /scene/resources | |
parent | 1bffd6c73b44b85e5889f54e14b2193940cf5bb1 (diff) | |
download | redot-engine-260287b3a14ac17227ef1987d5410f4f2c6ef275.tar.gz |
Rewrite index optimization code for maximum efficiency
While all the previous fixes to optimizeVertexCache invocation fixed the
vertex transform efficiency, the import code was still missing two
crucial recommendations from the meshoptimizer documentation:
- All meshes should be optimized for vertex fetch (this reorders
vertices for maximum fetch efficiency)
- When LODs are used with a shared vertex buffer, the vertex order
should be generated by doing a vertex fetch optimization on the
concatenated index buffer from coarse to fine LODs; this maximizes
fetch efficiency for coarse LODs
The last point is especially crucial for Mali GPUs; unlike other GPUs
where vertex order affects fetch efficiency but not shading, these GPUs
have various shading quirks (depending on the GPU generation) that
really require consecutive index ranges for each LOD, which requires the
second optimization mentioned above. However, all of these optimizations
help desktop GPUs and other mobile GPUs as well.
Because this optimization is "global" in the sense that it affects all
LODs and all vertex arrays in concert, I've taken this opportunity to
isolate all optimization code in optimize_indices and pull it out of
generate_lods and create_shadow_mesh; this doesn't change the vertex
cache efficiency, but makes the code cleaner. Consequently,
optimize_indices should be called after other functions like
create_shadow_mesh / generate_lods.
This required exposing meshopt_optimizeVertexFetchRemap; as a drive-by,
meshopt_simplifySloppy was never used so it's not exposed anymore - this
will simplify future meshopt upgrades if they end up changing the
function's interface.
Diffstat (limited to 'scene/resources')
-rw-r--r-- | scene/resources/3d/importer_mesh.cpp | 103 | ||||
-rw-r--r-- | scene/resources/3d/importer_mesh.h | 2 | ||||
-rw-r--r-- | scene/resources/surface_tool.cpp | 2 | ||||
-rw-r--r-- | scene/resources/surface_tool.h | 8 |
4 files changed, 92 insertions, 23 deletions
diff --git a/scene/resources/3d/importer_mesh.cpp b/scene/resources/3d/importer_mesh.cpp index e255cb077f..f040f04cd8 100644 --- a/scene/resources/3d/importer_mesh.cpp +++ b/scene/resources/3d/importer_mesh.cpp @@ -168,10 +168,56 @@ void ImporterMesh::set_surface_material(int p_surface, const Ref<Material> &p_ma mesh.unref(); } -void ImporterMesh::optimize_indices_for_cache() { +template <typename T> +static Vector<T> _remap_array(Vector<T> p_array, const Vector<uint32_t> &p_remap, uint32_t p_vertex_count) { + ERR_FAIL_COND_V(p_array.size() % p_remap.size() != 0, p_array); + int num_elements = p_array.size() / p_remap.size(); + T *data = p_array.ptrw(); + SurfaceTool::remap_vertex_func(data, data, p_remap.size(), sizeof(T) * num_elements, p_remap.ptr()); + p_array.resize(p_vertex_count * num_elements); + return p_array; +} + +static void _remap_arrays(Array &r_arrays, const Vector<uint32_t> &p_remap, uint32_t p_vertex_count) { + for (int i = 0; i < r_arrays.size(); i++) { + if (i == RS::ARRAY_INDEX) { + continue; + } + + switch (r_arrays[i].get_type()) { + case Variant::NIL: + break; + case Variant::PACKED_VECTOR3_ARRAY: + r_arrays[i] = _remap_array<Vector3>(r_arrays[i], p_remap, p_vertex_count); + break; + case Variant::PACKED_VECTOR2_ARRAY: + r_arrays[i] = _remap_array<Vector2>(r_arrays[i], p_remap, p_vertex_count); + break; + case Variant::PACKED_FLOAT32_ARRAY: + r_arrays[i] = _remap_array<float>(r_arrays[i], p_remap, p_vertex_count); + break; + case Variant::PACKED_INT32_ARRAY: + r_arrays[i] = _remap_array<int32_t>(r_arrays[i], p_remap, p_vertex_count); + break; + case Variant::PACKED_BYTE_ARRAY: + r_arrays[i] = _remap_array<uint8_t>(r_arrays[i], p_remap, p_vertex_count); + break; + case Variant::PACKED_COLOR_ARRAY: + r_arrays[i] = _remap_array<Color>(r_arrays[i], p_remap, p_vertex_count); + break; + default: + ERR_FAIL_MSG("Unhandled array type."); + } + } +} + +void ImporterMesh::optimize_indices() { if (!SurfaceTool::optimize_vertex_cache_func) { return; } + 
if (!SurfaceTool::optimize_vertex_fetch_remap_func || !SurfaceTool::remap_vertex_func || !SurfaceTool::remap_index_func) { + return; + } for (int i = 0; i < surfaces.size(); i++) { if (surfaces[i].primitive != Mesh::PRIMITIVE_TRIANGLES) { @@ -188,10 +234,48 @@ void ImporterMesh::optimize_indices_for_cache() { continue; } + // Optimize indices for vertex cache to establish final triangle order. int *indices_ptr = indices.ptrw(); SurfaceTool::optimize_vertex_cache_func((unsigned int *)indices_ptr, (const unsigned int *)indices_ptr, index_count, vertex_count); + surfaces.write[i].arrays[RS::ARRAY_INDEX] = indices; + + for (int j = 0; j < surfaces[i].lods.size(); ++j) { + Surface::LOD &lod = surfaces.write[i].lods.write[j]; + int *lod_indices_ptr = lod.indices.ptrw(); + SurfaceTool::optimize_vertex_cache_func((unsigned int *)lod_indices_ptr, (const unsigned int *)lod_indices_ptr, lod.indices.size(), vertex_count); + } + // Concatenate indices for all LODs in the order of coarse->fine; this establishes the effective order of vertices, + // and is important to optimize for vertex fetch (all GPUs) and shading (Mali GPUs) + PackedInt32Array merged_indices; + for (int j = surfaces[i].lods.size() - 1; j >= 0; --j) { + merged_indices.append_array(surfaces[i].lods[j].indices); + } + merged_indices.append_array(indices); + + // Generate remap array that establishes optimal vertex order according to the order of indices above. + Vector<uint32_t> remap; + remap.resize(vertex_count); + unsigned int new_vertex_count = SurfaceTool::optimize_vertex_fetch_remap_func(remap.ptrw(), (const unsigned int *)merged_indices.ptr(), merged_indices.size(), vertex_count); + + // We need to remap all vertex and index arrays in lockstep according to the remap. 
+ SurfaceTool::remap_index_func((unsigned int *)indices_ptr, (const unsigned int *)indices_ptr, index_count, remap.ptr()); surfaces.write[i].arrays[RS::ARRAY_INDEX] = indices; + + for (int j = 0; j < surfaces[i].lods.size(); ++j) { + Surface::LOD &lod = surfaces.write[i].lods.write[j]; + int *lod_indices_ptr = lod.indices.ptrw(); + SurfaceTool::remap_index_func((unsigned int *)lod_indices_ptr, (const unsigned int *)lod_indices_ptr, lod.indices.size(), remap.ptr()); + } + + _remap_arrays(surfaces.write[i].arrays, remap, new_vertex_count); + for (int j = 0; j < surfaces[i].blend_shape_data.size(); j++) { + _remap_arrays(surfaces.write[i].blend_shape_data.write[j].arrays, remap, new_vertex_count); + } + } + + if (shadow_mesh.is_valid()) { + shadow_mesh->optimize_indices(); } } @@ -215,9 +299,6 @@ void ImporterMesh::generate_lods(float p_normal_merge_angle, Array p_bone_transf if (!SurfaceTool::simplify_with_attrib_func) { return; } - if (!SurfaceTool::optimize_vertex_cache_func) { - return; - } LocalVector<Transform3D> bone_transform_vector; for (int i = 0; i < p_bone_transform_array.size(); i++) { @@ -431,12 +512,6 @@ void ImporterMesh::generate_lods(float p_normal_merge_angle, Array p_bone_transf } surfaces.write[i].lods.sort_custom<Surface::LODComparator>(); - - for (int j = 0; j < surfaces.write[i].lods.size(); j++) { - Surface::LOD &lod = surfaces.write[i].lods.write[j]; - unsigned int *lod_indices_ptr = (unsigned int *)lod.indices.ptrw(); - SurfaceTool::optimize_vertex_cache_func(lod_indices_ptr, lod_indices_ptr, lod.indices.size(), vertex_count); - } } } @@ -574,10 +649,6 @@ void ImporterMesh::create_shadow_mesh() { index_wptr[j] = vertex_remap[index]; } - if (SurfaceTool::optimize_vertex_cache_func && surfaces[i].primitive == Mesh::PRIMITIVE_TRIANGLES) { - SurfaceTool::optimize_vertex_cache_func((unsigned int *)index_wptr, (const unsigned int *)index_wptr, index_count, new_vertices.size()); - } - new_surface[RS::ARRAY_INDEX] = new_indices; // Make sure the 
same LODs as the full version are used. @@ -596,10 +667,6 @@ void ImporterMesh::create_shadow_mesh() { index_wptr[k] = vertex_remap[index]; } - if (SurfaceTool::optimize_vertex_cache_func && surfaces[i].primitive == Mesh::PRIMITIVE_TRIANGLES) { - SurfaceTool::optimize_vertex_cache_func((unsigned int *)index_wptr, (const unsigned int *)index_wptr, index_count, new_vertices.size()); - } - lods[surfaces[i].lods[j].distance] = new_indices; } } diff --git a/scene/resources/3d/importer_mesh.h b/scene/resources/3d/importer_mesh.h index 7776e78f11..2bdf759da6 100644 --- a/scene/resources/3d/importer_mesh.h +++ b/scene/resources/3d/importer_mesh.h @@ -113,7 +113,7 @@ public: void set_surface_material(int p_surface, const Ref<Material> &p_material); - void optimize_indices_for_cache(); + void optimize_indices(); void generate_lods(float p_normal_merge_angle, Array p_skin_pose_transform_array); diff --git a/scene/resources/surface_tool.cpp b/scene/resources/surface_tool.cpp index 6921885ee0..c230cf1b70 100644 --- a/scene/resources/surface_tool.cpp +++ b/scene/resources/surface_tool.cpp @@ -33,10 +33,10 @@ #define EQ_VERTEX_DIST 0.00001 SurfaceTool::OptimizeVertexCacheFunc SurfaceTool::optimize_vertex_cache_func = nullptr; +SurfaceTool::OptimizeVertexFetchRemapFunc SurfaceTool::optimize_vertex_fetch_remap_func = nullptr; SurfaceTool::SimplifyFunc SurfaceTool::simplify_func = nullptr; SurfaceTool::SimplifyWithAttribFunc SurfaceTool::simplify_with_attrib_func = nullptr; SurfaceTool::SimplifyScaleFunc SurfaceTool::simplify_scale_func = nullptr; -SurfaceTool::SimplifySloppyFunc SurfaceTool::simplify_sloppy_func = nullptr; SurfaceTool::GenerateRemapFunc SurfaceTool::generate_remap_func = nullptr; SurfaceTool::RemapVertexFunc SurfaceTool::remap_vertex_func = nullptr; SurfaceTool::RemapIndexFunc SurfaceTool::remap_index_func = nullptr; diff --git a/scene/resources/surface_tool.h b/scene/resources/surface_tool.h index 3b18e082a1..68dc9e7198 100644 --- a/scene/resources/surface_tool.h 
+++ b/scene/resources/surface_tool.h @@ -90,14 +90,14 @@ public: typedef void (*OptimizeVertexCacheFunc)(unsigned int *destination, const unsigned int *indices, size_t index_count, size_t vertex_count); static OptimizeVertexCacheFunc optimize_vertex_cache_func; + typedef size_t (*OptimizeVertexFetchRemapFunc)(unsigned int *destination, const unsigned int *indices, size_t index_count, size_t vertex_count); + static OptimizeVertexFetchRemapFunc optimize_vertex_fetch_remap_func; typedef size_t (*SimplifyFunc)(unsigned int *destination, const unsigned int *indices, size_t index_count, const float *vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error, unsigned int options, float *r_error); static SimplifyFunc simplify_func; typedef size_t (*SimplifyWithAttribFunc)(unsigned int *destination, const unsigned int *indices, size_t index_count, const float *vertex_data, size_t vertex_count, size_t vertex_stride, const float *attributes, size_t attribute_stride, const float *attribute_weights, size_t attribute_count, const unsigned char *vertex_lock, size_t target_index_count, float target_error, unsigned int options, float *result_error); static SimplifyWithAttribFunc simplify_with_attrib_func; typedef float (*SimplifyScaleFunc)(const float *vertex_positions, size_t vertex_count, size_t vertex_positions_stride); static SimplifyScaleFunc simplify_scale_func; - typedef size_t (*SimplifySloppyFunc)(unsigned int *destination, const unsigned int *indices, size_t index_count, const float *vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error, float *out_result_error); - static SimplifySloppyFunc simplify_sloppy_func; typedef size_t (*GenerateRemapFunc)(unsigned int *destination, const unsigned int *indices, size_t index_count, const void *vertices, size_t vertex_count, size_t vertex_size); static GenerateRemapFunc generate_remap_func; typedef void 
(*RemapVertexFunc)(void *destination, const void *vertices, size_t vertex_count, size_t vertex_size, const unsigned int *remap); @@ -222,7 +222,9 @@ public: void clear(); - LocalVector<Vertex> &get_vertex_array() { return vertex_array; } + LocalVector<Vertex> &get_vertex_array() { + return vertex_array; + } void create_from_triangle_arrays(const Array &p_arrays); void create_from_arrays(const Array &p_arrays, Mesh::PrimitiveType p_primitive_type = Mesh::PRIMITIVE_TRIANGLES); |