From cae4bf58ac1b10af109b56ddaabac940d544c8f5 Mon Sep 17 00:00:00 2001 From: bruvzg <7645683+bruvzg@users.noreply.github.com> Date: Mon, 5 Feb 2024 19:26:45 +0200 Subject: [PATCH] [Core] Improve `CowData` and `Memory` metadata alignment. (cherry picked from commit b173a4d93551bbaabd3c4c26104f3c098055efae) --- include/godot_cpp/core/memory.hpp | 48 ++++++++---- include/godot_cpp/templates/cowdata.hpp | 99 ++++++++++++++++--------- src/core/memory.cpp | 12 +-- 3 files changed, 106 insertions(+), 53 deletions(-) diff --git a/include/godot_cpp/core/memory.hpp b/include/godot_cpp/core/memory.hpp index 97a2b13b..6775cf76 100644 --- a/include/godot_cpp/core/memory.hpp +++ b/include/godot_cpp/core/memory.hpp @@ -40,10 +40,6 @@ #include -#ifndef PAD_ALIGN -#define PAD_ALIGN 16 //must always be greater than this at much -#endif - // p_dummy argument is added to avoid conflicts with the engine functions when both engine and GDExtension are built as a static library on iOS. void *operator new(size_t p_size, const char *p_dummy, const char *p_description); ///< operator new that takes a description and uses MemoryStaticPool void *operator new(size_t p_size, const char *p_dummy, void *(*p_allocfunc)(size_t p_size)); ///< operator new that takes a description and uses MemoryStaticPool @@ -69,6 +65,18 @@ class Memory { Memory(); public: + // Alignment: ↓ max_align_t ↓ uint64_t ↓ max_align_t + // ┌─────────────────┬──┬────────────────┬──┬───────────... + // │ uint64_t │░░│ uint64_t │░░│ T[] + // │ alloc size │░░│ element count │░░│ data + // └─────────────────┴──┴────────────────┴──┴───────────... + // Offset: ↑ SIZE_OFFSET ↑ ELEMENT_OFFSET ↑ DATA_OFFSET + // Note: "alloc size" is used and set by the engine and is never accessed or changed for the extension. + + static constexpr size_t SIZE_OFFSET = 0; + static constexpr size_t ELEMENT_OFFSET = ((SIZE_OFFSET + sizeof(uint64_t)) % alignof(uint64_t) == 0) ? (SIZE_OFFSET + sizeof(uint64_t)) : ((SIZE_OFFSET + sizeof(uint64_t)) + alignof(uint64_t) - ((SIZE_OFFSET + sizeof(uint64_t)) % alignof(uint64_t))); + static constexpr size_t DATA_OFFSET = ((ELEMENT_OFFSET + sizeof(uint64_t)) % alignof(max_align_t) == 0) ? (ELEMENT_OFFSET + sizeof(uint64_t)) : ((ELEMENT_OFFSET + sizeof(uint64_t)) + alignof(max_align_t) - ((ELEMENT_OFFSET + sizeof(uint64_t)) % alignof(max_align_t))); + static void *alloc_static(size_t p_bytes, bool p_pad_align = false); static void *realloc_static(void *p_memory, size_t p_bytes, bool p_pad_align = false); static void free_static(void *p_ptr, bool p_pad_align = false); @@ -99,7 +107,7 @@ struct Comparator { template void memdelete(T *p_class, typename std::enable_if>::type * = nullptr) { - if (!std::is_trivially_destructible::value) { + if constexpr (!std::is_trivially_destructible_v) { p_class->~T(); } @@ -113,7 +121,7 @@ void memdelete(T *p_class) { template void memdelete_allocator(T *p_class) { - if (!std::is_trivially_destructible::value) { + if constexpr (!std::is_trivially_destructible_v) { p_class->~T(); } @@ -136,6 +144,10 @@ public: #define memnew_arr(m_class, m_count) memnew_arr_template(m_count) +_FORCE_INLINE_ uint64_t *_get_element_count_ptr(uint8_t *p_ptr) { + return (uint64_t *)(p_ptr - Memory::DATA_OFFSET + Memory::ELEMENT_OFFSET); +} + template T *memnew_arr_template(size_t p_elements, const char *p_descr = "") { if (p_elements == 0) { @@ -145,12 +157,14 @@ T *memnew_arr_template(size_t p_elements, const char *p_descr = "") { same strategy used by std::vector, and the Vector class, so it should be safe.*/ size_t len = sizeof(T) * p_elements; - uint64_t *mem = (uint64_t *)Memory::alloc_static(len, true); + uint8_t *mem = (uint8_t *)Memory::alloc_static(len, true); T *failptr = nullptr; // Get rid of a warning. ERR_FAIL_NULL_V(mem, failptr); - *(mem - 1) = p_elements; - if (!std::is_trivially_destructible::value) { + uint64_t *_elem_count_ptr = _get_element_count_ptr(mem); + *(_elem_count_ptr) = p_elements; + + if constexpr (!std::is_trivially_destructible_v) { T *elems = (T *)mem; /* call operator new */ @@ -163,11 +177,19 @@ T *memnew_arr_template(size_t p_elements, const char *p_descr = "") { } template -void memdelete_arr(T *p_class) { - uint64_t *ptr = (uint64_t *)p_class; +size_t memarr_len(const T *p_class) { + uint8_t *ptr = (uint8_t *)p_class; + uint64_t *_elem_count_ptr = _get_element_count_ptr(ptr); + return *(_elem_count_ptr); +} - if (!std::is_trivially_destructible::value) { - uint64_t elem_count = *(ptr - 1); +template +void memdelete_arr(T *p_class) { + uint8_t *ptr = (uint8_t *)p_class; + + if constexpr (!std::is_trivially_destructible_v) { + uint64_t *_elem_count_ptr = _get_element_count_ptr(ptr); + uint64_t elem_count = *(_elem_count_ptr); for (uint64_t i = 0; i < elem_count; i++) { p_class[i].~T(); diff --git a/include/godot_cpp/templates/cowdata.hpp b/include/godot_cpp/templates/cowdata.hpp index b6f66d7f..f2959d19 100644 --- a/include/godot_cpp/templates/cowdata.hpp +++ b/include/godot_cpp/templates/cowdata.hpp @@ -52,7 +52,7 @@ class VMap; template class CharStringT; -SAFE_NUMERIC_TYPE_PUN_GUARANTEES(uint64_t) +static_assert(std::is_trivially_destructible_v>); // Silence a false positive warning (see GH-52119). #if defined(__GNUC__) && !defined(__clang__) @@ -96,18 +96,39 @@ private: return ++x; } - static constexpr USize ALLOC_PAD = sizeof(USize) * 2; // For size and atomic refcount. + // Alignment: ↓ max_align_t ↓ USize ↓ max_align_t + // ┌────────────────────┬──┬─────────────┬──┬───────────... + // │ SafeNumeric │░░│ USize │░░│ T[] + // │ ref. count │░░│ data size │░░│ data + // └────────────────────┴──┴─────────────┴──┴───────────... + // Offset: ↑ REF_COUNT_OFFSET ↑ SIZE_OFFSET ↑ DATA_OFFSET + + static constexpr size_t REF_COUNT_OFFSET = 0; + static constexpr size_t SIZE_OFFSET = ((REF_COUNT_OFFSET + sizeof(SafeNumeric)) % alignof(USize) == 0) ? (REF_COUNT_OFFSET + sizeof(SafeNumeric)) : ((REF_COUNT_OFFSET + sizeof(SafeNumeric)) + alignof(USize) - ((REF_COUNT_OFFSET + sizeof(SafeNumeric)) % alignof(USize))); + static constexpr size_t DATA_OFFSET = ((SIZE_OFFSET + sizeof(USize)) % alignof(max_align_t) == 0) ? (SIZE_OFFSET + sizeof(USize)) : ((SIZE_OFFSET + sizeof(USize)) + alignof(max_align_t) - ((SIZE_OFFSET + sizeof(USize)) % alignof(max_align_t))); mutable T *_ptr = nullptr; // internal helpers + static _FORCE_INLINE_ SafeNumeric *_get_refcount_ptr(uint8_t *p_ptr) { + return (SafeNumeric *)(p_ptr + REF_COUNT_OFFSET); + } + + static _FORCE_INLINE_ USize *_get_size_ptr(uint8_t *p_ptr) { + return (USize *)(p_ptr + SIZE_OFFSET); + } + + static _FORCE_INLINE_ T *_get_data_ptr(uint8_t *p_ptr) { + return (T *)(p_ptr + DATA_OFFSET); + } + _FORCE_INLINE_ SafeNumeric *_get_refcount() const { if (!_ptr) { return nullptr; } - return reinterpret_cast *>(_ptr) - 2; + return (SafeNumeric *)((uint8_t *)_ptr - DATA_OFFSET + REF_COUNT_OFFSET); } _FORCE_INLINE_ USize *_get_size() const { @@ -115,7 +136,7 @@ private: return nullptr; } - return reinterpret_cast(_ptr) - 1; + return (USize *)((uint8_t *)_ptr - DATA_OFFSET + SIZE_OFFSET); } _FORCE_INLINE_ USize _get_alloc_size(USize p_elements) const { @@ -240,7 +261,7 @@ void CowData::_unref(void *p_data) { } // clean up - if (!std::is_trivially_destructible::value) { + if constexpr (!std::is_trivially_destructible_v) { USize *count = _get_size(); T *data = (T *)(count + 1); @@ -251,7 +272,7 @@ void CowData::_unref(void *p_data) { } // free mem - Memory::free_static(((uint8_t *)p_data) - ALLOC_PAD, false); + Memory::free_static(((uint8_t *)p_data) - DATA_OFFSET, false); } template @@ -267,26 +288,27 @@ typename CowData::USize CowData::_copy_on_write() { /* in use by more than me */ USize current_size = *_get_size(); - USize *mem_new = (USize *)Memory::alloc_static(_get_alloc_size(current_size) + ALLOC_PAD, false); - mem_new += 2; + uint8_t *mem_new = (uint8_t *)Memory::alloc_static(_get_alloc_size(current_size) + DATA_OFFSET, false); + ERR_FAIL_NULL_V(mem_new, 0); - new (mem_new - 2) SafeNumeric(1); //refcount - *(mem_new - 1) = current_size; //size + SafeNumeric *_refc_ptr = _get_refcount_ptr(mem_new); + USize *_size_ptr = _get_size_ptr(mem_new); + T *_data_ptr = _get_data_ptr(mem_new); - T *_data = (T *)(mem_new); + new (_refc_ptr) SafeNumeric(1); //refcount + *(_size_ptr) = current_size; //size // initialize new elements - if (std::is_trivially_copyable::value) { - memcpy(mem_new, _ptr, current_size * sizeof(T)); - + if constexpr (std::is_trivially_copyable_v) { + memcpy((uint8_t *)_data_ptr, _ptr, current_size * sizeof(T)); } else { for (USize i = 0; i < current_size; i++) { - memnew_placement(&_data[i], T(_ptr[i])); + memnew_placement(&_data_ptr[i], T(_ptr[i])); } } _unref(_ptr); - _ptr = _data; + _ptr = _data_ptr; rc = 1; } @@ -322,27 +344,33 @@ Error CowData::resize(Size p_size) { if (alloc_size != current_alloc_size) { if (current_size == 0) { // alloc from scratch - USize *ptr = (USize *)Memory::alloc_static(alloc_size + ALLOC_PAD, false); - ptr += 2; - ERR_FAIL_NULL_V(ptr, ERR_OUT_OF_MEMORY); - *(ptr - 1) = 0; //size, currently none - new (ptr - 2) SafeNumeric(1); //refcount + uint8_t *mem_new = (uint8_t *)Memory::alloc_static(alloc_size + DATA_OFFSET, false); + ERR_FAIL_NULL_V(mem_new, ERR_OUT_OF_MEMORY); - _ptr = (T *)ptr; + SafeNumeric *_refc_ptr = _get_refcount_ptr(mem_new); + USize *_size_ptr = _get_size_ptr(mem_new); + T *_data_ptr = _get_data_ptr(mem_new); + new (_refc_ptr) SafeNumeric(1); //refcount + *(_size_ptr) = 0; //size, currently none + + _ptr = _data_ptr; } else { - USize *_ptrnew = (USize *)Memory::realloc_static(((uint8_t *)_ptr) - ALLOC_PAD, alloc_size + ALLOC_PAD, false); - ERR_FAIL_NULL_V(_ptrnew, ERR_OUT_OF_MEMORY); - _ptrnew += 2; - new (_ptrnew - 2) SafeNumeric(rc); //refcount + uint8_t *mem_new = (uint8_t *)Memory::realloc_static(((uint8_t *)_ptr) - DATA_OFFSET, alloc_size + DATA_OFFSET, false); + ERR_FAIL_NULL_V(mem_new, ERR_OUT_OF_MEMORY); - _ptr = (T *)(_ptrnew); + SafeNumeric *_refc_ptr = _get_refcount_ptr(mem_new); + T *_data_ptr = _get_data_ptr(mem_new); + + new (_refc_ptr) SafeNumeric(rc); //refcount + + _ptr = _data_ptr; } } // construct the newly created elements - if (!std::is_trivially_constructible::value) { + if constexpr (!std::is_trivially_constructible_v) { for (Size i = *_get_size(); i < p_size; i++) { memnew_placement(&_ptr[i], T); } @@ -353,7 +381,7 @@ Error CowData::resize(Size p_size) { *_get_size() = p_size; } else if (p_size < current_size) { - if (!std::is_trivially_destructible::value) { + if constexpr (!std::is_trivially_destructible_v) { // deinitialize no longer needed elements for (USize i = p_size; i < *_get_size(); i++) { T *t = &_ptr[i]; @@ -362,12 +390,15 @@ Error CowData::resize(Size p_size) { } if (alloc_size != current_alloc_size) { - USize *_ptrnew = (USize *)Memory::realloc_static(((uint8_t *)_ptr) - ALLOC_PAD, alloc_size + ALLOC_PAD, false); - ERR_FAIL_NULL_V(_ptrnew, ERR_OUT_OF_MEMORY); - _ptrnew += 2; - new (_ptrnew - 2) SafeNumeric(rc); //refcount + uint8_t *mem_new = (uint8_t *)Memory::realloc_static(((uint8_t *)_ptr) - DATA_OFFSET, alloc_size + DATA_OFFSET, false); + ERR_FAIL_NULL_V(mem_new, ERR_OUT_OF_MEMORY); - _ptr = (T *)(_ptrnew); + SafeNumeric *_refc_ptr = _get_refcount_ptr(mem_new); + T *_data_ptr = _get_data_ptr(mem_new); + + new (_refc_ptr) SafeNumeric(rc); //refcount + + _ptr = _data_ptr; } *_get_size() = p_size; diff --git a/src/core/memory.cpp b/src/core/memory.cpp index 81185115..f330c23c 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp @@ -41,12 +41,12 @@ void *Memory::alloc_static(size_t p_bytes, bool p_pad_align) { bool prepad = p_pad_align; #endif - void *mem = internal::gdextension_interface_mem_alloc(p_bytes + (prepad ? PAD_ALIGN : 0)); + void *mem = internal::gdextension_interface_mem_alloc(p_bytes + (prepad ? DATA_OFFSET : 0)); ERR_FAIL_NULL_V(mem, nullptr); if (prepad) { uint8_t *s8 = (uint8_t *)mem; - return s8 + PAD_ALIGN; + return s8 + DATA_OFFSET; } else { return mem; } @@ -69,10 +69,10 @@ void *Memory::realloc_static(void *p_memory, size_t p_bytes, bool p_pad_align) { #endif if (prepad) { - mem -= PAD_ALIGN; - mem = (uint8_t *)internal::gdextension_interface_mem_realloc(mem, p_bytes + PAD_ALIGN); + mem -= DATA_OFFSET; + mem = (uint8_t *)internal::gdextension_interface_mem_realloc(mem, p_bytes + DATA_OFFSET); ERR_FAIL_NULL_V(mem, nullptr); - return mem + PAD_ALIGN; + return mem + DATA_OFFSET; } else { return (uint8_t *)internal::gdextension_interface_mem_realloc(mem, p_bytes); } @@ -88,7 +88,7 @@ void Memory::free_static(void *p_ptr, bool p_pad_align) { #endif if (prepad) { - mem -= PAD_ALIGN; + mem -= DATA_OFFSET; } internal::gdextension_interface_mem_free(mem); }