448 lines
14 KiB
C++
448 lines
14 KiB
C++
|
// © 2016 and later: Unicode, Inc. and others.
|
||
|
// License & terms of use: http://www.unicode.org/copyright.html
|
||
|
/*
|
||
|
*******************************************************************************
|
||
|
* Copyright (C) 2014, International Business Machines
|
||
|
* Corporation and others. All Rights Reserved.
|
||
|
*******************************************************************************
|
||
|
* loadednormalizer2impl.cpp
|
||
|
*
|
||
|
* created on: 2014sep03
|
||
|
* created by: Markus W. Scherer
|
||
|
*/
|
||
|
|
||
|
#include "unicode/utypes.h"
|
||
|
|
||
|
#if !UCONFIG_NO_NORMALIZATION
|
||
|
|
||
|
#include "unicode/udata.h"
|
||
|
#include "unicode/localpointer.h"
|
||
|
#include "unicode/normalizer2.h"
|
||
|
#include "unicode/ucptrie.h"
|
||
|
#include "unicode/unistr.h"
|
||
|
#include "unicode/unorm.h"
|
||
|
#include "cstring.h"
|
||
|
#include "mutex.h"
|
||
|
#include "norm2allmodes.h"
|
||
|
#include "normalizer2impl.h"
|
||
|
#include "uassert.h"
|
||
|
#include "ucln_cmn.h"
|
||
|
#include "uhash.h"
|
||
|
|
||
|
U_NAMESPACE_BEGIN
|
||
|
|
||
|
class LoadedNormalizer2Impl : public Normalizer2Impl {
|
||
|
public:
|
||
|
LoadedNormalizer2Impl() : memory(nullptr), ownedTrie(nullptr) {}
|
||
|
virtual ~LoadedNormalizer2Impl();
|
||
|
|
||
|
void load(const char *packageName, const char *name, UErrorCode &errorCode);
|
||
|
|
||
|
private:
|
||
|
static UBool U_CALLCONV
|
||
|
isAcceptable(void *context, const char *type, const char *name, const UDataInfo *pInfo);
|
||
|
|
||
|
UDataMemory *memory;
|
||
|
UCPTrie *ownedTrie;
|
||
|
};
|
||
|
|
||
|
LoadedNormalizer2Impl::~LoadedNormalizer2Impl() {
|
||
|
udata_close(memory);
|
||
|
ucptrie_close(ownedTrie);
|
||
|
}
|
||
|
|
||
|
UBool U_CALLCONV
|
||
|
LoadedNormalizer2Impl::isAcceptable(void * /*context*/,
|
||
|
const char * /* type */, const char * /*name*/,
|
||
|
const UDataInfo *pInfo) {
|
||
|
if(
|
||
|
pInfo->size>=20 &&
|
||
|
pInfo->isBigEndian==U_IS_BIG_ENDIAN &&
|
||
|
pInfo->charsetFamily==U_CHARSET_FAMILY &&
|
||
|
pInfo->dataFormat[0]==0x4e && /* dataFormat="Nrm2" */
|
||
|
pInfo->dataFormat[1]==0x72 &&
|
||
|
pInfo->dataFormat[2]==0x6d &&
|
||
|
pInfo->dataFormat[3]==0x32 &&
|
||
|
pInfo->formatVersion[0]==4
|
||
|
) {
|
||
|
// Normalizer2Impl *me=(Normalizer2Impl *)context;
|
||
|
// uprv_memcpy(me->dataVersion, pInfo->dataVersion, 4);
|
||
|
return true;
|
||
|
} else {
|
||
|
return false;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
void
|
||
|
LoadedNormalizer2Impl::load(const char *packageName, const char *name, UErrorCode &errorCode) {
|
||
|
if(U_FAILURE(errorCode)) {
|
||
|
return;
|
||
|
}
|
||
|
memory=udata_openChoice(packageName, "nrm", name, isAcceptable, this, &errorCode);
|
||
|
if(U_FAILURE(errorCode)) {
|
||
|
return;
|
||
|
}
|
||
|
const uint8_t *inBytes=(const uint8_t *)udata_getMemory(memory);
|
||
|
const int32_t *inIndexes=(const int32_t *)inBytes;
|
||
|
int32_t indexesLength=inIndexes[IX_NORM_TRIE_OFFSET]/4;
|
||
|
if(indexesLength<=IX_MIN_LCCC_CP) {
|
||
|
errorCode=U_INVALID_FORMAT_ERROR; // Not enough indexes.
|
||
|
return;
|
||
|
}
|
||
|
|
||
|
int32_t offset=inIndexes[IX_NORM_TRIE_OFFSET];
|
||
|
int32_t nextOffset=inIndexes[IX_EXTRA_DATA_OFFSET];
|
||
|
ownedTrie=ucptrie_openFromBinary(UCPTRIE_TYPE_FAST, UCPTRIE_VALUE_BITS_16,
|
||
|
inBytes+offset, nextOffset-offset, nullptr,
|
||
|
&errorCode);
|
||
|
if(U_FAILURE(errorCode)) {
|
||
|
return;
|
||
|
}
|
||
|
|
||
|
offset=nextOffset;
|
||
|
nextOffset=inIndexes[IX_SMALL_FCD_OFFSET];
|
||
|
const uint16_t *inExtraData=(const uint16_t *)(inBytes+offset);
|
||
|
|
||
|
// smallFCD: new in formatVersion 2
|
||
|
offset=nextOffset;
|
||
|
const uint8_t *inSmallFCD=inBytes+offset;
|
||
|
|
||
|
init(inIndexes, ownedTrie, inExtraData, inSmallFCD);
|
||
|
}
|
||
|
|
||
|
// instance cache ---------------------------------------------------------- ***
|
||
|
|
||
|
Norm2AllModes *
|
||
|
Norm2AllModes::createInstance(const char *packageName,
|
||
|
const char *name,
|
||
|
UErrorCode &errorCode) {
|
||
|
if(U_FAILURE(errorCode)) {
|
||
|
return nullptr;
|
||
|
}
|
||
|
LoadedNormalizer2Impl *impl=new LoadedNormalizer2Impl;
|
||
|
if(impl==nullptr) {
|
||
|
errorCode=U_MEMORY_ALLOCATION_ERROR;
|
||
|
return nullptr;
|
||
|
}
|
||
|
impl->load(packageName, name, errorCode);
|
||
|
return createInstance(impl, errorCode);
|
||
|
}
|
||
|
|
||
|
U_CDECL_BEGIN
|
||
|
static UBool U_CALLCONV uprv_loaded_normalizer2_cleanup();
|
||
|
U_CDECL_END
|
||
|
|
||
|
#if !NORM2_HARDCODE_NFC_DATA
|
||
|
static Norm2AllModes *nfcSingleton;
|
||
|
static icu::UInitOnce nfcInitOnce {};
|
||
|
#endif
|
||
|
|
||
|
static Norm2AllModes *nfkcSingleton;
|
||
|
static icu::UInitOnce nfkcInitOnce {};
|
||
|
|
||
|
static Norm2AllModes *nfkc_cfSingleton;
|
||
|
static icu::UInitOnce nfkc_cfInitOnce {};
|
||
|
|
||
|
static Norm2AllModes *nfkc_scfSingleton;
|
||
|
static icu::UInitOnce nfkc_scfInitOnce {};
|
||
|
|
||
|
static UHashtable *cache=nullptr;
|
||
|
|
||
|
// UInitOnce singleton initialization function
|
||
|
static void U_CALLCONV initSingletons(const char *what, UErrorCode &errorCode) {
|
||
|
#if !NORM2_HARDCODE_NFC_DATA
|
||
|
if (uprv_strcmp(what, "nfc") == 0) {
|
||
|
nfcSingleton = Norm2AllModes::createInstance(nullptr, "nfc", errorCode);
|
||
|
} else
|
||
|
#endif
|
||
|
if (uprv_strcmp(what, "nfkc") == 0) {
|
||
|
nfkcSingleton = Norm2AllModes::createInstance(nullptr, "nfkc", errorCode);
|
||
|
} else if (uprv_strcmp(what, "nfkc_cf") == 0) {
|
||
|
nfkc_cfSingleton = Norm2AllModes::createInstance(nullptr, "nfkc_cf", errorCode);
|
||
|
} else if (uprv_strcmp(what, "nfkc_scf") == 0) {
|
||
|
nfkc_scfSingleton = Norm2AllModes::createInstance(nullptr, "nfkc_scf", errorCode);
|
||
|
} else {
|
||
|
UPRV_UNREACHABLE_EXIT; // Unknown singleton
|
||
|
}
|
||
|
ucln_common_registerCleanup(UCLN_COMMON_LOADED_NORMALIZER2, uprv_loaded_normalizer2_cleanup);
|
||
|
}
|
||
|
|
||
|
U_CDECL_BEGIN
|
||
|
|
||
|
static void U_CALLCONV deleteNorm2AllModes(void *allModes) {
|
||
|
delete (Norm2AllModes *)allModes;
|
||
|
}
|
||
|
|
||
|
static UBool U_CALLCONV uprv_loaded_normalizer2_cleanup() {
|
||
|
#if !NORM2_HARDCODE_NFC_DATA
|
||
|
delete nfcSingleton;
|
||
|
nfcSingleton = nullptr;
|
||
|
nfcInitOnce.reset();
|
||
|
#endif
|
||
|
|
||
|
delete nfkcSingleton;
|
||
|
nfkcSingleton = nullptr;
|
||
|
nfkcInitOnce.reset();
|
||
|
|
||
|
delete nfkc_cfSingleton;
|
||
|
nfkc_cfSingleton = nullptr;
|
||
|
nfkc_cfInitOnce.reset();
|
||
|
|
||
|
delete nfkc_scfSingleton;
|
||
|
nfkc_scfSingleton = nullptr;
|
||
|
nfkc_scfInitOnce.reset();
|
||
|
|
||
|
uhash_close(cache);
|
||
|
cache=nullptr;
|
||
|
return true;
|
||
|
}
|
||
|
|
||
|
U_CDECL_END
|
||
|
|
||
|
#if !NORM2_HARDCODE_NFC_DATA
|
||
|
const Norm2AllModes *
|
||
|
Norm2AllModes::getNFCInstance(UErrorCode &errorCode) {
|
||
|
if(U_FAILURE(errorCode)) { return nullptr; }
|
||
|
umtx_initOnce(nfcInitOnce, &initSingletons, "nfc", errorCode);
|
||
|
return nfcSingleton;
|
||
|
}
|
||
|
#endif
|
||
|
|
||
|
const Norm2AllModes *
|
||
|
Norm2AllModes::getNFKCInstance(UErrorCode &errorCode) {
|
||
|
if(U_FAILURE(errorCode)) { return nullptr; }
|
||
|
umtx_initOnce(nfkcInitOnce, &initSingletons, "nfkc", errorCode);
|
||
|
return nfkcSingleton;
|
||
|
}
|
||
|
|
||
|
const Norm2AllModes *
|
||
|
Norm2AllModes::getNFKC_CFInstance(UErrorCode &errorCode) {
|
||
|
if(U_FAILURE(errorCode)) { return nullptr; }
|
||
|
umtx_initOnce(nfkc_cfInitOnce, &initSingletons, "nfkc_cf", errorCode);
|
||
|
return nfkc_cfSingleton;
|
||
|
}
|
||
|
|
||
|
const Norm2AllModes *
|
||
|
Norm2AllModes::getNFKC_SCFInstance(UErrorCode &errorCode) {
|
||
|
if(U_FAILURE(errorCode)) { return nullptr; }
|
||
|
umtx_initOnce(nfkc_scfInitOnce, &initSingletons, "nfkc_scf", errorCode);
|
||
|
return nfkc_scfSingleton;
|
||
|
}
|
||
|
|
||
|
#if !NORM2_HARDCODE_NFC_DATA
|
||
|
const Normalizer2 *
|
||
|
Normalizer2::getNFCInstance(UErrorCode &errorCode) {
|
||
|
const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode);
|
||
|
return allModes!=nullptr ? &allModes->comp : nullptr;
|
||
|
}
|
||
|
|
||
|
const Normalizer2 *
|
||
|
Normalizer2::getNFDInstance(UErrorCode &errorCode) {
|
||
|
const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode);
|
||
|
return allModes!=nullptr ? &allModes->decomp : nullptr;
|
||
|
}
|
||
|
|
||
|
const Normalizer2 *Normalizer2Factory::getFCDInstance(UErrorCode &errorCode) {
|
||
|
const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode);
|
||
|
return allModes!=nullptr ? &allModes->fcd : nullptr;
|
||
|
}
|
||
|
|
||
|
const Normalizer2 *Normalizer2Factory::getFCCInstance(UErrorCode &errorCode) {
|
||
|
const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode);
|
||
|
return allModes!=nullptr ? &allModes->fcc : nullptr;
|
||
|
}
|
||
|
|
||
|
const Normalizer2Impl *
|
||
|
Normalizer2Factory::getNFCImpl(UErrorCode &errorCode) {
|
||
|
const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode);
|
||
|
return allModes!=nullptr ? allModes->impl : nullptr;
|
||
|
}
|
||
|
#endif
|
||
|
|
||
|
const Normalizer2 *
|
||
|
Normalizer2::getNFKCInstance(UErrorCode &errorCode) {
|
||
|
const Norm2AllModes *allModes=Norm2AllModes::getNFKCInstance(errorCode);
|
||
|
return allModes!=nullptr ? &allModes->comp : nullptr;
|
||
|
}
|
||
|
|
||
|
const Normalizer2 *
|
||
|
Normalizer2::getNFKDInstance(UErrorCode &errorCode) {
|
||
|
const Norm2AllModes *allModes=Norm2AllModes::getNFKCInstance(errorCode);
|
||
|
return allModes!=nullptr ? &allModes->decomp : nullptr;
|
||
|
}
|
||
|
|
||
|
const Normalizer2 *
|
||
|
Normalizer2::getNFKCCasefoldInstance(UErrorCode &errorCode) {
|
||
|
const Norm2AllModes *allModes=Norm2AllModes::getNFKC_CFInstance(errorCode);
|
||
|
return allModes!=nullptr ? &allModes->comp : nullptr;
|
||
|
}
|
||
|
|
||
|
const Normalizer2 *
|
||
|
Normalizer2::getNFKCSimpleCasefoldInstance(UErrorCode &errorCode) {
|
||
|
const Norm2AllModes *allModes=Norm2AllModes::getNFKC_SCFInstance(errorCode);
|
||
|
return allModes!=nullptr ? &allModes->comp : nullptr;
|
||
|
}
|
||
|
|
||
|
const Normalizer2 *
|
||
|
Normalizer2::getInstance(const char *packageName,
|
||
|
const char *name,
|
||
|
UNormalization2Mode mode,
|
||
|
UErrorCode &errorCode) {
|
||
|
if(U_FAILURE(errorCode)) {
|
||
|
return nullptr;
|
||
|
}
|
||
|
if(name==nullptr || *name==0) {
|
||
|
errorCode=U_ILLEGAL_ARGUMENT_ERROR;
|
||
|
return nullptr;
|
||
|
}
|
||
|
const Norm2AllModes *allModes=nullptr;
|
||
|
if(packageName==nullptr) {
|
||
|
if(0==uprv_strcmp(name, "nfc")) {
|
||
|
allModes=Norm2AllModes::getNFCInstance(errorCode);
|
||
|
} else if(0==uprv_strcmp(name, "nfkc")) {
|
||
|
allModes=Norm2AllModes::getNFKCInstance(errorCode);
|
||
|
} else if(0==uprv_strcmp(name, "nfkc_cf")) {
|
||
|
allModes=Norm2AllModes::getNFKC_CFInstance(errorCode);
|
||
|
} else if(0==uprv_strcmp(name, "nfkc_scf")) {
|
||
|
allModes=Norm2AllModes::getNFKC_SCFInstance(errorCode);
|
||
|
}
|
||
|
}
|
||
|
if(allModes==nullptr && U_SUCCESS(errorCode)) {
|
||
|
{
|
||
|
Mutex lock;
|
||
|
if(cache!=nullptr) {
|
||
|
allModes=(Norm2AllModes *)uhash_get(cache, name);
|
||
|
}
|
||
|
}
|
||
|
if(allModes==nullptr) {
|
||
|
ucln_common_registerCleanup(UCLN_COMMON_LOADED_NORMALIZER2, uprv_loaded_normalizer2_cleanup);
|
||
|
LocalPointer<Norm2AllModes> localAllModes(
|
||
|
Norm2AllModes::createInstance(packageName, name, errorCode));
|
||
|
if(U_SUCCESS(errorCode)) {
|
||
|
Mutex lock;
|
||
|
if(cache==nullptr) {
|
||
|
cache=uhash_open(uhash_hashChars, uhash_compareChars, nullptr, &errorCode);
|
||
|
if(U_FAILURE(errorCode)) {
|
||
|
return nullptr;
|
||
|
}
|
||
|
uhash_setKeyDeleter(cache, uprv_free);
|
||
|
uhash_setValueDeleter(cache, deleteNorm2AllModes);
|
||
|
}
|
||
|
void *temp=uhash_get(cache, name);
|
||
|
if(temp==nullptr) {
|
||
|
int32_t keyLength= static_cast<int32_t>(uprv_strlen(name)+1);
|
||
|
char *nameCopy=(char *)uprv_malloc(keyLength);
|
||
|
if(nameCopy==nullptr) {
|
||
|
errorCode=U_MEMORY_ALLOCATION_ERROR;
|
||
|
return nullptr;
|
||
|
}
|
||
|
uprv_memcpy(nameCopy, name, keyLength);
|
||
|
allModes=localAllModes.getAlias();
|
||
|
uhash_put(cache, nameCopy, localAllModes.orphan(), &errorCode);
|
||
|
} else {
|
||
|
// race condition
|
||
|
allModes=(Norm2AllModes *)temp;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
if(allModes!=nullptr && U_SUCCESS(errorCode)) {
|
||
|
switch(mode) {
|
||
|
case UNORM2_COMPOSE:
|
||
|
return &allModes->comp;
|
||
|
case UNORM2_DECOMPOSE:
|
||
|
return &allModes->decomp;
|
||
|
case UNORM2_FCD:
|
||
|
return &allModes->fcd;
|
||
|
case UNORM2_COMPOSE_CONTIGUOUS:
|
||
|
return &allModes->fcc;
|
||
|
default:
|
||
|
break; // do nothing
|
||
|
}
|
||
|
}
|
||
|
return nullptr;
|
||
|
}
|
||
|
|
||
|
const Normalizer2 *
|
||
|
Normalizer2Factory::getInstance(UNormalizationMode mode, UErrorCode &errorCode) {
|
||
|
if(U_FAILURE(errorCode)) {
|
||
|
return nullptr;
|
||
|
}
|
||
|
switch(mode) {
|
||
|
case UNORM_NFD:
|
||
|
return Normalizer2::getNFDInstance(errorCode);
|
||
|
case UNORM_NFKD:
|
||
|
return Normalizer2::getNFKDInstance(errorCode);
|
||
|
case UNORM_NFC:
|
||
|
return Normalizer2::getNFCInstance(errorCode);
|
||
|
case UNORM_NFKC:
|
||
|
return Normalizer2::getNFKCInstance(errorCode);
|
||
|
case UNORM_FCD:
|
||
|
return getFCDInstance(errorCode);
|
||
|
default: // UNORM_NONE
|
||
|
return getNoopInstance(errorCode);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
const Normalizer2Impl *
|
||
|
Normalizer2Factory::getNFKCImpl(UErrorCode &errorCode) {
|
||
|
const Norm2AllModes *allModes=Norm2AllModes::getNFKCInstance(errorCode);
|
||
|
return allModes!=nullptr ? allModes->impl : nullptr;
|
||
|
}
|
||
|
|
||
|
const Normalizer2Impl *
|
||
|
Normalizer2Factory::getNFKC_CFImpl(UErrorCode &errorCode) {
|
||
|
const Norm2AllModes *allModes=Norm2AllModes::getNFKC_CFInstance(errorCode);
|
||
|
return allModes!=nullptr ? allModes->impl : nullptr;
|
||
|
}
|
||
|
|
||
|
U_NAMESPACE_END
|
||
|
|
||
|
// C API ------------------------------------------------------------------- ***
|
||
|
|
||
|
U_NAMESPACE_USE
|
||
|
|
||
|
U_CAPI const UNormalizer2 * U_EXPORT2
|
||
|
unorm2_getNFKCInstance(UErrorCode *pErrorCode) {
|
||
|
return (const UNormalizer2 *)Normalizer2::getNFKCInstance(*pErrorCode);
|
||
|
}
|
||
|
|
||
|
U_CAPI const UNormalizer2 * U_EXPORT2
|
||
|
unorm2_getNFKDInstance(UErrorCode *pErrorCode) {
|
||
|
return (const UNormalizer2 *)Normalizer2::getNFKDInstance(*pErrorCode);
|
||
|
}
|
||
|
|
||
|
U_CAPI const UNormalizer2 * U_EXPORT2
|
||
|
unorm2_getNFKCCasefoldInstance(UErrorCode *pErrorCode) {
|
||
|
return (const UNormalizer2 *)Normalizer2::getNFKCCasefoldInstance(*pErrorCode);
|
||
|
}
|
||
|
|
||
|
U_CAPI const UNormalizer2 * U_EXPORT2
|
||
|
unorm2_getNFKCSimpleCasefoldInstance(UErrorCode *pErrorCode) {
|
||
|
return (const UNormalizer2 *)Normalizer2::getNFKCSimpleCasefoldInstance(*pErrorCode);
|
||
|
}
|
||
|
|
||
|
U_CAPI const UNormalizer2 * U_EXPORT2
|
||
|
unorm2_getInstance(const char *packageName,
|
||
|
const char *name,
|
||
|
UNormalization2Mode mode,
|
||
|
UErrorCode *pErrorCode) {
|
||
|
return (const UNormalizer2 *)Normalizer2::getInstance(packageName, name, mode, *pErrorCode);
|
||
|
}
|
||
|
|
||
|
U_CFUNC UNormalizationCheckResult
|
||
|
unorm_getQuickCheck(UChar32 c, UNormalizationMode mode) {
|
||
|
if(mode<=UNORM_NONE || UNORM_FCD<=mode) {
|
||
|
return UNORM_YES;
|
||
|
}
|
||
|
UErrorCode errorCode=U_ZERO_ERROR;
|
||
|
const Normalizer2 *norm2=Normalizer2Factory::getInstance(mode, errorCode);
|
||
|
if(U_SUCCESS(errorCode)) {
|
||
|
return ((const Normalizer2WithImpl *)norm2)->getQuickCheck(c);
|
||
|
} else {
|
||
|
return UNORM_MAYBE;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
#endif // !UCONFIG_NO_NORMALIZATION
|