查找字符
用于统计字符流中指定字符出现的次数,可以参考《Searching Gigabytes of Data Per Second with SIMD》。
/**
 * SSE4.2 variant: counts how many bytes in data[0, len) are equal to ch.
 *
 * The input is consumed in 16-byte vectors first; whatever is left over
 * (fewer than 16 bytes) is handled one byte at a time.
 *
 * @param data  start of the byte range to scan
 * @param len   number of bytes in the range
 * @param ch    byte value to count
 * @return      number of positions i in [0, len) with data[i] == ch
 */
__attribute__((target("sse4.2")))
static inline size_t char_count(const char *data, size_t len, const char ch) {
    const size_t lane_bytes = sizeof(__m128i);
    const char *const stop = data + len;
    /* End of the part of the range that is a whole number of vectors. */
    const char *const vec_stop = stop - (len % lane_bytes);
    const __m128i needle = _mm_set1_epi8(ch);
    size_t hits = 0;

    while (data < vec_stop) {
        const __m128i chunk = _mm_loadu_si128((const __m128i *) data);
        /* One mask bit per byte lane that matched the needle. */
        const int match_bits = _mm_movemask_epi8(_mm_cmpeq_epi8(chunk, needle));
        hits += (size_t) __builtin_popcount((unsigned) match_bits);
        data += lane_bytes;
    }

    /* Scalar pass over the remaining tail bytes. */
    while (data < stop) {
        if (*data == ch) {
            ++hits;
        }
        ++data;
    }
    return hits;
}
/**
 * Portable fallback: counts how many bytes in data[0, len) are equal to ch
 * with a plain byte-by-byte scan.
 *
 * @param data  start of the byte range to scan
 * @param len   number of bytes in the range
 * @param ch    byte value to count
 * @return      number of positions i in [0, len) with data[i] == ch
 */
__attribute__((target("default")))
static inline size_t char_count(const char *data, size_t len, const char ch) {
    size_t hits = 0;
    const char *const stop = data + len;
    for (const char *cursor = data; cursor != stop; ++cursor) {
        if (*cursor == ch) {
            ++hits;
        }
    }
    return hits;
}
UTF8
SIMD ASCII 检查(ASCII Check):判断一段 UTF-8 字节流是否全部由 ASCII 字符组成;生产环境可以参考 simdjson 库的实现。
对于字节流来说,只要某个字节的值超过 127(即最高位为 1),那么它就肯定不是 ASCII 字符。
/**
 * AVX-512F variant: reports whether every byte in src[0, len) is 7-bit ASCII,
 * i.e. no byte has its most significant bit set.
 *
 * All bytes are OR-ed together (64 at a time, then the scalar tail) and the
 * high bit of the accumulated value is inspected once at the end.
 *
 * @param src  start of the byte range to check
 * @param len  number of bytes in the range (may be 0 or smaller than 64)
 * @return     true when no byte in the range has bit 7 set, false otherwise
 */
__attribute__((target("avx512f")))
bool utf8_is_ascii(const char *src, size_t len) {
    size_t i = 0;
    __m512i has_error = _mm512_setzero_si512();
    /*
     * `len - i >= 64` instead of `i <= len - 64`: len is unsigned, so
     * `len - 64` wraps to a huge value when len < 64 and the loop would read
     * past the end of the buffer. i never exceeds len, so `len - i` is safe.
     */
    for (; len - i >= 64; i += 64) {
        __m512i current_bytes = _mm512_loadu_si512((const __m512i *) (src + i));
        has_error = _mm512_or_si512(has_error, current_bytes);
    }
    /* Fold the sixteen 32-bit lanes into one int; each of its four bytes is
     * the OR of every input byte that sat at that position within a lane. */
    int reduced = _mm512_reduce_or_epi32(has_error);
    /* Remaining tail bytes (fewer than 64) are OR-ed in one at a time. */
    for (; i < len; i++) {
        reduced |= src[i];
    }
    /* Check bit 7 of all four accumulated bytes. */
    return !(reduced & 0x80808080);
}
/**
 * AVX2 variant: reports whether every byte in src[0, len) is 7-bit ASCII,
 * i.e. no byte has its most significant bit set.
 *
 * All bytes are OR-ed together (32 at a time, then the scalar tail) and the
 * high bits of the accumulated values are inspected once at the end.
 *
 * @param src  start of the byte range to check
 * @param len  number of bytes in the range (may be 0 or smaller than 32)
 * @return     true when no byte in the range has bit 7 set, false otherwise
 */
__attribute__((target("avx2")))
bool utf8_is_ascii(const char *src, size_t len) {
    size_t i = 0;
    __m256i has_error = _mm256_setzero_si256();
    /*
     * `len - i >= 32` instead of `i <= len - 32`: len is unsigned, so
     * `len - 32` wraps to a huge value when len < 32 and the loop would read
     * past the end of the buffer. i never exceeds len, so `len - i` is safe.
     */
    for (; len - i >= 32; i += 32) {
        __m256i current_bytes = _mm256_loadu_si256((const __m256i *) (src + i));
        has_error = _mm256_or_si256(has_error, current_bytes);
    }
    /* movemask gathers bit 7 of each of the 32 accumulated byte lanes. */
    int error_mask = _mm256_movemask_epi8(has_error);
    /* Remaining tail bytes (fewer than 32) are OR-ed in one at a time. */
    char tail_has_error = 0;
    for (; i < len; i++) {
        tail_has_error |= src[i];
    }
    error_mask |= (tail_has_error & 0x80);
    return !error_mask;
}
/**
 * SSE2 variant: reports whether every byte in src[0, len) is 7-bit ASCII,
 * i.e. no byte has its most significant bit set.
 *
 * All bytes are OR-ed together (16 at a time, then the scalar tail) and the
 * high bits of the accumulated values are inspected once at the end.
 *
 * @param src  start of the byte range to check
 * @param len  number of bytes in the range (may be 0 or smaller than 16)
 * @return     true when no byte in the range has bit 7 set, false otherwise
 */
__attribute__((target("sse2")))
bool utf8_is_ascii(const char *src, size_t len) {
    size_t i = 0;
    __m128i has_error = _mm_setzero_si128();
    /*
     * `len - i >= 16` instead of `i <= len - 16`: len is unsigned, so
     * `len - 16` wraps to a huge value when len < 16 and the loop would read
     * past the end of the buffer. i never exceeds len, so `len - i` is safe.
     */
    for (; len - i >= 16; i += 16) {
        __m128i current_bytes = _mm_loadu_si128((const __m128i *) (src + i));
        has_error = _mm_or_si128(has_error, current_bytes);
    }
    /* movemask gathers bit 7 of each of the 16 accumulated byte lanes. */
    int error_mask = _mm_movemask_epi8(has_error);
    /* Remaining tail bytes (fewer than 16) are OR-ed in one at a time. */
    char tail_has_error = 0;
    for (; i < len; i++) {
        tail_has_error |= src[i];
    }
    error_mask |= (tail_has_error & 0x80);
    return !error_mask;
}
/**
 * Portable fallback: reports whether every byte in src[0, len) is 7-bit
 * ASCII, i.e. no byte has its most significant bit set.
 *
 * The bytes are OR-ed into a single accumulator without branching inside the
 * loop; only the accumulated high bit is examined at the end.
 *
 * @param src  start of the byte range to check
 * @param len  number of bytes in the range
 * @return     true when no byte in the range has bit 7 set, false otherwise
 */
__attribute__((target("default")))
bool utf8_is_ascii(const char *src, size_t len) {
    unsigned char seen_bits = 0;
    const char *const stop = src + len;
    for (const char *cursor = src; cursor != stop; ++cursor) {
        seen_bits |= (unsigned char) *cursor;
    }
    return (seen_bits & 0x80u) == 0;
}