PostgreSQL Source Code git master
All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
ascii.h
Go to the documentation of this file.
1/*-----------------------------------------------------------------------
2 * ascii.h
3 *
4 * Portions Copyright (c) 1999-2025, PostgreSQL Global Development Group
5 *
6 * src/include/utils/ascii.h
7 *
8 *-----------------------------------------------------------------------
9 */
10
11#ifndef _ASCII_H_
12#define _ASCII_H_
13
14#include "port/simd.h"
15
16extern void ascii_safe_strlcpy(char *dest, const char *src, size_t destsiz);
17
18/*
19 * Verify a chunk of bytes for valid ASCII.
20 *
21 * Returns false if the input contains any zero bytes or bytes with the
22 * high-bit set. Input len must be a multiple of the chunk size (8 or 16).
23 */
24static inline bool
25is_valid_ascii(const unsigned char *s, int len)
26{
27 const unsigned char *const s_end = s + len;
29 Vector8 highbit_cum = vector8_broadcast(0);
30#ifdef USE_NO_SIMD
31 Vector8 zero_cum = vector8_broadcast(0x80);
32#endif
33
34 Assert(len % sizeof(chunk) == 0);
35
36 while (s < s_end)
37 {
39
40 /* Capture any zero bytes in this chunk. */
41#ifdef USE_NO_SIMD
42
43 /*
44 * First, add 0x7f to each byte. This sets the high bit in each byte,
45 * unless it was a zero. If any resulting high bits are zero, the
46 * corresponding high bits in the zero accumulator will be cleared.
47 *
48 * If none of the bytes in the chunk had the high bit set, the max
49 * value each byte can have after the addition is 0x7f + 0x7f = 0xfe,
50 * and we don't need to worry about carrying over to the next byte. If
51 * any input bytes did have the high bit set, it doesn't matter
52 * because we check for those separately.
53 */
54 zero_cum &= (chunk + vector8_broadcast(0x7F));
55#else
56
57 /*
58 * Set all bits in each lane of the highbit accumulator where input
59 * bytes are zero.
60 */
61 highbit_cum = vector8_or(highbit_cum,
62 vector8_eq(chunk, vector8_broadcast(0)));
63#endif
64
65 /* Capture all set bits in this chunk. */
66 highbit_cum = vector8_or(highbit_cum, chunk);
67
68 s += sizeof(chunk);
69 }
70
71 /* Check if any high bits in the high bit accumulator got set. */
72 if (vector8_is_highbit_set(highbit_cum))
73 return false;
74
75#ifdef USE_NO_SIMD
76 /* Check if any high bits in the zero accumulator got cleared. */
77 if (zero_cum != vector8_broadcast(0x80))
78 return false;
79#endif
80
81 return true;
82}
83
84#endif /* _ASCII_H_ */
void ascii_safe_strlcpy(char *dest, const char *src, size_t destsiz)
Definition: ascii.c:174
static bool is_valid_ascii(const unsigned char *s, int len)
Definition: ascii.h:25
#define Assert(condition)
Definition: c.h:815
uint64 chunk
const void size_t len
static Vector8 vector8_broadcast(const uint8 c)
Definition: simd.h:135
static void vector8_load(Vector8 *v, const uint8 *s)
Definition: simd.h:108
static Vector8 vector8_or(const Vector8 v1, const Vector8 v2)
Definition: simd.h:338
uint64 Vector8
Definition: simd.h:60
static bool vector8_is_highbit_set(const Vector8 v)
Definition: simd.h:271