/// Declarations of the utf8proc C API, compiled only when the
/// `DerelictUTF8Proc_Static` version is in effect.
module derelict.utf8proc.statfun;

// The general `Derelict_Static` switch also turns on the utf8proc-specific one.
version (Derelict_Static) version = DerelictUTF8Proc_Static;

// The colon form applies the condition to the remainder of the module.
version (DerelictUTF8Proc_Static):

public import derelict.utf8proc.types;
// C-linkage declarations of the utf8proc API; every symbol in this block is
// declared `@nogc` and `nothrow`.
extern (C) @nogc nothrow
{
    /**
     * Array containing the byte lengths of a UTF-8 encoded codepoint based
     * on the first byte.
     */
    extern const utf8proc_int8_t[256] utf8proc_utf8class;

    /**
     * Returns the utf8proc API version as a string MAJOR.MINOR.PATCH
     * (http://semver.org format), possibly with a "-dev" suffix for
     * development versions.
     */
    extern const(char)* utf8proc_version();

    /**
     * Returns an informative error string for the given utf8proc error code
     * (e.g. the error codes returned by @ref utf8proc_map).
     */
    extern const(char)* utf8proc_errmsg(utf8proc_ssize_t errcode);

    /**
     * Reads a single codepoint from the UTF-8 sequence being pointed to by `str`.
     * The maximum number of bytes read is `strlen`, unless `strlen` is
     * negative (in which case up to 4 bytes are read).
     *
     * If a valid codepoint could be read, it is stored in the variable
     * pointed to by `codepoint_ref`, otherwise that variable will be set to -1.
     * In case of success, the number of bytes read is returned; otherwise, a
     * negative error code is returned.
     */
    extern utf8proc_ssize_t utf8proc_iterate(const utf8proc_uint8_t* str,
            utf8proc_ssize_t strlen, utf8proc_int32_t* codepoint_ref);

    /**
     * Check if a codepoint is valid (regardless of whether it has been
     * assigned a value by the current Unicode standard).
     *
     * @return 1 if the given `codepoint` is valid and otherwise return 0.
     */
    extern utf8proc_bool utf8proc_codepoint_valid(utf8proc_int32_t codepoint);

    /**
     * Encodes the codepoint as an UTF-8 string in the byte array pointed
     * to by `dst`. This array must be at least 4 bytes long.
     *
     * In case of success the number of bytes written is returned, and
     * otherwise 0 is returned.
     *
     * This function does not check whether `codepoint` is valid Unicode.
     */
    extern utf8proc_ssize_t utf8proc_encode_char(utf8proc_int32_t codepoint,
            utf8proc_uint8_t* dst);

    /**
     * Look up the properties for a given codepoint.
     *
     * @param codepoint The Unicode codepoint.
     *
     * @returns
     * A pointer to a (constant) struct containing information about
     * the codepoint.
     * @par
     * If the codepoint is unassigned or invalid, a pointer to a special struct is
     * returned in which `category` is 0 (@ref UTF8PROC_CATEGORY_CN).
     */
    extern const(utf8proc_property_t)* utf8proc_get_property(utf8proc_int32_t codepoint);

    /** Decompose a codepoint into an array of codepoints.
     *
     * @param codepoint the codepoint.
     * @param dst the destination buffer.
     * @param bufsize the size of the destination buffer.
     * @param options one or more of the following flags:
     * - @ref UTF8PROC_REJECTNA  - return an error if `codepoint` is unassigned
     * - @ref UTF8PROC_IGNORE    - strip "default ignorable" codepoints
     * - @ref UTF8PROC_CASEFOLD  - apply Unicode casefolding
     * - @ref UTF8PROC_COMPAT    - replace certain codepoints with their
     *                             compatibility decomposition
     * - @ref UTF8PROC_CHARBOUND - insert 0xFF bytes before each grapheme cluster
     * - @ref UTF8PROC_LUMP      - lump certain different codepoints together
     * - @ref UTF8PROC_STRIPMARK - remove all character marks
     * @param last_boundclass
     * Pointer to an integer variable containing
     * the previous codepoint's boundary class if the @ref UTF8PROC_CHARBOUND
     * option is used. Otherwise, this parameter is ignored.
     *
     * @return
     * In case of success, the number of codepoints written is returned; in case
     * of an error, a negative error code is returned (@ref utf8proc_errmsg).
     * @par
     * If the number of written codepoints would be bigger than `bufsize`, the
     * required buffer size is returned, while the buffer will be overwritten with
     * undefined data.
     */
    extern utf8proc_ssize_t utf8proc_decompose_char(utf8proc_int32_t codepoint,
            utf8proc_int32_t* dst, utf8proc_ssize_t bufsize, utf8proc_option_t options,
            int* last_boundclass);

    /**
     * The same as @ref utf8proc_decompose_char, but acts on a whole UTF-8
     * string and orders the decomposed sequences correctly.
     *
     * If the @ref UTF8PROC_NULLTERM flag in `options` is set, processing
     * will be stopped when a NULL byte is encountered, otherwise `strlen`
     * bytes are processed. The result (in the form of 32-bit unicode
     * codepoints) is written into the buffer being pointed to by
     * `buffer` (which must contain at least `bufsize` entries). In case of
     * success, the number of codepoints written is returned; in case of an
     * error, a negative error code is returned (@ref utf8proc_errmsg).
     * See @ref utf8proc_decompose_custom to supply additional transformations.
     *
     * If the number of written codepoints would be bigger than `bufsize`, the
     * required buffer size is returned, while the buffer will be overwritten with
     * undefined data.
     */
    extern utf8proc_ssize_t utf8proc_decompose(const utf8proc_uint8_t* str,
            utf8proc_ssize_t strlen, utf8proc_int32_t* buffer,
            utf8proc_ssize_t bufsize, utf8proc_option_t options);

    /**
     * The same as @ref utf8proc_decompose, but also takes a `custom_func` mapping function
     * that is called on each codepoint in `str` before any other transformations
     * (along with a `custom_data` pointer that is passed through to `custom_func`).
     * The `custom_func` argument is ignored if it is `NULL`. See also @ref utf8proc_map_custom.
     */
    extern utf8proc_ssize_t utf8proc_decompose_custom(const utf8proc_uint8_t* str,
            utf8proc_ssize_t strlen, utf8proc_int32_t* buffer,
            utf8proc_ssize_t bufsize, utf8proc_option_t options,
            utf8proc_custom_func custom_func, void* custom_data);

    /**
     * Normalizes the sequence of `length` codepoints pointed to by `buffer`
     * in-place (i.e., the result is also stored in `buffer`).
     *
     * @param buffer the (native-endian UTF-32) unicode codepoints to re-encode.
     * @param length the length (in codepoints) of the buffer.
     * @param options a bitwise or (`|`) of one or more of the following flags:
     * - @ref UTF8PROC_NLF2LS  - convert LF, CRLF, CR and NEL into LS
     * - @ref UTF8PROC_NLF2PS  - convert LF, CRLF, CR and NEL into PS
     * - @ref UTF8PROC_NLF2LF  - convert LF, CRLF, CR and NEL into LF
     * - @ref UTF8PROC_STRIPCC - strip or convert all non-affected control characters
     * - @ref UTF8PROC_COMPOSE - try to combine decomposed codepoints into composite
     *                           codepoints
     * - @ref UTF8PROC_STABLE  - prohibit combining characters that would violate
     *                           the unicode versioning stability
     *
     * @return
     * In case of success, the length (in codepoints) of the normalized UTF-32 string is
     * returned; otherwise, a negative error code is returned (@ref utf8proc_errmsg).
     *
     * @warning The entries of the array pointed to by `buffer` have to be in the
     *          range `0x0000` to `0x10FFFF`. Otherwise, the program might crash!
     */
    extern utf8proc_ssize_t utf8proc_normalize_utf32(utf8proc_int32_t* buffer,
            utf8proc_ssize_t length, utf8proc_option_t options);

    /**
     * Reencodes the sequence of `length` codepoints pointed to by `buffer`
     * as UTF-8 data in-place (i.e., the result is also stored in `buffer`).
     * Can optionally normalize the UTF-32 sequence prior to UTF-8 conversion.
     *
     * @param buffer the (native-endian UTF-32) unicode codepoints to re-encode.
     * @param length the length (in codepoints) of the buffer.
     * @param options a bitwise or (`|`) of one or more of the following flags:
     * - @ref UTF8PROC_NLF2LS    - convert LF, CRLF, CR and NEL into LS
     * - @ref UTF8PROC_NLF2PS    - convert LF, CRLF, CR and NEL into PS
     * - @ref UTF8PROC_NLF2LF    - convert LF, CRLF, CR and NEL into LF
     * - @ref UTF8PROC_STRIPCC   - strip or convert all non-affected control characters
     * - @ref UTF8PROC_COMPOSE   - try to combine decomposed codepoints into composite
     *                             codepoints
     * - @ref UTF8PROC_STABLE    - prohibit combining characters that would violate
     *                             the unicode versioning stability
     * - @ref UTF8PROC_CHARBOUND - insert 0xFF bytes before each grapheme cluster
     *
     * @return
     * In case of success, the length (in bytes) of the resulting nul-terminated
     * UTF-8 string is returned; otherwise, a negative error code is returned
     * (@ref utf8proc_errmsg).
     *
     * @warning The amount of free space pointed to by `buffer` must
     *          exceed the amount of the input data by one byte, and the
     *          entries of the array pointed to by `buffer` have to be in the
     *          range `0x0000` to `0x10FFFF`. Otherwise, the program might crash!
     */
    extern utf8proc_ssize_t utf8proc_reencode(utf8proc_int32_t* buffer,
            utf8proc_ssize_t length, utf8proc_option_t options);

    /**
     * Given a pair of consecutive codepoints, return whether a grapheme break is
     * permitted between them (as defined by the extended grapheme clusters in UAX#29).
     *
     * @param state Beginning with Version 29 (Unicode 9.0.0), this algorithm requires
     *              state to break graphemes. This state can be passed in as a pointer
     *              in the `state` argument and should initially be set to 0. If the
     *              state is not passed in (i.e. a null pointer is passed), UAX#29 rules
     *              GB10/12/13 which require this state will not be applied, essentially
     *              matching the rules in Unicode 8.0.0.
     *
     * @warning If the state parameter is used, `utf8proc_grapheme_break_stateful` must
     *          be called IN ORDER on ALL potential breaks in a string.
     */
    extern utf8proc_bool utf8proc_grapheme_break_stateful(utf8proc_int32_t codepoint1,
            utf8proc_int32_t codepoint2, utf8proc_int32_t* state);

    /**
     * Same as @ref utf8proc_grapheme_break_stateful, except without support for the
     * Unicode 9 additions to the algorithm. Supported for legacy reasons.
     */
    extern utf8proc_bool utf8proc_grapheme_break(utf8proc_int32_t codepoint1,
            utf8proc_int32_t codepoint2);

    /**
     * Given a codepoint `c`, return the codepoint of the corresponding
     * lower-case character, if any; otherwise (if there is no lower-case
     * variant, or if `c` is not a valid codepoint) return `c`.
     */
    extern utf8proc_int32_t utf8proc_tolower(utf8proc_int32_t c);

    /**
     * Given a codepoint `c`, return the codepoint of the corresponding
     * upper-case character, if any; otherwise (if there is no upper-case
     * variant, or if `c` is not a valid codepoint) return `c`.
     */
    extern utf8proc_int32_t utf8proc_toupper(utf8proc_int32_t c);

    /**
     * Given a codepoint `c`, return the codepoint of the corresponding
     * title-case character, if any; otherwise (if there is no title-case
     * variant, or if `c` is not a valid codepoint) return `c`.
     */
    extern utf8proc_int32_t utf8proc_totitle(utf8proc_int32_t c);

    /**
     * Given a codepoint, return a character width analogous to `wcwidth(codepoint)`,
     * except that a width of 0 is returned for non-printable codepoints
     * instead of -1 as in `wcwidth`.
     *
     * @note
     * If you want to check for particular types of non-printable characters,
     * (analogous to `isprint` or `iscntrl`), use @ref utf8proc_category.
     */
    extern int utf8proc_charwidth(utf8proc_int32_t codepoint);

    /**
     * Return the Unicode category for the codepoint (one of the
     * @ref utf8proc_category_t constants.)
     */
    extern utf8proc_category_t utf8proc_category(utf8proc_int32_t codepoint);

    /**
     * Return the two-letter (nul-terminated) Unicode category string for
     * the codepoint (e.g. `"Lu"` or `"Co"`).
     */
    extern const(char)* utf8proc_category_string(utf8proc_int32_t codepoint);

    /**
     * Maps the given UTF-8 string pointed to by `str` to a new UTF-8
     * string, allocated dynamically by `malloc` and returned via `dstptr`.
     *
     * If the @ref UTF8PROC_NULLTERM flag in the `options` field is set,
     * the length is determined by a NULL terminator, otherwise the
     * parameter `strlen` is evaluated to determine the string length, but
     * in any case the result will be NULL terminated (though it might
     * contain NULL characters within the string if `str` contained NULL
     * characters). Other flags in the `options` field are passed to the
     * functions defined above, and regarded as described. See also
     * @ref utf8proc_map_custom to supply a custom codepoint transformation.
     *
     * In case of success the length of the new string is returned,
     * otherwise a negative error code is returned.
     *
     * @note The memory of the new UTF-8 string will have been allocated
     * with `malloc`, and should therefore be deallocated with `free`.
     */
    extern utf8proc_ssize_t utf8proc_map(const utf8proc_uint8_t* str,
            utf8proc_ssize_t strlen, utf8proc_uint8_t** dstptr, utf8proc_option_t options);

    /**
     * Like @ref utf8proc_map, but also takes a `custom_func` mapping function
     * that is called on each codepoint in `str` before any other transformations
     * (along with a `custom_data` pointer that is passed through to `custom_func`).
     * The `custom_func` argument is ignored if it is `NULL`.
     */
    extern utf8proc_ssize_t utf8proc_map_custom(const utf8proc_uint8_t* str,
            utf8proc_ssize_t strlen, utf8proc_uint8_t** dstptr,
            utf8proc_option_t options, utf8proc_custom_func custom_func, void* custom_data);

    /** @name Unicode normalization
     *
     * Returns a pointer to newly allocated memory of a NFD, NFC, NFKD or NFKC
     * normalized version of the null-terminated string `str`. These
     * are shortcuts to calling @ref utf8proc_map with @ref UTF8PROC_NULLTERM
     * combined with @ref UTF8PROC_STABLE and flags indicating the normalization.
     */
    /** @{ */
    /** NFD normalization (@ref UTF8PROC_DECOMPOSE). */
    extern utf8proc_uint8_t* utf8proc_NFD(const utf8proc_uint8_t* str);
    /** NFC normalization (@ref UTF8PROC_COMPOSE). */
    extern utf8proc_uint8_t* utf8proc_NFC(const utf8proc_uint8_t* str);
    /** NFKD normalization (@ref UTF8PROC_DECOMPOSE and @ref UTF8PROC_COMPAT). */
    extern utf8proc_uint8_t* utf8proc_NFKD(const utf8proc_uint8_t* str);
    /** NFKC normalization (@ref UTF8PROC_COMPOSE and @ref UTF8PROC_COMPAT). */
    extern utf8proc_uint8_t* utf8proc_NFKC(const utf8proc_uint8_t* str);
    /** @} */
}