|
41 | 41 | #include <limits> |
42 | 42 | #include <memory> |
43 | 43 | #include <string> |
| 44 | +#include <type_traits> |
44 | 45 | #include <utility> |
| 46 | +#include <vector> |
45 | 47 |
|
46 | 48 | #include "final/eventloop/pipedata.h" |
47 | 49 |
|
@@ -256,47 +258,125 @@ using enable_if_arithmetic_without_char_t = |
256 | 258 | && ! std::is_same<char, NumT>::value |
257 | 259 | , std::nullptr_t>; |
258 | 260 |
|
259 | | -// UTF8_Char |
260 | | -//---------------------------------------------------------------------- |
261 | | -struct FourByteData |
| 261 | +// UTF-8 encoding |
| 262 | +namespace UTF8 |
262 | 263 | { |
263 | | - char byte1; // First character |
264 | | - char byte2; // Second character |
265 | | - char byte3; // Third character |
266 | | - char byte4; // Fourth character |
267 | | -}; |
268 | 264 |
|
269 | | -struct UTF8_Char |
| 265 | +inline void expand (std::vector<char>& buffer, std::size_t addend) |
270 | 266 | { |
271 | | - // Data member |
272 | | - FourByteData u8; |
273 | | - uInt32 length; |
| 267 | + buffer.resize(buffer.size() + addend); |
| 268 | +} |
274 | 269 |
|
275 | | - // Friend Non-member operator functions |
276 | | - friend constexpr auto operator == ( const UTF8_Char& lhs |
277 | | - , const UTF8_Char& rhs ) noexcept -> bool |
| 270 | +inline void expand (std::array<char, 4>&, std::size_t) |
| 271 | +{ } |
| 272 | + |
| 273 | +template <typename T> |
| 274 | +using DecayedT = typename std::decay<T>::type; |
| 275 | + |
| 276 | +template <typename CharBufferT> |
| 277 | +using uInt32_if_vector_or_array = std::enable_if_t< |
| 278 | + std::is_same<DecayedT<CharBufferT>, std::vector<char>>::value |
| 279 | + || std::is_same<DecayedT<CharBufferT>, std::array<char, 4>>::value |
| 280 | + , uInt32>; |
| 281 | + |
| 282 | +#if defined(__CYGWIN__) |
| 283 | + |
| 284 | +template <typename CharBufferT> |
| 285 | +inline auto encode (wchar_t ucs, CharBufferT& buffer) -> uInt32_if_vector_or_array<CharBufferT> |
| 286 | +{ |
| 287 | + // Writes UTF-8 bytes to the target array and returns the length |
| 288 | + const auto index = std::is_same<CharBufferT, std::vector<char>>::value |
| 289 | + ? buffer.size() |
| 290 | + : 0; |
| 291 | + |
| 292 | + // 1 Byte (7-bit): 0xxxxxxx |
| 293 | + if ( ucs < 0x80 ) |
278 | 294 | { |
279 | | - if ( lhs.length != rhs.length ) |
280 | | - return false; |
| 295 | + expand(buffer, 1); |
| 296 | + const auto dest = &buffer[index]; |
| 297 | + dest[0] = char(ucs); |
| 298 | + return 1; |
| 299 | + } |
| 300 | + |
| 301 | + // 2 byte (11-bit): 110xxxxx 10xxxxxx |
| 302 | + if ( ucs < 0x800 ) |
| 303 | + { |
| 304 | + expand(buffer, 2); |
| 305 | + const auto dest = &buffer[index]; |
| 306 | + dest[0] = char(0xc0 | uChar(ucs >> 6u)); |
| 307 | + dest[1] = char(0x80 | uChar(ucs & 0x3f)); |
| 308 | + return 2; |
| 309 | + } |
| 310 | + |
| 311 | + // 3 byte (16-bit): 1110xxxx 10xxxxxx 10xxxxxx |
| 312 | + expand(buffer, 3); |
| 313 | + const auto dest = &buffer[index]; |
| 314 | + dest[0] = char(0xe0 | uChar(ucs >> 12u)); |
| 315 | + dest[1] = char(0x80 | uChar((ucs >> 6u) & 0x3f)); |
| 316 | + dest[2] = char(0x80 | uChar(ucs & 0x3f)); |
| 317 | + return 3; |
| 318 | +} |
281 | 319 |
|
282 | | -#if HAVE_BUILTIN(__builtin_bit_cast) |
283 | | - return __builtin_bit_cast(uInt32, lhs.u8) == __builtin_bit_cast(uInt32, rhs.u8); |
284 | 320 | #else |
285 | | - uInt32 lhs_bytes{}; |
286 | | - uInt32 rhs_bytes{}; |
287 | | - std::memcpy(&lhs_bytes, &lhs.u8, sizeof(uInt32)); |
288 | | - std::memcpy(&rhs_bytes, &rhs.u8, sizeof(uInt32)); |
289 | | - return lhs_bytes == rhs_bytes; |
290 | | -#endif |
| 321 | + |
| 322 | +template <typename CharBufferT> |
| 323 | +inline auto encode (wchar_t ucs, CharBufferT& buffer) -> uInt32_if_vector_or_array<CharBufferT> |
| 324 | +{ |
| 325 | + // Writes UTF-8 bytes to the target array and returns the length |
| 326 | + const auto index = std::is_same<CharBufferT, std::vector<char>>::value |
| 327 | + ? buffer.size() |
| 328 | + : 0; |
| 329 | + |
| 330 | + // 1 Byte (7-bit): 0xxxxxxx |
| 331 | + if ( ucs < 0x80 ) |
| 332 | + { |
| 333 | + expand(buffer, 1); |
| 334 | + const auto dest = &buffer[index]; |
| 335 | + dest[0] = char(ucs); |
| 336 | + return 1; |
291 | 337 | } |
292 | 338 |
|
293 | | - friend constexpr auto operator != ( const UTF8_Char& lhs |
294 | | - , const UTF8_Char& rhs ) noexcept -> bool |
| 339 | + // 2 byte (11-bit): 110xxxxx 10xxxxxx |
| 340 | + if ( ucs < 0x800 ) |
295 | 341 | { |
296 | | - return ! ( lhs == rhs ); |
| 342 | + expand(buffer, 2); |
| 343 | + const auto dest = &buffer[index]; |
| 344 | + dest[0] = char(0xc0 | uChar(ucs >> 6u)); |
| 345 | + dest[1] = char(0x80 | uChar(ucs & 0x3f)); |
| 346 | + return 2; |
| 347 | + } |
| 348 | + |
| 349 | + // 3 byte (16-bit): 1110xxxx 10xxxxxx 10xxxxxx |
| 350 | + if ( ucs < 0x10000 ) |
| 351 | + { |
| 352 | + expand(buffer, 3); |
| 353 | + const auto dest = &buffer[index]; |
| 354 | + dest[0] = char(0xe0 | uChar(ucs >> 12u)); |
| 355 | + dest[1] = char(0x80 | uChar((ucs >> 6u) & 0x3f)); |
| 356 | + dest[2] = char(0x80 | uChar(ucs & 0x3f)); |
| 357 | + return 3; |
297 | 358 | } |
| 359 | + |
| 360 | + // 4 byte (21-bit): 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx |
| 361 | + if ( ucs < 0x200000 ) |
| 362 | + { |
| 363 | + expand(buffer, 4); |
| 364 | + const auto dest = &buffer[index]; |
| 365 | + dest[0] = char(0xf0 | uChar(ucs >> 18u)); |
| 366 | + dest[1] = char(0x80 | uChar((ucs >> 12u) & 0x3f)); |
| 367 | + dest[2] = char(0x80 | uChar((ucs >> 6u) & 0x3f)); |
| 368 | + dest[3] = char(0x80 | uChar(ucs & 0x3f)); |
| 369 | + return 4; |
| 370 | + } |
| 371 | + |
| 372 | + return encode(L'�', buffer); // Invalid character |
| 373 | +} |
| 374 | + |
| 375 | +#endif |
| 376 | + |
298 | 377 | }; |
299 | 378 |
|
| 379 | + |
300 | 380 | // FCharAttribute + FAttribute |
301 | 381 | //---------------------------------------------------------------------- |
302 | 382 | struct FCharAttribute |
|
0 commit comments