CBMC
|
#include "unicode.h"
#include "invariant.h"
#include <codecvt>
#include <cstdint>
#include <iomanip>
#include <locale>
#include <sstream>
Go to the source code of this file.
Functions | |
static void | utf8_append_code (unsigned int c, std::string &result) |
Appends a unicode character to a utf8-encoded string. | |
std::string | narrow (const wchar_t *s) |
std::wstring | widen (const char *s) |
std::string | narrow (const std::wstring &s) |
std::wstring | widen (const std::string &s) |
std::string | utf32_native_endian_to_utf8 (const std::basic_string< char32_t > &s) |
std::vector< std::string > | narrow_argv (int argc, const wchar_t **argv_wide) |
static void | utf16_append_code (unsigned int code, std::wstring &result) |
std::wstring | utf8_to_utf16_native_endian (const std::string &in) |
Convert UTF8-encoded string to UTF-16 with architecture-native endianness. | |
std::u32string | utf8_to_utf32 (const std::string &utf8_str) |
Convert UTF8-encoded string to UTF-32 with architecture-native endianness. | |
static void | utf16_native_endian_to_java_string (const wchar_t ch, std::ostringstream &result, const std::locale &loc) |
Escapes non-printable characters, whitespace except for spaces, double quotes and backslashes. | |
static void | utf16_native_endian_to_java (const wchar_t ch, std::ostringstream &result, const std::locale &loc) |
Escapes non-printable characters, whitespace except for spaces, double- and single-quotes and backslashes. | |
std::string | utf16_native_endian_to_java (const char16_t ch) |
std::string | utf16_native_endian_to_java_string (const std::wstring &in) |
Escapes non-printable characters, whitespace except for spaces, double quotes and backslashes. | |
std::string | utf16_native_endian_to_utf8 (const char16_t utf16_char) |
std::string | utf16_native_endian_to_utf8 (const std::u16string &utf16_str) |
char16_t | codepoint_hex_to_utf16_native_endian (const std::string &hex) |
std::string | codepoint_hex_to_utf8 (const std::string &hex) |
char16_t codepoint_hex_to_utf16_native_endian | ( | const std::string & | hex | )
hex | representation of a BMP codepoint as a four-digit string (e.g. "0041" for \u0041)
Definition at line 378 of file unicode.cpp.
std::string codepoint_hex_to_utf8 | ( | const std::string & | hex | ) |
hex | representation of a BMP codepoint as a four-digit string (e.g. "0041" for \u0041) |
Definition at line 384 of file unicode.cpp.
std::string narrow | ( | const std::wstring & | s | ) |
Definition at line 64 of file unicode.cpp.
std::string narrow | ( | const wchar_t * | s | )
Definition at line 33 of file unicode.cpp.
std::vector< std::string > narrow_argv | ( | int | argc, const wchar_t ** | argv_wide | )
Definition at line 148 of file unicode.cpp.
static void utf16_append_code | ( | unsigned int | code, std::wstring & | result | )
Definition at line 162 of file unicode.cpp.
std::string utf16_native_endian_to_java | ( | const char16_t | ch | )
ch | UTF-16 character in architecture-native endianness encoding |
Definition at line 335 of file unicode.cpp.
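static void utf16_native_endian_to_java | ( | const wchar_t | ch, std::ostringstream & | result, const std::locale & | loc | )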
Escapes non-printable characters, whitespace except for spaces, double- and single-quotes and backslashes.
This should yield a valid Java identifier.
ch | UTF-16 character in architecture-native endianness encoding |
result | stream to receive string in US-ASCII format, with \uxxxx escapes for other characters |
loc | locale to check for printable characters |
Definition at line 316 of file unicode.cpp.
std::string utf16_native_endian_to_java_string | ( | const std::wstring & | in | ) |
Escapes non-printable characters, whitespace except for spaces, double quotes and backslashes.
This should yield a valid Java string literal. Note that this specifically does not escape single quotes, as these are not required to be escaped for Java string literals.
in | String in UTF-16 (native endianness) format |
Definition at line 350 of file unicode.cpp.
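static void utf16_native_endian_to_java_string | ( | const wchar_t | ch, std::ostringstream & | result, const std::locale & | loc | )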
Escapes non-printable characters, whitespace except for spaces, double quotes and backslashes.
This should yield a valid Java string literal. Note that this specifically does not escape single quotes, as these are not required to be escaped for Java string literals.
ch | UTF-16 character in architecture-native endianness encoding |
result | stream to receive string in US-ASCII format, with \uxxxx escapes for other characters |
loc | locale to check for printable characters |
Definition at line 272 of file unicode.cpp.
std::string utf16_native_endian_to_utf8 | ( | char16_t | utf16_char | ) |
utf16_char | UTF-16 character in architecture-native endianness encoding |
Definition at line 359 of file unicode.cpp.
std::string utf16_native_endian_to_utf8 | ( | const std::u16string & | utf16_str | ) |
utf16_str | UTF-16 string in architecture-native endianness encoding |
Definition at line 364 of file unicode.cpp.
std::string utf32_native_endian_to_utf8 | ( | const std::basic_string< char32_t > & | s | )
s | UTF-32 encoded wide string |
Definition at line 136 of file unicode.cpp.
static void utf8_append_code | ( | unsigned int | c, std::string & | result | )
Appends a unicode character to a utf8-encoded string.
Definition at line 110 of file unicode.cpp.
std::wstring utf8_to_utf16_native_endian | ( | const std::string & | in | ) |
Convert UTF8-encoded string to UTF-16 with architecture-native endianness.
Definition at line 191 of file unicode.cpp.
std::u32string utf8_to_utf32 | ( | const std::string & | utf8_str | ) |
Convert UTF8-encoded string to UTF-32 with architecture-native endianness.
Definition at line 205 of file unicode.cpp.
std::wstring widen | ( | const char * | s | )
Definition at line 49 of file unicode.cpp.
std::wstring widen | ( | const std::string & | s | ) |
Definition at line 87 of file unicode.cpp.