GDevelop Core
Core library for developing platforms and tools compatible with GDevelop.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Modules Pages
Classes | Public Types | Static Public Attributes | Related Functions | List of all members
gd::String Class Reference

String represents a UTF8 encoded string. More...

#include <String.h>

Classes

class  StringIterator
 

Public Types

using value_type = char32_t
 
using reference = char32_t &
 
using const_reference = const char32_t &
 
using pointer = char32_t *
 
using const_pointer = const char32_t *
 
using size_type = std::string::size_type
 
using difference_type = std::string::difference_type
 
using iterator = StringIterator< std::string::iterator >
 
using const_iterator = StringIterator< std::string::const_iterator >
 
using reverse_iterator = std::reverse_iterator< iterator >
 
using const_reverse_iterator = std::reverse_iterator< const_iterator >
 

Public Member Functions

Constructors
 String ()
 
 String (const char *characters)
 
 String (const std::u32string &string)
 
 String (const sf::String &string)
 
Assignment (implicit conversions)
String & operator= (const char *characters)
 
String & operator= (const sf::String &string)
 
String & operator= (const std::u32string &string)
 
Size
bool empty () const
 Returns true if the string is empty.
 
size_type size () const
 Returns the string's length.
 
size_type length () const
 Returns the string's length.
 
void clear ()
 Clear the string. More...
 
Iterators
String::iterator begin ()
 Get a beginning iterator.
 
String::const_iterator begin () const
 Get a constant beginning iterator.
 
String::iterator end ()
 Get an ending iterator.
 
String::const_iterator end () const
 Get a constant ending iterator.
 
Conversions to other string types
std::string ToLocale () const
 
std::u32string ToUTF32 () const
 
sf::String ToSfString () const
 
 operator sf::String () const
 
std::string ToUTF8 () const
 
std::wstring ToWide () const
 
UTF8 tools
bool IsValid () const
 
String & ReplaceInvalid (value_type replacement=0xfffd)
 Searches the string for invalid characters and replaces them with replacement. More...
 
Element access / Internal string access
value_type operator[] (const size_type position) const
 Returns the code point at the specified position. More...
 
std::string & Raw ()
 Get the raw UTF8-encoded std::string.
 
const std::string & Raw () const
 Get the raw UTF8-encoded std::string.
 
const char * c_str () const
 Get the C-string.
 
String modifiers
String & operator+= (const String &other)
 
String & operator+= (const char *other)
 
String & operator+= (value_type character)
 
void push_back (value_type character)
 Add a character (from its codepoint) at the end of the String. More...
 
void pop_back ()
 Remove the last character of the String. More...
 
String & insert (size_type pos, const String &str)
 Inserts characters right before the character at pos. More...
 
String & replace (iterator i1, iterator i2, const String &str)
 Replace the portion of the String between i1 and i2 (i2 not included) by the String str. More...
 
String & replace (size_type pos, size_type len, const String &str)
 Replace the portion of the String between pos and pos + len (the character at pos + len is not included) More...
 
iterator erase (iterator first, iterator last)
 Erase the characters between first and last (last not included). More...
 
iterator erase (iterator p)
 Erase the character pointed by p. More...
 
void erase (size_type pos=0, size_type len=npos)
 Erase the characters between the positions pos and pos + len (pos + len not included). More...
 

Static Public Member Functions

Conversions from other string types
static String FromLocale (const std::string &localizedString)
 
static String FromUTF32 (const std::u32string &string)
 
static String FromSfString (const sf::String &sfString)
 
static String FromUTF8 (const std::string &utf8Str)
 
static String FromWide (const std::wstring &wstr)
 

Static Public Attributes

static constexpr size_type npos = -1
 

Related Functions

(Note that these are not member functions.)

bool GD_CORE_API CaseSensitiveEquiv (String lhs, String rhs, bool compat=true)
 
bool GD_CORE_API CaseInsensitiveEquiv (const String &lhs, const String &rhs, bool compat=true)
 
Non-member operators
String GD_CORE_API operator+ (String lhs, const String &rhs)
 
String GD_CORE_API operator+ (String lhs, const char *rhs)
 
String GD_CORE_API operator+ (const char *lhs, const String &rhs)
 
Relational operators
bool GD_CORE_API operator== (const String &lhs, const String &rhs)
 
bool GD_CORE_API operator== (const String &lhs, const char *rhs)
 
bool GD_CORE_API operator== (const char *lhs, const String &rhs)
 
bool GD_CORE_API operator!= (const String &lhs, const String &rhs)
 
bool GD_CORE_API operator!= (const String &lhs, const char *rhs)
 
bool GD_CORE_API operator!= (const char *lhs, const String &rhs)
 
bool GD_CORE_API operator< (const String &lhs, const String &rhs)
 
bool GD_CORE_API operator< (const String &lhs, const char *rhs)
 
bool GD_CORE_API operator< (const char *lhs, const String &rhs)
 
bool GD_CORE_API operator<= (const String &lhs, const String &rhs)
 
bool GD_CORE_API operator<= (const String &lhs, const char *rhs)
 
bool GD_CORE_API operator<= (const char *lhs, const String &rhs)
 
bool GD_CORE_API operator> (const String &lhs, const String &rhs)
 
bool GD_CORE_API operator> (const String &lhs, const char *rhs)
 
bool GD_CORE_API operator> (const char *lhs, const String &rhs)
 
bool GD_CORE_API operator>= (const String &lhs, const String &rhs)
 
bool GD_CORE_API operator>= (const String &lhs, const char *rhs)
 
bool GD_CORE_API operator>= (const char *lhs, const String &rhs)
 
Stream operators
std::ostream &GD_CORE_API operator<< (std::ostream &os, const String &str)
 
std::istream &GD_CORE_API operator>> (std::istream &is, String &str)
 

Convert from/to numbers

template<typename T >
T To () const
 Method to convert the string to a number. More...
 
template<typename T >
static String From (T value)
 Method to create a gd::String from a number (float, double, int, ...) More...
 

String operations

enum  NormForm { NFD, NFC, NFKD, NFKC }
 
std::vector< String > Split (value_type delimiter) const
 Split the string with a delimiter. More...
 
String CaseFold () const
 Returns the case-folded string. More...
 
String UpperCase () const
 Returns the string in uppercase. More...
 
String LowerCase () const
 Returns the string in lowercase. More...
 
String FindAndReplace (String search, String replacement, bool all=true) const
 Searches a string for a specified substring and returns a new string where all occurrences of this substring are replaced. More...
 
String & Normalize (NormForm form=NFC)
 
String substr (size_type start=0, size_type length=npos) const
 
size_type find (const String &search, size_type pos=0) const
 
size_type find (const char *search, size_type pos=0) const
 
size_type find (const value_type search, size_type pos=0) const
 
size_type rfind (const String &search, size_type pos=npos) const
 
size_type rfind (const char *search, size_type pos=npos) const
 
size_type rfind (const value_type &search, size_type pos=npos) const
 
size_type find_first_of (const String &match, size_type startPos=0) const
 Searches the string for the first character that matches any of the characters specified in its arguments. More...
 
size_type find_first_not_of (const String &not_match, size_type startPos=0) const
 Searches the string for the first character that doesn't match any of the characters specified in its arguments. More...
 
size_type find_last_of (const String &match, size_type endPos=npos) const
 Searches the string for the last character that matches any of the characters specified in its arguments. More...
 
size_type find_last_not_of (const String &not_match, size_type endPos=npos) const
 Searches the string for the last character that doesn't match any of the characters specified in its arguments. More...
 
int compare (const String &other) const
 Compares the current string with another.
 
size_type FindCaseInsensitive (const String &search, size_type pos=0) const
 Do a case-insensitive search. More...
 

Detailed Description

String represents a UTF8 encoded string.

This class represents an UTF8 encoded string. It provides almost the same features as the STL std::string class but is UTF8 aware (size() returns the number of characters, not the number of bytes for example).

What is UTF8 and Unicode ?

(from https://en.wikipedia.org/wiki/Unicode and https://en.wikipedia.org/wiki/UTF-8)

Unicode is a computing industry standard for the consistent encoding, representation, and handling of text expressed in most of the world's writing systems. Unicode can be implemented by different character encodings. The most commonly used encodings are UTF-8, UTF-16 and the now-obsolete UCS-2.

UTF-8 is a character encoding capable of encoding all possible characters, or code points, in Unicode. The encoding is variable-length (not every codepoint is 1 byte long) and uses 8-bit code units. It was designed for backward compatibility with ASCII. UTF-8 encodes each of the 1,112,064 valid code points in the Unicode code space using one to four 8-bit bytes (a group of 8 bits is known as an octet in the Unicode Standard). Code points with lower numerical values (i.e., earlier code positions in the Unicode character set, which tend to occur more frequently) are encoded using fewer bytes. The first 128 characters of Unicode, which correspond one-to-one with ASCII, are encoded using a single octet with the same binary value as ASCII, making valid ASCII text valid UTF-8-encoded Unicode as well.

Limitations

The String class internally stores the string as a UTF8 encoded std::string. This results in some limitations: it's impossible to edit a single character with operator[]() or at() because the new character length might not be the same.

The gd::String class supports almost all Unicode characters, except the ones that can't be represented as a single codepoint (obviously, a codepoint can be represented by 1 to 4 bytes, as codepoints are encoded in UTF8). For example, some special letters are composed of multiple codepoints (a letter and its accents). Most of them can be combined into a single codepoint, but some can't. These are the unsupported ones. See Normalization.

Performance

The UTF8 encoding has the advantage of reducing RAM consumption compared to UTF16 or UTF32 for strings using a lot of Latin characters. But the variable length of the characters brings some performance issues compared to fixed-size encodings. That's why the complexity of each method is written in its documentation. For instance, the size() method is linear in the string size, and so is operator[]().

Conversions from/to other string types

The String handles implicit conversion with sf::String (implicit constructor and implicit conversion operator).

However, this is not the case with std::string as this conversion is not often lossless (mostly on Windows). You need to explicitly call gd::String::FromLocale or gd::String::FromUTF8 to convert a std::string to a String. However, if you want to get a String object from a string literal, you can directly use the operator=() or the constructor as they are supporting const char* as argument (it assumes the string literal is encoded in UTF8, so you'll need to put the u8 prefix).

Implicit conversion from/to sf::String

//Get a String from sf::String
sf::String sfmlStr("This is a test ! ");
gd::String str1(sfmlStr); //Now contains "This is a test ! " encoded in UTF8
//Get a sf::String from String
gd::String str(u8"Another test ! ");
sf::String anotherSfmlString = str; //anotherSfmlString now contains "Another test ! "

Conversion from/to std::string

//Get a String from a std::string encoded in the current locale
std::string ansiStr = "Some beautiful localized characters. "; //Encoded in ANSI on Windows, UTF8 on Linux
gd::String str = gd::String::FromLocale(ansiStr);
//Create a String using a string literal encoded in UTF8
gd::String anotherStr = u8"This is an UTF8 string";
//The same as gd::String anotherStr = gd::String::FromUTF8(u8"This is an UTF8 string");
//But it works only with string literals.
gd::String finalStr = str + anotherStr; //Concatenates the two Strings
std::cout << finalStr.ToLocale() << std::endl; //Shows "Some beautiful localized characters. This is an UTF8 string"

Normalization

This class stores Unicode strings normalized with NFC, which means that all characters are combined (if they can be). For example, the "à" character can be written in two ways according to the Unicode norm: U+00E0 (the "à" in a single codepoint) or U+0061 (the "a" letter codepoint) + U+0300 (the "`" combining accent). We say that they are canonically equivalent. However, this can cause problems when comparing strings, which is why this class normalizes the string when constructed using the Normalization Form Composition (all characters are combined, e.g. "à" is represented by a single codepoint). If the string is invalid when constructed, the string is not normalized: it will be normalized when the invalid characters are replaced using gd::String::ReplaceInvalid().

Case-insensitive comparison

In Unicode, uppercasing/lowercasing strings to compare them in a case-insensitive way is not recommended. That's why the function gd::CaseInsensitiveEquiv exists to compare two strings in a case-insensitive way.

Member Enumeration Documentation

Normalization form

Enumerator
NFD 

Normalization Form Decomposition: characters are decomposed by canonical equivalence, and multiple combining characters are arranged in a specific order.

NFC 

Normalization Form Composition: characters are decomposed and then recomposed by canonical equivalence.

NFKD 

Normalization Form Compatibility Decomposition: characters are decomposed by compatibility, and multiple combining characters are arranged in a specific order.

NFKC 

Normalization Form Compatibility Composition: characters are decomposed by compatibility, then recomposed by canonical equivalence.

Constructor & Destructor Documentation

gd::String::String ( )

Constructs an empty string.

gd::String::String ( const char *  characters)

Constructs a string from an array of char representing a string encoded in UTF8.

Useful to implicitly create a String object from a string literal.

Usage:

gd::String str(u8"A little sentence.");
gd::String::String ( const std::u32string &  string)

Constructs a String from a std::u32string.

Usage:

gd::String str(U"A UTF32 encoded string.");
gd::String::String ( const sf::String &  string)

Constructs a string from an sf::String.

Member Function Documentation

String gd::String::CaseFold ( ) const

Returns the case-folded string.

Note
This string is almost but not totally suitable for case-insensitive comparison because you have to make sure that it is normalized. So, to do a case-insensitive comparison, do :
str1.CaseFold().Normalize() == str2.CaseFold().Normalize()
You can also use gd::CaseInsensitiveEquiv();
void gd::String::clear ( )
inline

Clear the string.

Iterators : Obviously, all iterators are invalidated.

String::iterator gd::String::erase ( String::iterator  first,
String::iterator  last 
)

Erase the characters between first and last (last not included).

Parameters
first: an iterator to the first character to remove
last: an iterator to the character next to the last one to remove
Returns
an iterator pointing at the old position of the first deleted character
String::iterator gd::String::erase ( String::iterator  p)

Erase the character pointed by p.

Parameters
p: an iterator pointing to the character to be erased
Returns
an iterator pointing at the old position of the deleted character
void gd::String::erase ( String::size_type  pos = 0,
String::size_type  len = npos 
)

Erase the characters between the positions pos and pos + len (pos + len not included).

Parameters
pos: the position of the first character to remove
len: the number of characters to remove from pos
String::size_type gd::String::find ( const String &  search,
String::size_type  pos = 0 
) const
Returns
the position of the first occurrence of search starting from pos.
String::size_type gd::String::find ( const char *  search,
String::size_type  pos = 0 
) const
Returns
the position of the first occurrence of search starting from pos.
String::size_type gd::String::find ( const value_type  search,
String::size_type  pos = 0 
) const
Returns
the position of the first occurrence of search starting from pos.
String::size_type gd::String::find_first_not_of ( const String &  not_match,
size_type  startPos = 0 
) const

Searches the string for the first character that doesn't match any of the characters specified in its arguments.

Parameters
not_match: the characters that will be looked for in the String
startPos: where to start the search
Returns
the position of the first found character
String::size_type gd::String::find_first_of ( const String &  match,
size_type  startPos = 0 
) const

Searches the string for the first character that matches any of the characters specified in its arguments.

Parameters
match: the characters that will be looked for in the String
startPos: where to start the search
Returns
the position of the first found character
String::size_type gd::String::find_last_not_of ( const String &  not_match,
size_type  endPos = npos 
) const

Searches the string for the last character that doesn't match any of the characters specified in its arguments.

Parameters
not_match: the characters that will be looked for in the String
endPos: where to end the search (this is the last character considered in the search)
Returns
the position of the last found character
String::size_type gd::String::find_last_of ( const String &  match,
size_type  endPos = npos 
) const

Searches the string for the last character that matches any of the characters specified in its arguments.

Parameters
match: the characters that will be looked for in the String
endPos: where to end the search (this is the last character considered in the search)
Returns
the position of the last found character
String gd::String::FindAndReplace ( String  search,
String  replacement,
bool  all = true 
) const

Searches a string for a specified substring and returns a new string where all occurrences of this substring are replaced.

Parameters
search: The string that will be replaced by the new string.
replacement: The value to replace the old substring with.
all: If set to false, only the first matching substring will be replaced.
String::size_type gd::String::FindCaseInsensitive ( const String &  search,
size_type  pos = 0 
) const

Do a case-insensitive search.

Returns
the position of the first occurrence of search starting from pos.
Note
This method isn't very efficient as it is linear on the string size times the search string size
template<typename T >
static String gd::String::From ( T  value)
inline static

Method to create a gd::String from a number (float, double, int, ...)

Returns
a gd::String created from value.
String gd::String::FromLocale ( const std::string &  localizedString)
static
Returns
a String created from a std::string encoded in the current locale.

See Conversion from/to std::string for more information.

String gd::String::FromSfString ( const sf::String &  sfString)
static
Returns
a String created from a sf::String (UTF32).

See Implicit conversion from/to sf::String for more information.

String gd::String::FromUTF32 ( const std::u32string &  string)
static
Returns
a String created from a std::u32string.
String gd::String::FromUTF8 ( const std::string &  utf8Str)
static
Returns
a String created from a UTF8 encoded std::string.
String gd::String::FromWide ( const std::wstring &  wstr)
static
Returns
a String created from a std::wstring (UTF32 on Linux and UCS-2 on Windows)
String & gd::String::insert ( size_type  pos,
const String &  str
)

Inserts characters right before the character at pos.

Returns
*this

Iterators : All iterators may be invalidated.

bool gd::String::IsValid ( ) const
Returns
true if the string is valid.
String gd::String::LowerCase ( ) const

Returns the string in lowercase.

Note
Some characters that map to multiple characters when lowercased may not be processed, e.g. double SS to eszett in German.
String & gd::String::Normalize ( String::NormForm  form = NFC)

Normalize the string using the normalization form form.

Returns
*this
gd::String::operator sf::String ( ) const

Implicit conversion operator to sf::String.

See Implicit conversion from/to sf::String for more information.

String & gd::String::operator= ( const char *  characters)

Assign the String using a string literal (it assumes that the string literal is encoded in UTF8).

Usage:

str = u8"This is a test string.";
String::value_type gd::String::operator[] ( const size_type  position) const

Returns the code point at the specified position.

Warning
This operator has a linear complexity on the character's position. You should avoid using it in a loop; use the iterators provided by this class instead.
void gd::String::pop_back ( )

Remove the last character of the String.

Iterators : All iterators may be invalidated (in particular if the string is reallocated).

void gd::String::push_back ( String::value_type  character)

Add a character (from its codepoint) at the end of the String.

Iterators : All iterators may be invalidated (in particular if the string is reallocated).

String & gd::String::replace ( iterator  i1,
iterator  i2,
const String &  str
)

Replace the portion of the String between i1 and i2 (i2 not included) by the String str.

Returns
*this

Iterators : All iterators may be invalidated.

String & gd::String::replace ( String::size_type  pos,
String::size_type  len,
const String &  str
)

Replace the portion of the String between pos and pos + len (the character at pos + len is not included)

Returns
*this

Iterators : All iterators may be invalidated.

String & gd::String::ReplaceInvalid ( value_type  replacement = 0xfffd)

Searches the string for invalid characters and replaces them with replacement.

Returns
*this
String::size_type gd::String::rfind ( const String &  search,
String::size_type  pos = npos 
) const
Returns
the position of the last occurrence of search starting before pos.
String::size_type gd::String::rfind ( const char *  search,
String::size_type  pos = npos 
) const
Returns
the position of the last occurrence of search starting before pos.
String::size_type gd::String::rfind ( const value_type &  search,
String::size_type  pos = npos 
) const
Returns
the position of the last occurrence of search starting before pos.
std::vector< String > gd::String::Split ( String::value_type  delimiter) const

Split the string with a delimiter.

Parameters
delimiter: the delimiter (a UTF32 codepoint)
Returns
a std::vector containing all the gd::String objects

Usage:

gd::String str = u8"10;20;30;40";
std::vector<gd::String> splittedStr = str.Split(U';');
//the U prefix is mandatory to get a char32_t from the literal
//Now the vector contains "10", "20", "30" and "40" as gd::String objects
String gd::String::substr ( String::size_type  start = 0,
String::size_type  length = npos 
) const

Returns a sub-string starting from start and with length length.

template<typename T >
T gd::String::To ( ) const
inline

Method to convert the string to a number.

Returns
the string converted to the type T
std::string gd::String::ToLocale ( ) const
Returns
a localized std::string from the current string.

See Conversion from/to std::string for more information.

sf::String gd::String::ToSfString ( ) const
Returns
a sf::String from the current string.

See Implicit conversion from/to sf::String for more information.

std::u32string gd::String::ToUTF32 ( ) const
Returns
a std::u32string.
std::string gd::String::ToUTF8 ( ) const
Returns
a UTF8 encoded std::string from the current string.
std::wstring gd::String::ToWide ( ) const
Returns
a wide string (std::wstring) encoded in UTF32 on Linux and in UCS-2 on Windows
Note
On Windows, this is possibly a lossy conversion.
String gd::String::UpperCase ( ) const

Returns the string in uppercase.

Note
Some characters that map to multiple characters when uppercased may not be processed, e.g. the German eszett.

Friends And Related Function Documentation

bool GD_CORE_API CaseInsensitiveEquiv ( const String &  lhs,
const String &  rhs,
bool  compat = true 
)
related
Parameters
compat: if true, the strings are normalized using a compatibility normalization form to remove the characters' special appearance.
Returns
true if the two strings are equivalent (in a case-insensitive way).
bool GD_CORE_API CaseSensitiveEquiv ( String  lhs,
String  rhs,
bool  compat = true 
)
related
Parameters
compat: if true, the strings are normalized using a compatibility normalization form to remove the characters' special appearance.
Returns
true if the two strings are equivalent (in a case-sensitive way).
bool GD_CORE_API operator!= ( const String &  lhs,
const String &  rhs
)
related
bool GD_CORE_API operator!= ( const String &  lhs,
const char *  rhs 
)
related
bool GD_CORE_API operator!= ( const char *  lhs,
const String &  rhs
)
related
String GD_CORE_API operator+ ( String  lhs,
const String &  rhs
)
related
Returns
a String containing the concatenation of lhs and rhs.
String GD_CORE_API operator+ ( String  lhs,
const char *  rhs 
)
related
Returns
a String containing the concatenation of lhs and rhs (rhs is converted to gd::String assuming it's encoded in UTF8).
String GD_CORE_API operator+ ( const char *  lhs,
const String &  rhs
)
related
Returns
a String containing the concatenation of lhs and rhs (lhs is converted to gd::String assuming it's encoded in UTF8).
bool GD_CORE_API operator< ( const String &  lhs,
const String &  rhs
)
related
bool GD_CORE_API operator< ( const String &  lhs,
const char *  rhs 
)
related
bool GD_CORE_API operator< ( const char *  lhs,
const String &  rhs
)
related
std::ostream &GD_CORE_API operator<< ( std::ostream &  os,
const String &  str
)
related

Outputs the string in a stream.

Note
The string is converted to the current locale before. If you want to stream the string as UTF8, do :
std::cout << myString.Raw();
bool GD_CORE_API operator<= ( const String &  lhs,
const String &  rhs
)
related
bool GD_CORE_API operator<= ( const String &  lhs,
const char *  rhs 
)
related
bool GD_CORE_API operator<= ( const char *  lhs,
const String &  rhs
)
related
bool GD_CORE_API operator== ( const String &  lhs,
const String &  rhs
)
related
bool GD_CORE_API operator== ( const String &  lhs,
const char *  rhs 
)
related
bool GD_CORE_API operator== ( const char *  lhs,
const String &  rhs
)
related
bool GD_CORE_API operator> ( const String &  lhs,
const String &  rhs
)
related
bool GD_CORE_API operator> ( const String &  lhs,
const char *  rhs 
)
related
bool GD_CORE_API operator> ( const char *  lhs,
const String &  rhs
)
related
bool GD_CORE_API operator>= ( const String &  lhs,
const String &  rhs
)
related
bool GD_CORE_API operator>= ( const String &  lhs,
const char *  rhs 
)
related
bool GD_CORE_API operator>= ( const char *  lhs,
const String &  rhs
)
related
std::istream &GD_CORE_API operator>> ( std::istream &  is,
String &  str
)
related

Extracts a string from an input stream assuming the stream inputs characters encoded in the current locale.

Note
The content of the string is replaced.

The documentation for this class was generated from the following files: