/*
 * UTF-16 surrogate pair arithmetic.
 *
 * Converts between a supplementary-plane code point and its UTF-16
 * lead/trail surrogate pair using two precomputed offsets, so each
 * direction is a shift, a mask/add and nothing else.
 *
 * None of the helpers validate their input: the caller must ensure the
 * code point is in 0x10000..0x10FFFF, the lead unit is in 0xD800..0xDBFF
 * and the trail unit is in 0xDC00..0xDFFF.
 */

typedef unsigned short UTF16; /* holds one UTF-16 code unit (>= 16 bits) */
typedef unsigned int UTF32;   /* holds one code point (checked >= 32 bits) */

/* Compile-time check: the array size becomes -1 if UTF32 is narrower than 32 bits. */
typedef char utf32_must_hold_32_bits[((UTF32)-1 >= 0xFFFFFFFFUL) ? 1 : -1];

// constants

/* 0xD800 minus the (0x10000 >> 10) bias, so lead = LEAD_OFFSET + (cp >> 10). */
const UTF32 LEAD_OFFSET = 0xD800u - (0x10000u >> 10);

/*
 * Cancels the surrogate bases and re-adds 0x10000 in a single addition.
 * The value is deliberately computed with unsigned wrap-around; adding it
 * to (lead << 10) + trail wraps back to the code point.
 */
const UTF32 SURROGATE_OFFSET = 0x10000u - (0xD800u << 10) - 0xDC00u;

// computations

/* Surrogate Pair: lead (high) surrogate for a supplementary code point. */
static inline UTF16 utf16_lead_surrogate(UTF32 codepoint)
{
    return (UTF16)(LEAD_OFFSET + (codepoint >> 10));
}

/* Surrogate Pair: trail (low) surrogate for a supplementary code point. */
static inline UTF16 utf16_trail_surrogate(UTF32 codepoint)
{
    return (UTF16)(0xDC00u + (codepoint & 0x3FFu));
}

/* To Codepoint: combine a lead/trail surrogate pair into one code point. */
static inline UTF32 utf16_surrogates_to_codepoint(UTF16 lead, UTF16 trail)
{
    /* Widen before shifting so the shift is done in UTF32, not promoted int. */
    return ((UTF32)lead << 10) + (UTF32)trail + SURROGATE_OFFSET;
}