Index

Source code

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>

// Returns the numeric value (0-15) of hexadecimal digit c, or -1 when c is
// not a hexadecimal digit (this includes the string terminator).
static int hex_digit_value(char c)
{
    if(c >= '0' && c <= '9')
    {
        return c - '0';
    }
    if(c >= 'a' && c <= 'f')
    {
        return c - 'a' + 10;
    }
    if(c >= 'A' && c <= 'F')
    {
        return c - 'A' + 10;
    }
    return -1;
}

// Reads at most max_digits hexadecimal digits starting at *cursor and
// advances *cursor past the digits consumed. Stops at the first character
// that is not a hex digit, so it can never step over the terminating NUL.
static unsigned long read_hex_digits(const char ** cursor, int max_digits)
{
    unsigned long value = 0;
    
    for(int n = 0; n < max_digits; ++n)
    {
        int digit = hex_digit_value(**cursor);
        if(digit < 0)
        {
            break;
        }
        value = (value << 4) | (unsigned long) digit;
        ++*cursor;
    }
    
    return value;
}

// Writes value to out one byte at a time, least significant byte first.
// Always writes one byte, then keeps going while the remaining value is
// non-zero, up to max_bytes in total. Returns the position after the last
// byte written.
static char * store_low_bytes_first(char * out, unsigned long value, int max_bytes)
{
    int written = 0;
    
    do
    {
        *out = (char) (unsigned char) (value & 0xFFu);
        ++out;
        ++written;
        value >>= 8;
    }
    while(written < max_bytes && value != 0);
    
    return out;
}

// Decodes C-style backslash escape sequences from src into dst.
//
// Recognised escapes:
//   \\ \' \" \? \a \b \f \n \r \t \v   the usual single-character escapes
//   \ooo        one to three octal digits, stored as a single byte
//   \xHH        up to 2 hex digits, stored as a single byte
//   \uHHHH      up to 4 hex digits, stored as 1-2 bytes
//   \UHHHHHHHH  up to 8 hex digits, stored as 1-4 bytes
//
// \u and \U values are stored low byte first, with high-order zero bytes
// dropped (at least one byte is always written).
// NOTE(review): this is not UTF-8; confirm that this byte layout is what
// callers expect before relying on it for non-ASCII code points.
//
// A numeric escape with fewer digits than its maximum uses only the digits
// present. An unknown escape (backslash followed by any other character) is
// dropped entirely, and a lone backslash at the end of the input is dropped
// as well.
//
// dst: output buffer. The output is never longer than the input, so a
//      buffer of strlen(src) + 1 bytes is always sufficient. The output is
//      always NUL-terminated.
// src: NUL-terminated input string; it is only read, never written.
//
// Returns the number of bytes written to dst, not counting the terminator.
// The output may contain embedded NUL bytes (e.g. from "\0"), so the return
// value, not strlen(dst), is the reliable length.
size_t parse_escape_sequences(char * dst, char * src)
{
    const char * in = src;
    char * out = dst;
    
    while(*in != '\0')
    {
        if(*in != '\\')
        {
            // ordinary character: copy through unchanged
            *out = *in;
            ++out;
            ++in;
            continue;
        }
        
        // step over the backslash and look at the escape selector
        ++in;
        
        switch(*in)
        {
            case '\0':
                // Lone backslash at end of input: drop it. Do not advance,
                // so the loop sees the terminator and stops.
                break;
            
            case 'U':
                ++in;
                out = store_low_bytes_first(out, read_hex_digits(&in, 8), 4);
                break;
            
            case 'u':
                ++in;
                out = store_low_bytes_first(out, read_hex_digits(&in, 4), 2);
                break;
            
            case 'x':
                ++in;
                out = store_low_bytes_first(out, read_hex_digits(&in, 2), 1);
                break;
            
            case '0': case '1': case '2': case '3':
            case '4': case '5': case '6': case '7':
            {
                // octal: at most three digits, as in C source code
                unsigned int value = 0;
                
                for(int n = 0; n < 3 && *in >= '0' && *in <= '7'; ++n)
                {
                    value = value * 8 + (unsigned int) (*in - '0');
                    ++in;
                }
                
                // values above 0377 are truncated to their low 8 bits
                *out = (char) (unsigned char) value;
                ++out;
                break;
            }
            
            case '\\': *out = '\\'; ++out; ++in; break;
            case '\'': *out = '\''; ++out; ++in; break;
            case '"':  *out = '\"'; ++out; ++in; break;
            case '?':  *out = '\?'; ++out; ++in; break;
            case 'a':  *out = '\a'; ++out; ++in; break;
            case 'b':  *out = '\b'; ++out; ++in; break;
            case 'f':  *out = '\f'; ++out; ++in; break;
            case 'n':  *out = '\n'; ++out; ++in; break;
            case 'r':  *out = '\r'; ++out; ++in; break;
            case 't':  *out = '\t'; ++out; ++in; break;
            case 'v':  *out = '\v'; ++out; ++in; break;
            
            default:
                // unknown escape: discard the character after the backslash
                ++in;
                break;
        }
    }
    
    *out = '\0';
    
    return (size_t) (out - dst);
}

// Demo driver: decodes a sample string containing each numeric escape form
// and prints the input next to the decoded result.
int main(void)
{
    // Declared as an array (not a pointer) so that sizeof gives the string
    // length including its terminator at compile time.
    const char src[] = "0: \\U00000078 1: \\u0078 2: \\x78 3: \\0 4: ...";
    
    // Decoding never produces more bytes than it consumes, so a buffer the
    // size of the input (terminator included) covers the worst case.
    char dst[sizeof src];
    
    // The cast only drops const to match the parameter type; the function
    // reads src and does not write to it.
    parse_escape_sequences(dst, (char *) src);
    
    // The decoded "\0" is a real NUL byte, so %s stops printing dst there.
    printf("src: '%s', dst: '%s'\n", src, dst);
    
    return 0;
}

Output

src: '0: \U00000078 1: \u0078 2: \x78 3: \0 4: ...', dst: '0: x 1: x 2: x 3: '