首页 > 操作系统 >

c 链表的定义和使用 cjson 源码阅读笔记(7)

电脑杂谈　发布时间：2017-12-31 09:23:46　来源：网络整理

当然，skip 用于忽略空白，这里跳过了 ascii 值小于 32 的。

/* Parser core: look at the start of the text and hand it to the matching
   parser. The three bare literals are recognised here directly; everything
   else is dispatched on its first character.
   Returns a pointer just past the consumed literal, or whatever the
   delegated parser returns. Returns 0 when value is null or when no known
   JSON value starts here; in the latter case ep is set to value. */
static const char *parse_value(cJSON *item,const char *value) {
    if (value==0) {
        return 0;   /* nothing to parse */
    }

    /* Bare literals: only the type (and valueint for true) is filled in. */
    if (strncmp(value,"null",4)==0) {
        item->type=cJSON_NULL;
        return value+4;
    }
    if (strncmp(value,"false",5)==0) {
        item->type=cJSON_False;
        return value+5;
    }
    if (strncmp(value,"true",4)==0) {
        item->type=cJSON_True;
        item->valueint=1;
        return value+4;
    }

    /* Everything else is identified by its first character. */
    switch (*value) {
    case '\"':
        return parse_string(item,value);
    case '-':
    case '0':
    case '1':
    case '2':
    case '3':
    case '4':
    case '5':
    case '6':
    case '7':
    case '8':
    case '9':
        return parse_number(item,value);
    case '[':
        return parse_array(item,value);
    case '{':
        return parse_object(item,value);
    default:
        break;
    }

    /* Unrecognised input: record where parsing stopped and fail. */
    ep=value;
    return 0;
}

parse_value 的实现方式很简单，根据前几个字符来判断下一个值的类型是什么。

如果是 null, false 或 true 设置类型，并返回偏移指针。

如果是其他的，则进入对应的函数中。

解析字符串时，对于特殊字符也应该转义，比如 "\\n" 字符应该转换为 '\n' 这个换行符。

当然，如果只有特殊字符转换的话，代码不会有这么长，对于字符串，还要支持非 ascii 码的字符，即 utf8 字符。

这些字符在字符串中会编码为 \uXXXX 形式的转义序列（UTF-16 码元），我们现在需要把它还原为 UTF-8 编码，也就是 1 到 4 个取值在 0-255 之间的字节。

/* Decode the four hexadecimal digits starting at str (either letter case)
   into their numeric value, most significant digit first.
   Returns 0 as soon as a character that is not a hex digit is met, so a
   malformed sequence cannot be told apart from "0000" by the result alone.
   Reading stops at the first invalid character, including a NUL. */
static unsigned parse_hex4(const char *str) {
    unsigned value=0;
    int i;
    for (i=0; i<4; i++) {
        const char c=str[i];
        unsigned digit;
        if (c>='0' && c<='9') {
            digit=(unsigned)(c-'0');
        } else if (c>='A' && c<='F') {
            digit=10u+(unsigned)(c-'A');
        } else if (c>='a' && c<='f') {
            digit=10u+(unsigned)(c-'a');
        } else {
            return 0;   /* not a hex digit */
        }
        value=(value<<4)+digit;
    }
    return value;
}
/* Lead-byte marker bits of a UTF-8 sequence, indexed by the sequence length
   in bytes (only indices 1..4 are used by parse_string). */
static const unsigned char firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };

/* Count how many of the first four characters at s are hexadecimal digits,
   stopping at the first one that is not (which includes the terminating
   NUL). Never reads past the first non-hex character. */
static int leading_hex_digits(const char *s) {
    int n=0;
    while (n<4 && ((s[n]>='0' && s[n]<='9') ||
                   (s[n]>='A' && s[n]<='F') ||
                   (s[n]>='a' && s[n]<='f'))) {
        n++;
    }
    return n;
}

/* Parse the input text into an unescaped cstring, and populate item.
   str must point at the opening double quote. On success the decoded,
   NUL-terminated text (obtained from cJSON_malloc) is stored in
   item->valuestring, item->type becomes cJSON_String, and the returned
   pointer is just past the closing quote (or at the NUL if the string is
   unterminated). Returns 0 if str does not start with a quote (ep is set to
   str) or if the allocation fails.

   \uXXXX escapes are transcoded from UTF-16 to UTF-8. An escape is dropped
   without output when it is U+0000, a lone low surrogate, a high surrogate
   without a valid low surrogate after it, or malformed.

   Both passes only ever step over characters that have been checked, so a
   truncated escape such as "\u", "\u12" or a backslash right before the NUL
   can no longer move the read pointer past the closing quote or the
   terminator. Previously that made the decode pass copy more bytes than the
   sizing pass had allocated. */
static const char *parse_string(cJSON *item,const char *str) {
    const char *ptr;
    char *ptr2;
    char *out;
    int needed=0;   /* upper bound on the decoded length, in bytes */
    int len;        /* UTF-8 length of the code point being emitted */
    int digits;
    unsigned uc,uc2;
    if (*str!='\"') {
        ep=str;    /* not a string! */
        return 0;
    }

    /* Pass 1: find the closing quote and size the output. An escape pair
       counts as one byte; that is always enough because every escape decodes
       to no more bytes than the characters it occupies. */
    for (ptr=str+1; *ptr!='\"' && *ptr; ptr++) {
        needed++;
        if (*ptr=='\\') {
            if (!ptr[1]) break;   /* dangling backslash at end of input */
            ptr++;                /* skip the escaped character (e.g. a quote) */
        }
    }

    out=(char*)cJSON_malloc(needed+1);
    if (!out) return 0;

    /* Pass 2: copy and unescape. */
    ptr=str+1;
    ptr2=out;
    while (*ptr!='\"' && *ptr) {
        if (*ptr!='\\') {
            *ptr2++=*ptr++;
            continue;
        }
        if (!ptr[1]) {
            ptr++;   /* dangling backslash: drop it and stop at the NUL */
            break;
        }
        ptr++;       /* now on the escape letter */
        switch (*ptr) {
        case 'b':
            *ptr2++='\b';
            break;
        case 'f':
            *ptr2++='\f';
            break;
        case 'n':
            *ptr2++='\n';
            break;
        case 'r':
            *ptr2++='\r';
            break;
        case 't':
            *ptr2++='\t';
            break;
        case 'u': /* transcode utf16 to utf8. */
            digits=leading_hex_digits(ptr+1);
            if (digits<4) {
                /* Malformed escape: drop "\u" and the digits that were
                   there, resume at the first non-hex character. */
                ptr+=digits;
                break;
            }
            uc=parse_hex4(ptr+1);
            ptr+=4;/* get the unicode char. */
            if ((uc>=0xDC00 && uc<=0xDFFF) || uc==0)break;/* check for invalid.*/
            if (uc>=0xD800 && uc<=0xDBFF) {/* UTF16 surrogate pairs.*/
                if (ptr[1]!='\\' || ptr[2]!='u')break;/* missing second-half of surrogate.*/
                /* Second half is truncated/malformed: consume nothing more,
                   the next iteration handles it as a malformed escape. */
                if (leading_hex_digits(ptr+3)<4)break;
                uc2=parse_hex4(ptr+3);
                ptr+=6;
                if (uc2<0xDC00 || uc2>0xDFFF)break;/* invalid second-half of surrogate.*/
                uc=0x10000 + (((uc&0x3FF)<<10) | (uc2&0x3FF));
            }
            len=4;
            if (uc<0x80) len=1;
            else if (uc<0x800) len=2;
            else if (uc<0x10000) len=3;
            /* Write the sequence back to front: continuation bytes first,
               lead byte last, then step over what was written. */
            ptr2+=len;
            switch (len) {
            case 4:
                *--ptr2 =(char)((uc | 0x80) & 0xBF);
                uc >>= 6;
                /* fallthrough */
            case 3:
                *--ptr2 =(char)((uc | 0x80) & 0xBF);
                uc >>= 6;
                /* fallthrough */
            case 2:
                *--ptr2 =(char)((uc | 0x80) & 0xBF);
                uc >>= 6;
                /* fallthrough */
            case 1:
                *--ptr2 =(char)(uc | firstByteMark[len]);
            }
            ptr2+=len;
            break;
        default:
            /* Any other escaped character (quote, backslash, slash, ...) is
               copied through as itself. */
            *ptr2++=*ptr;
            break;
        }
        ptr++;
    }
    *ptr2=0;
    if (*ptr=='\"') ptr++;
    item->valuestring=out;
    item->type=cJSON_String;
    return ptr;
}

本文来自电脑杂谈，转载请注明本文网址：
http://www.pc-fly.com/a/jisuanjixue/article-57098-7.html

相关阅读

发表评论　　请自觉遵守互联网相关的政策法规，严禁发布、暴力、反动的言论

唐肃宗

来真格的让你有来无回

2026年06月04日回复顶转发
何雪梅

日本国土只有中国三十分之一

2026年06月04日回复顶转发

每日福利

热点图片

热点排行