您的位置:首页 > 其它

cJONS序列化工具解读二(数据解析)

2016-05-14 15:48 507 查看

cJSON数据解析

关于数据解析部分,其实这个解析就是个自动机,通过递归或者解析栈进行实现数据的解析

/* Utility to jump whitespace and cr/lf */
//用于跳过ascii小于32的空白字符
static const char *skip(const char *in)
{
while (in && *in && (unsigned char)*in <= 32)
in++;
return in;
}

/* Parse an object - create a new root, and populate. */
cJSON *cJSON_ParseWithOpts(const char *value, const char **return_parse_end, int require_null_terminated)
{
const char *end = 0;
cJSON *c = cJSON_New_Item();
ep = 0;
if (!c)
return 0;       /* memory fail */

//根据前几个字符设置c类型并更新读取位置为end
end = parse_value(c, skip(value));
if (!end)
{
cJSON_Delete(c);    //解析失败,数据不完整
return 0;
}    /* parse failure. ep is set. */

/* if we require null-terminated JSON without appended garbage, skip and then check for a null terminator */
if (require_null_terminated)///??
{
end = skip(end);
if (*end)
{
cJSON_Delete(c);
ep = end;
return 0;
}
}
if (return_parse_end)
*return_parse_end = end;
return c;
}
/* Default options for cJSON_Parse */
cJSON *cJSON_Parse(const char *value) { return cJSON_ParseWithOpts(value, 0, 0); }


①关于重点部分parse_value 对类型解读函数



/* Parser core - when encountering text, process appropriately. */
//将输入字符串解析为具体类型cJSON结构
static const char *parse_value(cJSON *item, const char *value)
{
if (!value)
return 0;    /* Fail on null. */

  //设置结构的具体类型并且返回下一个将要解读数据的位置
if (!strncmp(value, "null", 4)) { item->type = cJSON_NULL;  return value + 4; }
if (!strncmp(value, "false", 5)) { item->type = cJSON_False; return value + 5; }
if (!strncmp(value, "true", 4)) { item->type = cJSON_True; item->valueint = 1;    return value + 4; }
if (*value == '\"') { return parse_string(item, value); }
if (*value == '-' || (*value >= '0' && *value <= '9')) { return parse_number(item, value); }
if (*value == '[') { return parse_array(item, value); }
if (*value == '{') { return parse_object(item, value); }

ep = value; return 0;    /* failure. */
}


②解析字符串部分
解析字符串时, 对于特殊字符也应该转义,比如 "n" 字符应该转换为 'n' 这个换行符。
当然,如果只有特殊字符转换的话,代码不会又这么长, 对于字符串, 还要支持非 ascii 码的字符, 即 utf8字符。
这些字符在字符串中会编码为 uXXXX 的字符串, 我们现在需要还原为 0 - 255 的一个字符。



static unsigned parse_hex4(const char *str)
{
unsigned h = 0;
if (*str >= '0' && *str <= '9')
h += (*str) - '0';
else if (*str >= 'A' && *str <= 'F')
h += 10 + (*str) - 'A';
else if (*str >= 'a' && *str <= 'f')
h += 10 + (*str) - 'a';
else
return 0;

h = h << 4; //*F
str++;
if (*str >= '0' && *str <= '9')
h += (*str) - '0';
else if (*str >= 'A' && *str <= 'F')
h += 10 + (*str) - 'A';
else if (*str >= 'a' && *str <= 'f')
h += 10 + (*str) - 'a';
else
return 0;

h = h << 4;
str++;
if (*str >= '0' && *str <= '9')
h += (*str) - '0';
else if (*str >= 'A' && *str <= 'F')
h += 10 + (*str) - 'A';
else if (*str >= 'a' && *str <= 'f')
h += 10 + (*str) - 'a';
else return 0;

h = h << 4;
str++;
if (*str >= '0' && *str <= '9')
h += (*str) - '0';
else if (*str >= 'A' && *str <= 'F')
h += 10 + (*str) - 'A';
else if (*str >= 'a' && *str <= 'f')
h += 10 + (*str) - 'a';
else
return 0;
return h;
}

/* Parse the input text into an unescaped cstring, and populate item. */
static const unsigned char firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
static const char *parse_string(cJSON *item, const char *str)
{
const char *ptr = str + 1;
char *ptr2;
char *out;
int len = 0;
unsigned uc, uc2;
if (*str != '\"')
{
ep = str;
return 0;
}    /* not a string! */

while(*ptr != '\"' && *ptr && ++len)
if (*ptr++ == '\\') //跳过\续行符
ptr++;    /* Skip escaped quotes. */

//空间申请
out = (char*)cJSON_malloc(len + 1);    /* This is how long we need for the string, roughly. */
if (!out)
return 0;

ptr = str + 1;//跳过“开始
ptr2 = out;
while (*ptr != '\"' && *ptr)
{
if (*ptr != '\\')
*ptr2++ = *ptr++;
else    //转义字符处理
{
ptr++;
switch (*ptr)
{
case 'b': *ptr2++ = '\b';    break;
case 'f': *ptr2++ = '\f';    break;
case 'n': *ptr2++ = '\n';    break;
case 'r': *ptr2++ = '\r';    break;
case 't': *ptr2++ = '\t';    break;
case 'u':     /* transcode utf16 to utf8. */
uc = parse_hex4(ptr + 1);
ptr += 4;    /* get the unicode char. */

if ((uc >= 0xDC00 && uc <= 0xDFFF) || uc == 0)
break;    /* check for invalid.    */

if (uc >= 0xD800 && uc <= 0xDBFF)    /* UTF16 surrogate pairs.    */
{
if (ptr[1] != '\\' || ptr[2] != 'u')
break;    /* missing second-half of surrogate.    */
uc2 = parse_hex4(ptr + 3);
ptr += 6;
if (uc2<0xDC00 || uc2>0xDFFF)
break;    /* invalid second-half of surrogate.    */
uc = 0x10000 + (((uc & 0x3FF) << 10) | (uc2 & 0x3FF));
}

len = 4;
if (uc<0x80)
len = 1;
else if (uc<0x800)
len = 2;
else if (uc<0x10000)
len = 3;
ptr2 += len;

switch (len)
{
case 4:
*--ptr2 = ((uc | 0x80) & 0xBF); uc >>= 6;
case 3:
*--ptr2 = ((uc | 0x80) & 0xBF); uc >>= 6;
case 2:
*--ptr2 = ((uc | 0x80) & 0xBF); uc >>= 6;
case 1:
*--ptr2 = (uc | firstByteMark[len]);
}
ptr2 += len;
break;
default:
*ptr2++ = *ptr; break;
}
ptr++;
}
}
*ptr2 = 0;
if (*ptr == '\"') ptr++;
item->valuestring = out;
item->type = cJSON_String;
return ptr;
}


关于具体的字符解析中的编码相关问题,请自行阅读编码相关知识

③数字解析



/* Parse the input text to generate a number, and populate the result into item. */
static const char *parse_number(cJSON *item, const char *num)
{
double n = 0, sign = 1, scale = 0;
int subscale = 0,
signsubscale = 1;

if (*num == '-')
sign = -1, num++;    /* Has sign? */
if (*num == '0')
num++;            /* is zero */
if (*num >= '1' && *num <= '9')
do
{
n = (n*10.0) + (*num++ - '0');
}while (*num >= '0' && *num <= '9');    /* Number? */
if (*num == '.' && num[1] >= '0' && num[1] <= '9')
{
num++;
do
n = (n*10.0) + (*num++ - '0'), scale--;
while (*num >= '0' && *num <= '9');
}    /* Fractional part? */
if (*num == 'e' || *num == 'E')        /* Exponent? */
{
num++;
if (*num == '+')
num++;
else if (*num == '-')
signsubscale = -1, num++;        /* With sign? */
while (*num >= '0' && *num <= '9')
subscale = (subscale * 10) + (*num++ - '0');    /* Number? */
}

n = sign*n*pow(10.0, (scale + subscale*signsubscale));    /* number = +/- number.fraction * 10^+/- exponent */

item->valuedouble = n;
item->valueint = (int)n;
item->type = cJSON_Number;
return num;
}


④解析数组
解析数组, 需要先遇到 '[' 这个符号, 然后挨个的读取节点内容, 节点使用 ',' 分隔, ',' 前后还可能有空格, 最后以 ']' 结尾。
我们要编写的也是这样。
先创建一个数组对象, 判断是否有儿子, 有的话读取第一个儿子, 然后判断是不是有 逗号, 有的话循环读取后面的儿子。
最后读取 ']' 即可。



/* Build an array from input text. */
static const char *parse_array(cJSON *item, const char *value)
{
cJSON *child;
if (*value != '[')
{
ep = value;
return 0;
}    /* not an array! */

item->type = cJSON_Array;
value = skip(value + 1);
if (*value == ']')
return value + 1;    /* empty array. */

item->child = child = cJSON_New_Item();
if (!item->child)
return 0;         /* memory fail */
//解析数组内结构
value = skip(parse_value(child, skip(value)));    /* skip any spacing, get the value. */
if (!value) return 0;

while (*value == ',')
{
cJSON *new_item;
if (!(new_item = cJSON_New_Item())) return 0;     /* memory fail */

child->next = new_item;
new_item->prev = child;
child = new_item;
value = skip(parse_value(child, skip(value + 1)));
if (!value)
return 0;    /* memory fail */
}

if (*value == ']')
return value + 1;    /* end of array */
ep = value;
return 0;    /* malformed. */
}


⑤解析对象

解析对象和解析数组类似, 只不过对象的一个儿子是个 key - value, key 是字符串, value 可能是任何值, key 和 value 用 ":" 分隔。



/* Render an object to text. */
static char *print_object(cJSON *item, int depth, int fmt, printbuffer *p)
{
char **entries = 0, **names = 0;
char *out = 0, *ptr, *ret, *str; int len = 7, i = 0, j;
cJSON *child = item->child;
int numentries = 0, fail = 0;
size_t tmplen = 0;
/* Count the number of entries. */
while (child) numentries++, child = child->next;
/* Explicitly handle empty object case */
if (!numentries)
{
if (p) out = ensure(p, fmt ? depth + 4 : 3);
else    out = (char*)cJSON_malloc(fmt ? depth + 4 : 3);
if (!out)    return 0;
ptr = out; *ptr++ = '{';
if (fmt) { *ptr++ = '\n'; for (i = 0; i<depth - 1; i++) *ptr++ = '\t'; }
*ptr++ = '}'; *ptr++ = 0;
return out;
}
if (p)
{
/* Compose the output: */
i = p->offset;
len = fmt ? 2 : 1;    ptr = ensure(p, len + 1);    if (!ptr) return 0;
*ptr++ = '{';    if (fmt) *ptr++ = '\n';    *ptr = 0;    p->offset += len;
child = item->child; depth++;
while (child)
{
if (fmt)
{
ptr = ensure(p, depth);    if (!ptr) return 0;
for (j = 0; j<depth; j++) *ptr++ = '\t';
p->offset += depth;
}
print_string_ptr(child->string, p);
p->offset = update(p);

len = fmt ? 2 : 1;
ptr = ensure(p, len);    if (!ptr) return 0;
*ptr++ = ':'; if (fmt) *ptr++ = '\t';
p->offset += len;

print_value(child, depth, fmt, p);
p->offset = update(p);

len = (fmt ? 1 : 0) + (child->next ? 1 : 0);
ptr = ensure(p, len + 1); if (!ptr) return 0;
if (child->next) *ptr++ = ',';
if (fmt) *ptr++ = '\n'; *ptr = 0;
p->offset += len;
child = child->next;
}
ptr = ensure(p, fmt ? (depth + 1) : 2);     if (!ptr) return 0;
if (fmt)    for (i = 0; i<depth - 1; i++) *ptr++ = '\t';
*ptr++ = '}'; *ptr = 0;
out = (p->buffer) + i;
}
else
{
/* Allocate space for the names and the objects */
entries = (char**)cJSON_malloc(numentries * sizeof(char*));
if (!entries) return 0;
names = (char**)cJSON_malloc(numentries * sizeof(char*));
if (!names) { cJSON_free(entries); return 0; }
memset(entries, 0, sizeof(char*)*numentries);
memset(names, 0, sizeof(char*)*numentries);

/* Collect all the results into our arrays: */
child = item->child; depth++; if (fmt) len += depth;
while (child)
{
names[i] = str = print_string_ptr(child->string, 0);
entries[i++] = ret = print_value(child, depth, fmt, 0);
if (str && ret) len += strlen(ret) + strlen(str) + 2 + (fmt ? 2 + depth : 0); else fail = 1;
child = child->next;
}

/* Try to allocate the output string */
if (!fail)    out = (char*)cJSON_malloc(len);
if (!out) fail = 1;

/* Handle failure */
if (fail)
{
for (i = 0; i<numentries; i++) { if (names[i]) cJSON_free(names[i]); if (entries[i]) cJSON_free(entries[i]); }
cJSON_free(names); cJSON_free(entries);
return 0;
}

/* Compose the output: */
*out = '{'; ptr = out + 1; if (fmt)*ptr++ = '\n'; *ptr = 0;
for (i = 0; i<numentries; i++)
{
if (fmt) for (j = 0; j<depth; j++) *ptr++ = '\t';
tmplen = strlen(names[i]); memcpy(ptr, names[i], tmplen); ptr += tmplen;
*ptr++ = ':'; if (fmt) *ptr++ = '\t';
strcpy(ptr, entries[i]); ptr += strlen(entries[i]);
if (i != numentries - 1) *ptr++ = ',';
if (fmt) *ptr++ = '\n'; *ptr = 0;
cJSON_free(names[i]); cJSON_free(entries[i]);
}

cJSON_free(names); cJSON_free(entries);
if (fmt) for (i = 0; i<depth - 1; i++) *ptr++ = '\t';
*ptr++ = '}'; *ptr++ = 0;
}
return out;
}


这样都实现后, 字符串解析为 json 对象就实现了。

⑥序列化

序列化也就是格式化输出了。

序列化又分为格式化输出,压缩输出

/* Render a cJSON item/entity/structure to text. */
char *cJSON_Print(cJSON *item)
{
return print_value(item, 0, 1, 0);
}
char *cJSON_PrintUnformatted(cJSON *item)
{
return print_value(item, 0, 0, 0);
}

char *cJSON_PrintBuffered(cJSON *item, int prebuffer, int fmt)
{
printbuffer p;
p.buffer = (char*)cJSON_malloc(prebuffer);
p.length = prebuffer;
p.offset = 0;
return print_value(item, 0, fmt, &p);
return p.buffer;
}

/* Render a value to text. */
static char *print_value(cJSON *item, int depth, int fmt, printbuffer *p)
{
char *out = 0;
if (!item) return 0;
if (p)
{
switch ((item->type) & 255)
{
case cJSON_NULL: {out = ensure(p, 5);    if (out) strcpy(out, "null");    break; }
case cJSON_False: {out = ensure(p, 6);    if (out) strcpy(out, "false");    break; }
case cJSON_True: {out = ensure(p, 5);    if (out) strcpy(out, "true");    break; }
case cJSON_Number:    out = print_number(item, p); break;
case cJSON_String:    out = print_string(item, p); break;
case cJSON_Array:    out = print_array(item, depth, fmt, p); break;
case cJSON_Object:    out = print_object(item, depth, fmt, p); break;
}
}
else
{
switch ((item->type) & 255)
{
case cJSON_NULL:    out = cJSON_strdup("null");    break;
case cJSON_False:    out = cJSON_strdup("false"); break;
case cJSON_True:    out = cJSON_strdup("true"); break;
case cJSON_Number:    out = print_number(item, 0); break;
case cJSON_String:    out = print_string(item, 0); break;
case cJSON_Array:    out = print_array(item, depth, fmt, 0); break;
case cJSON_Object:    out = print_object(item, depth, fmt, 0); break;
}
}
return out;
}


假设我们要使用格式化输出, 也就是美化输出。

cjson 的做法不是边分析 json 边输出, 而是预先将要输的内容全部按字符串存在内存中, 最后输出整个字符串。

这对于比较大的 json 来说, 内存就是个问题了。

另外,格式化输出依靠的是节点的深度, 这个也可以优化, 一般宽度超过80 时, 就需要从新的一行算起的。

/* Render an object to text. */
static char *print_object(cJSON *item, int depth, int fmt, printbuffer *p)
{
char **entries = 0, **names = 0;
char *out = 0, *ptr, *ret, *str; int len = 7, i = 0, j;
cJSON *child = item->child;
int numentries = 0, fail = 0;
size_t tmplen = 0;
/* Count the number of entries. */
while (child) numentries++, child = child->next;
/* Explicitly handle empty object case */
if (!numentries)
{
if (p) out = ensure(p, fmt ? depth + 4 : 3);
else    out = (char*)cJSON_malloc(fmt ? depth + 4 : 3);
if (!out)    return 0;
ptr = out; *ptr++ = '{';
if (fmt) { *ptr++ = '\n'; for (i = 0; i<depth - 1; i++) *ptr++ = '\t'; }
*ptr++ = '}'; *ptr++ = 0;
return out;
}
if (p)
{
/* Compose the output: */
i = p->offset;
len = fmt ? 2 : 1;    ptr = ensure(p, len + 1);    if (!ptr) return 0;
*ptr++ = '{';    if (fmt) *ptr++ = '\n';    *ptr = 0;    p->offset += len;
child = item->child; depth++;
while (child)
{
if (fmt)
{
ptr = ensure(p, depth);    if (!ptr) return 0;
for (j = 0; j<depth; j++) *ptr++ = '\t';
p->offset += depth;
}
print_string_ptr(child->string, p);
p->offset = update(p);

len = fmt ? 2 : 1;
ptr = ensure(p, len);    if (!ptr) return 0;
*ptr++ = ':'; if (fmt) *ptr++ = '\t';
p->offset += len;

print_value(child, depth, fmt, p);
p->offset = update(p);

len = (fmt ? 1 : 0) + (child->next ? 1 : 0);
ptr = ensure(p, len + 1); if (!ptr) return 0;
if (child->next) *ptr++ = ',';
if (fmt) *ptr++ = '\n'; *ptr = 0;
p->offset += len;
child = child->next;
}
ptr = ensure(p, fmt ? (depth + 1) : 2);     if (!ptr) return 0;
if (fmt)    for (i = 0; i<depth - 1; i++) *ptr++ = '\t';
*ptr++ = '}'; *ptr = 0;
out = (p->buffer) + i;
}
else
{
/* Allocate space for the names and the objects */
entries = (char**)cJSON_malloc(numentries * sizeof(char*));
if (!entries) return 0;
names = (char**)cJSON_malloc(numentries * sizeof(char*));
if (!names) { cJSON_free(entries); return 0; }
memset(entries, 0, sizeof(char*)*numentries);
memset(names, 0, sizeof(char*)*numentries);

/* Collect all the results into our arrays: */
child = item->child; depth++; if (fmt) len += depth;
while (child)
{
names[i] = str = print_string_ptr(child->string, 0);
entries[i++] = ret = print_value(child, depth, fmt, 0);
if (str && ret) len += strlen(ret) + strlen(str) + 2 + (fmt ? 2 + depth : 0); else fail = 1;
child = child->next;
}

/* Try to allocate the output string */
if (!fail)    out = (char*)cJSON_malloc(len);
if (!out) fail = 1;

/* Handle failure */
if (fail)
{
for (i = 0; i<numentries; i++) { if (names[i]) cJSON_free(names[i]); if (entries[i]) cJSON_free(entries[i]); }
cJSON_free(names); cJSON_free(entries);
return 0;
}

/* Compose the output: */
*out = '{'; ptr = out + 1; if (fmt)*ptr++ = '\n'; *ptr = 0;
for (i = 0; i<numentries; i++)
{
if (fmt) for (j = 0; j<depth; j++) *ptr++ = '\t';
tmplen = strlen(names[i]); memcpy(ptr, names[i], tmplen); ptr += tmplen;
*ptr++ = ':'; if (fmt) *ptr++ = '\t';
strcpy(ptr, entries[i]); ptr += strlen(entries[i]);
if (i != numentries - 1) *ptr++ = ',';
if (fmt) *ptr++ = '\n'; *ptr = 0;
cJSON_free(names[i]); cJSON_free(entries[i]);
}

cJSON_free(names); cJSON_free(entries);
if (fmt) for (i = 0; i<depth - 1; i++) *ptr++ = '\t';
*ptr++ = '}'; *ptr++ = 0;
}
return out;
}


static char *print_array(cJSON *item, int depth, int fmt, printbuffer *p)
{
char **entries;
char *out = 0, *ptr, *ret; int len = 5;
cJSON *child = item->child;
int numentries = 0, i = 0, fail = 0;
size_t tmplen = 0;

/* How many entries in the array? */
while (child) numentries++, child = child->next;
/* Explicitly handle numentries==0 */
if (!numentries)
{
if (p)    out = ensure(p, 3);
else    out = (char*)cJSON_malloc(3);
if (out) strcpy(out, "[]");
return out;
}

if (p)
{
/* Compose the output array. */
i = p->offset;
ptr = ensure(p, 1); if (!ptr) return 0;    *ptr = '[';    p->offset++;
child = item->child;
while (child && !fail)
{
print_value(child, depth + 1, fmt, p);
p->offset = update(p);
if (child->next) { len = fmt ? 2 : 1; ptr = ensure(p, len + 1); if (!ptr) return 0; *ptr++ = ','; if (fmt)*ptr++ = ' '; *ptr = 0; p->offset += len; }
child = child->next;
}
ptr = ensure(p, 2); if (!ptr) return 0;    *ptr++ = ']'; *ptr = 0;
out = (p->buffer) + i;
}
else
{
/* Allocate an array to hold the values for each */
entries = (char**)cJSON_malloc(numentries * sizeof(char*));
if (!entries) return 0;
memset(entries, 0, numentries * sizeof(char*));
/* Retrieve all the results: */
child = item->child;
while (child && !fail)
{
ret = print_value(child, depth + 1, fmt, 0);
entries[i++] = ret;
if (ret) len += strlen(ret) + 2 + (fmt ? 1 : 0); else fail = 1;
child = child->next;
}

/* If we didn't fail, try to malloc the output string */
if (!fail)    out = (char*)cJSON_malloc(len);
/* If that fails, we fail. */
if (!out) fail = 1;

/* Handle failure. */
if (fail)
{
for (i = 0; i<numentries; i++) if (entries[i]) cJSON_free(entries[i]);
cJSON_free(entries);
return 0;
}

/* Compose the output array. */
*out = '[';
ptr = out + 1; *ptr = 0;
for (i = 0; i<numentries; i++)
{
tmplen = strlen(entries[i]); memcpy(ptr, entries[i], tmplen); ptr += tmplen;
if (i != numentries - 1) { *ptr++ = ','; if (fmt)*ptr++ = ' '; *ptr = 0; }
cJSON_free(entries[i]);
}
cJSON_free(entries);
*ptr++ = ']'; *ptr++ = 0;
}
return out;
}
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: