Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 099bb1a

Browse files
committed
Pass buffer to primitive parse functions
1 parent 9c33093, commit 099bb1a

File tree

2 files changed

+105
-89
lines changed

2 files changed

+105
-89
lines changed

‎src/generic/stage2/structural_iterator.h

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -26,6 +26,10 @@ class structural_iterator {
2626
really_inlinecharpeek_next_char() {
2727
return buf[*(current_structural+1)];
2828
}
29+
really_inlineconstuint8_t*advance() {
30+
current_structural++;
31+
return &buf[*current_structural];
32+
}
2933
really_inlinecharadvance_char() {
3034
current_structural++;
3135
return buf[*current_structural];

‎src/generic/stage2/structural_parser.h

Lines changed: 101 additions & 89 deletions
Original file line number | Diff line number | Diff line change
@@ -138,10 +138,13 @@ struct structural_parser : structural_iterator {
138138
current_string_buf_loc = dst +1;
139139
}
140140

141-
WARN_UNUSED really_inline error_codeparse_string(bool key =false) {
141+
WARN_UNUSED really_inline error_codeparse_key(constuint8_t *key) {
142+
returnparse_string(key,true);
143+
}
144+
WARN_UNUSED really_inline error_codeparse_string(constuint8_t *value,bool key =false) {
142145
log_value(key ?"key" :"string");
143146
uint8_t *dst =on_start_string();
144-
dst =stringparsing::parse_string(current(), dst);
147+
dst =stringparsing::parse_string(value, dst);
145148
if (dst ==nullptr) {
146149
log_error("Invalid escape in string");
147150
return STRING_ERROR;
@@ -150,79 +153,75 @@ struct structural_parser : structural_iterator {
150153
return SUCCESS;
151154
}
152155

153-
WARN_UNUSED really_inline error_codeparse_number(constuint8_t *src) {
156+
WARN_UNUSED really_inline error_codeparse_number(constuint8_t *value) {
154157
log_value("number");
155-
if (!numberparsing::parse_number(src, tape)) {log_error("Invalid number");return NUMBER_ERROR; }
158+
if (!numberparsing::parse_number(value, tape)) {log_error("Invalid number");return NUMBER_ERROR; }
156159
return SUCCESS;
157160
}
158-
WARN_UNUSED really_inline error_codeparse_number() {
159-
returnparse_number(current());
160-
}
161-
162-
really_inline error_codeparse_root_number() {
163-
/**
164-
* We need to make a copy to make sure that the string is space terminated.
165-
* This is not about padding the input, which should already padded up
166-
* to len + SIMDJSON_PADDING. However, we have no control at this stage
167-
* on how the padding was done. What if the input string was padded with nulls?
168-
* It is quite common for an input string to have an extra null character (C string).
169-
* We do not want to allow 9\0 (where \0 is the null character) inside a JSON
170-
* document, but the string "9\0" by itself is fine. So we make a copy and
171-
* pad the input with spaces when we know that there is just one input element.
172-
* This copy is relatively expensive, but it will almost never be called in
173-
* practice unless you are in the strange scenario where you have many JSON
174-
* documents made of single atoms.
175-
*/
176-
uint8_t *copy =static_cast<uint8_t *>(malloc(parser.len + SIMDJSON_PADDING));
161+
162+
really_inline error_codeparse_root_number(constuint8_t *value) {
163+
//
164+
// We need to make a copy to make sure that the string is space terminated.
165+
// This is not about padding the input, which should already padded up
166+
// to len + SIMDJSON_PADDING. However, we have no control at this stage
167+
// on how the padding was done. What if the input string was padded with nulls?
168+
// It is quite common for an input string to have an extra null character (C string).
169+
// We do not want to allow 9\0 (where \0 is the null character) inside a JSON
170+
// document, but the string "9\0" by itself is fine. So we make a copy and
171+
// pad the input with spaces when we know that there is just one input element.
172+
// This copy is relatively expensive, but it will almost never be called in
173+
// practice unless you are in the strange scenario where you have many JSON
174+
// documents made of single atoms.
175+
//
176+
uint8_t *copy =static_cast<uint8_t *>(malloc(remaining_len() + SIMDJSON_PADDING));
177177
if (copy ==nullptr) {
178178
return MEMALLOC;
179179
}
180-
memcpy(copy, buf, parser.len);
181-
memset(copy + parser.len,'', SIMDJSON_PADDING);
182-
size_t idx = *current_structural;
183-
error_code error =parse_number(&copy[idx]);// parse_number does not throw
180+
memcpy(copy, value,remaining_len());
181+
memset(copy +remaining_len(),'', SIMDJSON_PADDING);
182+
error_code error =parse_number(copy);
184183
free(copy);
185184
return error;
186185
}
187186

188-
WARN_UNUSED really_inline error_codeparse_true_atom() {
187+
WARN_UNUSED really_inline error_codeparse_true_atom(constuint8_t *value) {
189188
log_value("true");
190-
if (!atomparsing::is_valid_true_atom(current())) {return T_ATOM_ERROR; }
189+
if (!atomparsing::is_valid_true_atom(value)) {return T_ATOM_ERROR; }
191190
tape.append(0, internal::tape_type::TRUE_VALUE);
192191
return SUCCESS;
193192
}
194193

195-
WARN_UNUSED really_inline error_codeparse_root_true_atom() {
194+
WARN_UNUSED really_inline error_codeparse_root_true_atom(constuint8_t *value) {
196195
log_value("true");
197-
if (!atomparsing::is_valid_true_atom(current(),remaining_len())) {return T_ATOM_ERROR; }
196+
if (!atomparsing::is_valid_true_atom(value,remaining_len())) {return T_ATOM_ERROR; }
198197
tape.append(0, internal::tape_type::TRUE_VALUE);
199198
return SUCCESS;
200199
}
201200

202-
WARN_UNUSED really_inline error_codeparse_false_atom() {
201+
WARN_UNUSED really_inline error_codeparse_false_atom(constuint8_t *value) {
203202
log_value("false");
204-
if (!atomparsing::is_valid_false_atom(current())) {return F_ATOM_ERROR; }
203+
if (!atomparsing::is_valid_false_atom(value)) {return F_ATOM_ERROR; }
205204
tape.append(0, internal::tape_type::FALSE_VALUE);
206205
return SUCCESS;
207206
}
208207

209-
WARN_UNUSED really_inline error_codeparse_root_false_atom() {
208+
WARN_UNUSED really_inline error_codeparse_root_false_atom(constuint8_t *value) {
210209
log_value("false");
211-
if (!atomparsing::is_valid_false_atom(current(),remaining_len())) {return F_ATOM_ERROR; }
210+
if (!atomparsing::is_valid_false_atom(value,remaining_len())) {return F_ATOM_ERROR; }
212211
tape.append(0, internal::tape_type::FALSE_VALUE);
213212
return SUCCESS;
214213
}
215214

216-
WARN_UNUSED really_inline error_codeparse_null_atom() {
215+
WARN_UNUSED really_inline error_codeparse_null_atom(constuint8_t *value) {
217216
log_value("null");
218-
if (!atomparsing::is_valid_null_atom(current())) {return N_ATOM_ERROR; }
217+
if (!atomparsing::is_valid_null_atom(value)) {return N_ATOM_ERROR; }
219218
tape.append(0, internal::tape_type::NULL_VALUE);
220219
return SUCCESS;
221220
}
222221

223-
WARN_UNUSED really_inline error_codeparse_root_null_atom() {
222+
WARN_UNUSED really_inline error_codeparse_root_null_atom(constuint8_t *value) {
224223
log_value("null");
225-
if (!atomparsing::is_valid_null_atom(current(),remaining_len())) {return N_ATOM_ERROR; }
224+
if (!atomparsing::is_valid_null_atom(value,remaining_len())) {return N_ATOM_ERROR; }
226225
tape.append(0, internal::tape_type::NULL_VALUE);
227226
return SUCCESS;
228227
}
@@ -279,50 +278,54 @@ WARN_UNUSED static really_inline error_code parse_structurals(dom_parser_impleme
279278
//
280279
// Read first value
281280
//
282-
switch (parser.current_char()) {
283-
case'{': {
284-
if (parser.empty_object()) {goto document_end; }
285-
SIMDJSON_TRY( parser.start_object() );
286-
goto object_begin;
287-
}
288-
case'[': {
289-
if (parser.empty_array()) {goto document_end; }
290-
SIMDJSON_TRY( parser.start_array() );
291-
// Make sure the outer array is closed before continuing; otherwise, there are ways we could get
292-
// into memory corruption. See https://github.com/simdjson/simdjson/issues/906
293-
if (!STREAMING) {
294-
if (parser.buf[dom_parser.structural_indexes[dom_parser.n_structural_indexes -1]] !=']') {
295-
return TAPE_ERROR;
281+
{
282+
switch (parser.current_char()) {
283+
case'{': {
284+
if (parser.empty_object()) {goto document_end; }
285+
SIMDJSON_TRY( parser.start_object() );
286+
goto object_begin;
287+
}
288+
case'[': {
289+
if (parser.empty_array()) {goto document_end; }
290+
SIMDJSON_TRY( parser.start_array() );
291+
// Make sure the outer array is closed before continuing; otherwise, there are ways we could get
292+
// into memory corruption. See https://github.com/simdjson/simdjson/issues/906
293+
if (!STREAMING) {
294+
if (parser.buf[dom_parser.structural_indexes[dom_parser.n_structural_indexes -1]] !=']') {
295+
return TAPE_ERROR;
296+
}
296297
}
298+
goto array_begin;
299+
}
300+
case'"':SIMDJSON_TRY( parser.parse_string(parser.current()) );goto document_end;
301+
case't':SIMDJSON_TRY( parser.parse_root_true_atom(parser.current()) );goto document_end;
302+
case'f':SIMDJSON_TRY( parser.parse_root_false_atom(parser.current()) );goto document_end;
303+
case'n':SIMDJSON_TRY( parser.parse_root_null_atom(parser.current()) );goto document_end;
304+
case'-':
305+
case'0':case'1':case'2':case'3':case'4':
306+
case'5':case'6':case'7':case'8':case'9':
307+
SIMDJSON_TRY( parser.parse_root_number(parser.current()) );goto document_end;
308+
default:
309+
parser.log_error("Document starts with a non-value character");
310+
return TAPE_ERROR;
297311
}
298-
goto array_begin;
299-
}
300-
case'"':SIMDJSON_TRY( parser.parse_string() );goto document_end;
301-
case't':SIMDJSON_TRY( parser.parse_root_true_atom() );goto document_end;
302-
case'f':SIMDJSON_TRY( parser.parse_root_false_atom() );goto document_end;
303-
case'n':SIMDJSON_TRY( parser.parse_root_null_atom() );goto document_end;
304-
case'-':
305-
case'0':case'1':case'2':case'3':case'4':
306-
case'5':case'6':case'7':case'8':case'9':
307-
SIMDJSON_TRY( parser.parse_root_number() );goto document_end;
308-
default:
309-
parser.log_error("Document starts with a non-value character");
310-
return TAPE_ERROR;
311312
}
312313

313314
//
314315
// Object parser states
315316
//
316-
object_begin:
317-
if (parser.advance_char() !='"') {
317+
object_begin: {
318+
constuint8_t *key = parser.advance();
319+
if (*key !='"') {
318320
parser.log_error("Object does not start with a key");
319321
return TAPE_ERROR;
320322
}
321323
parser.increment_count();
322-
SIMDJSON_TRY( parser.parse_string(true) );
324+
SIMDJSON_TRY( parser.parse_key(key) );
323325
goto object_field;
326+
}// object_begin:
324327

325-
object_field:
328+
object_field: {
326329
if (unlikely( parser.advance_char() !=':' )) { parser.log_error("Missing colon after key in object");return TAPE_ERROR; }
327330
switch (parser.advance_char()) {
328331
case'{': {
@@ -335,46 +338,52 @@ WARN_UNUSED static really_inline error_code parse_structurals(dom_parser_impleme
335338
SIMDJSON_TRY( parser.start_array() );
336339
goto array_begin;
337340
}
338-
case'"':SIMDJSON_TRY( parser.parse_string() );break;
339-
case't':SIMDJSON_TRY( parser.parse_true_atom() );break;
340-
case'f':SIMDJSON_TRY( parser.parse_false_atom() );break;
341-
case'n':SIMDJSON_TRY( parser.parse_null_atom() );break;
341+
case'"':SIMDJSON_TRY( parser.parse_string(parser.current()) );break;
342+
case't':SIMDJSON_TRY( parser.parse_true_atom(parser.current()) );break;
343+
case'f':SIMDJSON_TRY( parser.parse_false_atom(parser.current()) );break;
344+
case'n':SIMDJSON_TRY( parser.parse_null_atom(parser.current()) );break;
342345
case'-':
343346
case'0':case'1':case'2':case'3':case'4':
344347
case'5':case'6':case'7':case'8':case'9':
345-
SIMDJSON_TRY( parser.parse_number() );break;
348+
SIMDJSON_TRY( parser.parse_number(parser.current()) );break;
346349
default:
347350
parser.log_error("Non-value found when value was expected!");
348351
return TAPE_ERROR;
349352
}
353+
}// object_field:
350354

351-
object_continue:
355+
object_continue: {
352356
switch (parser.advance_char()) {
353-
case',':
357+
case',': {
354358
parser.increment_count();
355-
if (unlikely( parser.advance_char() !='"' )) { parser.log_error("Key string missing at beginning of field in object");return TAPE_ERROR; }
356-
SIMDJSON_TRY( parser.parse_string(true) );
359+
constuint8_t *key = parser.advance();
360+
if (unlikely( *key !='"' )) { parser.log_error("Key string missing at beginning of field in object");return TAPE_ERROR; }
361+
SIMDJSON_TRY( parser.parse_key(key) );
357362
goto object_field;
363+
}
358364
case'}':
359365
parser.end_object();
360366
goto scope_end;
361367
default:
362368
parser.log_error("No comma between object fields");
363369
return TAPE_ERROR;
364370
}
371+
}// object_continue:
365372

366-
scope_end:
373+
scope_end: {
367374
if (parser.depth ==0) {goto document_end; }
368375
if (parser.parser.is_array[parser.depth]) {goto array_continue; }
369376
goto object_continue;
377+
}// scope_end:
370378

371379
//
372380
// Array parser states
373381
//
374-
array_begin:
382+
array_begin: {
375383
parser.increment_count();
384+
}// array_begin:
376385

377-
array_value:
386+
array_value: {
378387
switch (parser.advance_char()) {
379388
case'{': {
380389
if (parser.empty_object()) {break; };
@@ -386,20 +395,21 @@ WARN_UNUSED static really_inline error_code parse_structurals(dom_parser_impleme
386395
SIMDJSON_TRY( parser.start_array() );
387396
goto array_begin;
388397
}
389-
case'"':SIMDJSON_TRY( parser.parse_string() );break;
390-
case't':SIMDJSON_TRY( parser.parse_true_atom() );break;
391-
case'f':SIMDJSON_TRY( parser.parse_false_atom() );break;
392-
case'n':SIMDJSON_TRY( parser.parse_null_atom() );break;
398+
case'"':SIMDJSON_TRY( parser.parse_string(parser.current()) );break;
399+
case't':SIMDJSON_TRY( parser.parse_true_atom(parser.current()) );break;
400+
case'f':SIMDJSON_TRY( parser.parse_false_atom(parser.current()) );break;
401+
case'n':SIMDJSON_TRY( parser.parse_null_atom(parser.current()) );break;
393402
case'-':
394403
case'0':case'1':case'2':case'3':case'4':
395404
case'5':case'6':case'7':case'8':case'9':
396-
SIMDJSON_TRY( parser.parse_number() );break;
405+
SIMDJSON_TRY( parser.parse_number(parser.current()) );break;
397406
default:
398407
parser.log_error("Non-value found when value was expected!");
399408
return TAPE_ERROR;
400409
}
410+
}// array_value:
401411

402-
array_continue:
412+
array_continue: {
403413
switch (parser.advance_char()) {
404414
case',':
405415
parser.increment_count();
@@ -411,9 +421,11 @@ WARN_UNUSED static really_inline error_code parse_structurals(dom_parser_impleme
411421
parser.log_error("Missing comma between array values");
412422
return TAPE_ERROR;
413423
}
424+
}// array_continue:
414425

415-
document_end:
426+
document_end: {
416427
return parser.finish();
428+
}// document_end:
417429

418430
}// parse_structurals()
419431

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp