Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 03d54f8

Browse files
committed
Use SAX model for stage 2
1 parent 553e6d7, commit 03d54f8

File tree

3 files changed

+246
-160
lines changed

3 files changed

+246
-160
lines changed

‎src/generic/stage2/logger.h

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -28,8 +28,8 @@ namespace logger {
2828
if (LOG_ENABLED) {
2929
log_depth =0;
3030
printf("\n");
31-
printf("| %-*s | %-*s | %-*s | %-*s |%-*s |Detail |\n", LOG_EVENT_LEN,"Event", LOG_BUFFER_LEN,"Buffer", LOG_SMALL_BUFFER_LEN,"Next",5,"Next#",5,"Tape#");
32-
printf("|%.*s|%.*s|%.*s|%.*s|%.*s|--------|\n", LOG_EVENT_LEN+2, DASHES, LOG_BUFFER_LEN+2, DASHES, LOG_SMALL_BUFFER_LEN+2, DASHES,5+2, DASHES,5+2, DASHES);
31+
printf("| %-*s | %-*s | %-*s | %-*s | Detail |\n", LOG_EVENT_LEN,"Event", LOG_BUFFER_LEN,"Buffer", LOG_SMALL_BUFFER_LEN,"Next",5,"Next#");
32+
printf("|%.*s|%.*s|%.*s|%.*s|--------|\n", LOG_EVENT_LEN+2, DASHES, LOG_BUFFER_LEN+2, DASHES, LOG_SMALL_BUFFER_LEN+2, DASHES,5+2, DASHES);
3333
}
3434
}
3535

@@ -71,7 +71,7 @@ namespace logger {
7171
}else {
7272
printf("| %-*s", LOG_INDEX_LEN,"");
7373
}
74-
printf("| %*u", LOG_INDEX_LEN, structurals.next_tape_index());
74+
//printf("| %*u ", LOG_INDEX_LEN, structurals.next_tape_index());
7575
printf("| %-s", detail);
7676
printf("|\n");
7777
}

‎src/generic/stage2/structural_parser.h

Lines changed: 48 additions & 157 deletions
Original file line number | Diff line number | Diff line change
@@ -3,226 +3,109 @@
33
// We assume the file in which it is include already includes
44
// "simdjson/stage2.h" (this simplifies amalgation)
55

6-
#include"generic/stage2/tape_writer.h"
76
#include"generic/stage2/logger.h"
8-
#include"generic/stage2/atomparsing.h"
97
#include"generic/stage2/structural_iterator.h"
108

119
namespace {// Make everything here private
1210
namespace SIMDJSON_IMPLEMENTATION {
1311
namespace stage2 {
1412

13+
#define SIMDJSON_TRY(EXPR) { auto _err = (EXPR); if (_err) { return _err; } }
14+
15+
template<typename T>
1516
struct structural_parser : structural_iterator {
16-
/** Lets you append to the tape*/
17-
tape_writer tape;
18-
/** Next write location in the string buf for stage 2 parsing*/
19-
uint8_t *current_string_buf_loc;
17+
/** Receiver that actually parses the strings and builds the tape*/
18+
T builder;
2019
/** Current depth (nested objects and arrays)*/
2120
uint32_t depth{0};
2221

2322
// For non-streaming, to pass an explicit 0 as next_structural, which enables optimizations
2423
really_inline structural_parser(dom_parser_implementation &_parser, uint32_t start_structural_index)
2524
: structural_iterator(_parser, start_structural_index),
26-
tape{parser.doc->tape.get()},
27-
current_string_buf_loc{parser.doc->string_buf.get()} {
28-
}
29-
30-
WARN_UNUSED really_inline error_codestart_scope(bool is_array) {
31-
depth++;
32-
if (depth >= parser.max_depth()) {log_error("Exceeded max depth!");return DEPTH_ERROR; }
33-
parser.containing_scope[depth].tape_index =next_tape_index();
34-
parser.containing_scope[depth].count =0;
35-
tape.skip();// We don't actually *write* the start element until the end.
36-
parser.is_array[depth] = is_array;
37-
return SUCCESS;
25+
builder{parser.doc->tape.get(), parser.doc->string_buf.get()} {
3826
}
3927

4028
WARN_UNUSED really_inline error_codestart_document() {
41-
log_start_value("document");
42-
parser.containing_scope[depth].tape_index =next_tape_index();
43-
parser.containing_scope[depth].count =0;
44-
tape.skip();// We don't actually *write* the start element until the end.
29+
builder.start_document(*this);
4530
parser.is_array[depth] =false;
4631
return SUCCESS;
4732
}
48-
4933
WARN_UNUSED really_inline error_codestart_object() {
50-
log_start_value("object");
51-
returnstart_scope(false);
34+
depth++;
35+
if (depth >= parser.max_depth()) {log_error("Exceeded max depth!");return DEPTH_ERROR; }
36+
builder.start_object(*this);
37+
parser.is_array[depth] =false;
38+
return SUCCESS;
5239
}
53-
5440
WARN_UNUSED really_inline error_codestart_array() {
55-
log_start_value("array");
56-
returnstart_scope(true);
57-
}
58-
59-
// this function is responsible for annotating the start of the scope
60-
really_inlinevoidend_scope(internal::tape_type start, internal::tape_type end)noexcept {
61-
// SIMDJSON_ASSUME(depth > 0);
62-
// Write the ending tape element, pointing at the start location
63-
constuint32_t start_tape_index = parser.containing_scope[depth].tape_index;
64-
tape.append(start_tape_index, end);
65-
// Write the start tape element, pointing at the end location (and including count)
66-
// count can overflow if it exceeds 24 bits... so we saturate
67-
// the convention being that a cnt of 0xffffff or more is undetermined in value (>= 0xffffff).
68-
constuint32_t count = parser.containing_scope[depth].count;
69-
constuint32_t cntsat = count >0xFFFFFF ?0xFFFFFF : count;
70-
tape_writer::write(parser.doc->tape[start_tape_index],next_tape_index() | (uint64_t(cntsat) <<32), start);
71-
depth--;
72-
}
73-
74-
really_inlineuint32_tnext_tape_index() {
75-
returnuint32_t(tape.next_tape_loc - parser.doc->tape.get());
41+
depth++;
42+
if (depth >= parser.max_depth()) {log_error("Exceeded max depth!");return DEPTH_ERROR; }
43+
builder.start_array(*this);
44+
parser.is_array[depth] =true;
45+
return SUCCESS;
7646
}
77-
7847
really_inlinevoidend_object() {
79-
log_end_value("object");
80-
end_scope(internal::tape_type::START_OBJECT, internal::tape_type::END_OBJECT);
48+
builder.end_object(*this);
49+
depth--;
8150
}
8251
really_inlinevoidend_array() {
83-
log_end_value("array");
84-
end_scope(internal::tape_type::START_ARRAY, internal::tape_type::END_ARRAY);
52+
builder.end_array(*this);
53+
depth--;
8554
}
8655
really_inlinevoidend_document() {
87-
log_end_value("document");
88-
constexpruint32_t start_tape_index =0;
89-
tape.append(start_tape_index, internal::tape_type::ROOT);
90-
tape_writer::write(parser.doc->tape[start_tape_index],next_tape_index(), internal::tape_type::ROOT);
56+
builder.end_document(*this);
9157
}
9258

93-
really_inlinevoidempty_container(internal::tape_type start, internal::tape_type end) {
94-
auto start_index =next_tape_index();
95-
tape.append(start_index+2, start);
96-
tape.append(start_index, end);
97-
}
9859
WARN_UNUSED really_inlineboolempty_object() {
9960
if (peek_next_char() =='}') {
10061
advance_char();
101-
log_value("empty object");
102-
empty_container(internal::tape_type::START_OBJECT, internal::tape_type::END_OBJECT);
62+
builder.empty_object(*this);
10363
returntrue;
10464
}
10565
returnfalse;
10666
}
10767
WARN_UNUSED really_inlineboolempty_array() {
10868
if (peek_next_char() ==']') {
10969
advance_char();
110-
log_value("empty array");
111-
empty_container(internal::tape_type::START_ARRAY, internal::tape_type::END_ARRAY);
70+
builder.empty_array(*this);
11271
returntrue;
11372
}
11473
returnfalse;
11574
}
11675

117-
// increment_count increments the count of keys in an object or values in an array.
11876
really_inlinevoidincrement_count() {
119-
parser.containing_scope[depth].count++;// we have a key value pair in the object at parser.depth - 1
120-
}
121-
122-
really_inlineuint8_t *on_start_string()noexcept {
123-
// we advance the point, accounting for the fact that we have a NULL termination
124-
tape.append(current_string_buf_loc - parser.doc->string_buf.get(), internal::tape_type::STRING);
125-
return current_string_buf_loc +sizeof(uint32_t);
126-
}
127-
128-
really_inlinevoidon_end_string(uint8_t *dst)noexcept {
129-
uint32_t str_length =uint32_t(dst - (current_string_buf_loc +sizeof(uint32_t)));
130-
// TODO check for overflow in case someone has a crazy string (>=4GB?)
131-
// But only add the overflow check when the document itself exceeds 4GB
132-
// Currently unneeded because we refuse to parse docs larger or equal to 4GB.
133-
memcpy(current_string_buf_loc, &str_length,sizeof(uint32_t));
134-
// NULL termination is still handy if you expect all your strings to
135-
// be NULL terminated? It comes at a small cost
136-
*dst =0;
137-
current_string_buf_loc = dst +1;
77+
builder.increment_count(*this);
13878
}
13979

14080
WARN_UNUSED really_inline error_codeparse_key(constuint8_t *key) {
141-
returnparse_string(key,true);
142-
}
143-
WARN_UNUSED really_inline error_codeparse_string(constuint8_t *value,bool key =false) {
144-
log_value(key ?"key" :"string");
145-
uint8_t *dst =on_start_string();
146-
dst =stringparsing::parse_string(value, dst);
147-
if (dst ==nullptr) {
148-
log_error("Invalid escape in string");
149-
return STRING_ERROR;
150-
}
151-
on_end_string(dst);
152-
return SUCCESS;
81+
return builder.parse_key(*this, key);
82+
}
83+
WARN_UNUSED really_inline error_codeparse_string(constuint8_t *value) {
84+
return builder.parse_string(*this, value);
15385
}
154-
15586
WARN_UNUSED really_inline error_codeparse_number(constuint8_t *value) {
156-
log_value("number");
157-
if (!numberparsing::parse_number(value, tape)) {log_error("Invalid number");return NUMBER_ERROR; }
158-
return SUCCESS;
87+
return builder.parse_number(*this, value);
15988
}
160-
161-
really_inline error_codeparse_root_number(constuint8_t *value) {
162-
//
163-
// We need to make a copy to make sure that the string is space terminated.
164-
// This is not about padding the input, which should already padded up
165-
// to len + SIMDJSON_PADDING. However, we have no control at this stage
166-
// on how the padding was done. What if the input string was padded with nulls?
167-
// It is quite common for an input string to have an extra null character (C string).
168-
// We do not want to allow 9\0 (where \0 is the null character) inside a JSON
169-
// document, but the string "9\0" by itself is fine. So we make a copy and
170-
// pad the input with spaces when we know that there is just one input element.
171-
// This copy is relatively expensive, but it will almost never be called in
172-
// practice unless you are in the strange scenario where you have many JSON
173-
// documents made of single atoms.
174-
//
175-
uint8_t *copy =static_cast<uint8_t *>(malloc(remaining_len() + SIMDJSON_PADDING));
176-
if (copy ==nullptr) {
177-
return MEMALLOC;
178-
}
179-
memcpy(copy, value,remaining_len());
180-
memset(copy + remaining_len(), ' ', SIMDJSON_PADDING);
181-
error_code error =parse_number(copy);
182-
free(copy);
183-
return error;
89+
WARN_UNUSED really_inline error_codeparse_root_number(constuint8_t *value) {
90+
return builder.parse_root_number(*this, value);
18491
}
185-
18692
WARN_UNUSED really_inline error_codeparse_true_atom(constuint8_t *value) {
187-
log_value("true");
188-
if (!atomparsing::is_valid_true_atom(value)) {return T_ATOM_ERROR; }
189-
tape.append(0, internal::tape_type::TRUE_VALUE);
190-
return SUCCESS;
93+
return builder.parse_true_atom(*this, value);
19194
}
192-
19395
WARN_UNUSED really_inline error_codeparse_root_true_atom(constuint8_t *value) {
194-
log_value("true");
195-
if (!atomparsing::is_valid_true_atom(value,remaining_len())) {return T_ATOM_ERROR; }
196-
tape.append(0, internal::tape_type::TRUE_VALUE);
197-
return SUCCESS;
96+
return builder.parse_root_true_atom(*this, value);
19897
}
199-
20098
WARN_UNUSED really_inline error_codeparse_false_atom(constuint8_t *value) {
201-
log_value("false");
202-
if (!atomparsing::is_valid_false_atom(value)) {return F_ATOM_ERROR; }
203-
tape.append(0, internal::tape_type::FALSE_VALUE);
204-
return SUCCESS;
99+
return builder.parse_false_atom(*this, value);
205100
}
206-
207101
WARN_UNUSED really_inline error_codeparse_root_false_atom(constuint8_t *value) {
208-
log_value("false");
209-
if (!atomparsing::is_valid_false_atom(value,remaining_len())) {return F_ATOM_ERROR; }
210-
tape.append(0, internal::tape_type::FALSE_VALUE);
211-
return SUCCESS;
102+
return builder.parse_root_false_atom(*this, value);
212103
}
213-
214104
WARN_UNUSED really_inline error_codeparse_null_atom(constuint8_t *value) {
215-
log_value("null");
216-
if (!atomparsing::is_valid_null_atom(value)) {return N_ATOM_ERROR; }
217-
tape.append(0, internal::tape_type::NULL_VALUE);
218-
return SUCCESS;
105+
return builder.parse_null_atom(*this, value);
219106
}
220-
221107
WARN_UNUSED really_inline error_codeparse_root_null_atom(constuint8_t *value) {
222-
log_value("null");
223-
if (!atomparsing::is_valid_null_atom(value,remaining_len())) {return N_ATOM_ERROR; }
224-
tape.append(0, internal::tape_type::NULL_VALUE);
225-
return SUCCESS;
108+
return builder.parse_root_null_atom(*this, value);
226109
}
227110

228111
WARN_UNUSED really_inline error_codestart() {
@@ -266,12 +149,20 @@ struct structural_parser : structural_iterator {
266149
}
267150
};// struct structural_parser
268151

269-
#define SIMDJSON_TRY(EXPR) { auto _err = (EXPR); if (_err) { return _err; } }
152+
}// namespace stage2
153+
}// namespace SIMDJSON_IMPLEMENTATION
154+
}// unnamed namespace
155+
156+
#include"generic/stage2/tape_builder.h"
157+
158+
namespace {// Make everything here private
159+
namespace SIMDJSON_IMPLEMENTATION {
160+
namespace stage2 {
270161

271162
template<bool STREAMING>
272163
WARN_UNUSED static really_inline error_code parse_structurals(dom_parser_implementation &dom_parser, dom::document &doc) noexcept {
273164
dom_parser.doc = &doc;
274-
stage2::structural_parser parser(dom_parser, STREAMING ? dom_parser.next_structural_index : 0);
165+
stage2::structural_parser<stage2::tape_builder> parser(dom_parser, STREAMING ? dom_parser.next_structural_index : 0);
275166
SIMDJSON_TRY( parser.start() );
276167

277168
//

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp