|
3 | 3 | // We assume the file in which it is include already includes
|
4 | 4 | // "simdjson/stage2.h" (this simplifies amalgation)
|
5 | 5 |
|
6 |
| -#include"generic/stage2/tape_writer.h" |
7 | 6 | #include"generic/stage2/logger.h"
|
8 |
| -#include"generic/stage2/atomparsing.h" |
9 | 7 | #include"generic/stage2/structural_iterator.h"
|
10 | 8 |
|
11 | 9 | namespace {// Make everything here private
|
12 | 10 | namespaceSIMDJSON_IMPLEMENTATION {
|
13 | 11 | namespacestage2 {
|
14 | 12 |
|
| 13 | +#defineSIMDJSON_TRY(EXPR) {auto _err = (EXPR);if (_err) {return _err; } } |
| 14 | + |
| 15 | +template<typename T> |
15 | 16 | structstructural_parser : structural_iterator {
|
16 |
| -/** Lets you append to the tape*/ |
17 |
| - tape_writer tape; |
18 |
| -/** Next write location in the string buf for stage 2 parsing*/ |
19 |
| -uint8_t *current_string_buf_loc; |
| 17 | +/** Receiver that actually parses the strings and builds the tape*/ |
| 18 | + T builder; |
20 | 19 | /** Current depth (nested objects and arrays)*/
|
21 | 20 | uint32_t depth{0};
|
22 | 21 |
|
23 | 22 | // For non-streaming, to pass an explicit 0 as next_structural, which enables optimizations
|
24 | 23 | really_inlinestructural_parser(dom_parser_implementation &_parser,uint32_t start_structural_index)
|
25 | 24 | : structural_iterator(_parser, start_structural_index),
|
26 |
| - tape{parser.doc->tape.get()}, |
27 |
| - current_string_buf_loc{parser.doc->string_buf.get()} { |
28 |
| - } |
29 |
| - |
30 |
| - WARN_UNUSED really_inline error_codestart_scope(bool is_array) { |
31 |
| - depth++; |
32 |
| -if (depth >= parser.max_depth()) {log_error("Exceeded max depth!");return DEPTH_ERROR; } |
33 |
| - parser.containing_scope[depth].tape_index =next_tape_index(); |
34 |
| - parser.containing_scope[depth].count =0; |
35 |
| - tape.skip();// We don't actually *write* the start element until the end. |
36 |
| - parser.is_array[depth] = is_array; |
37 |
| -return SUCCESS; |
| 25 | + builder{parser.doc->tape.get(), parser.doc->string_buf.get()} { |
38 | 26 | }
|
39 | 27 |
|
40 | 28 | WARN_UNUSED really_inline error_codestart_document() {
|
41 |
| -log_start_value("document"); |
42 |
| - parser.containing_scope[depth].tape_index =next_tape_index(); |
43 |
| - parser.containing_scope[depth].count =0; |
44 |
| - tape.skip();// We don't actually *write* the start element until the end. |
| 29 | + builder.start_document(*this); |
45 | 30 | parser.is_array[depth] =false;
|
46 | 31 | return SUCCESS;
|
47 | 32 | }
|
48 |
| - |
49 | 33 | WARN_UNUSED really_inline error_codestart_object() {
|
50 |
| -log_start_value("object"); |
51 |
| -returnstart_scope(false); |
| 34 | + depth++; |
| 35 | +if (depth >= parser.max_depth()) {log_error("Exceeded max depth!");return DEPTH_ERROR; } |
| 36 | + builder.start_object(*this); |
| 37 | + parser.is_array[depth] =false; |
| 38 | +return SUCCESS; |
52 | 39 | }
|
53 |
| - |
54 | 40 | WARN_UNUSED really_inline error_codestart_array() {
|
55 |
| -log_start_value("array"); |
56 |
| -returnstart_scope(true); |
57 |
| - } |
58 |
| - |
59 |
| -// this function is responsible for annotating the start of the scope |
60 |
| - really_inlinevoidend_scope(internal::tape_type start, internal::tape_type end)noexcept { |
61 |
| -// SIMDJSON_ASSUME(depth > 0); |
62 |
| -// Write the ending tape element, pointing at the start location |
63 |
| -constuint32_t start_tape_index = parser.containing_scope[depth].tape_index; |
64 |
| - tape.append(start_tape_index, end); |
65 |
| -// Write the start tape element, pointing at the end location (and including count) |
66 |
| -// count can overflow if it exceeds 24 bits... so we saturate |
67 |
| -// the convention being that a cnt of 0xffffff or more is undetermined in value (>= 0xffffff). |
68 |
| -constuint32_t count = parser.containing_scope[depth].count; |
69 |
| -constuint32_t cntsat = count >0xFFFFFF ?0xFFFFFF : count; |
70 |
| -tape_writer::write(parser.doc->tape[start_tape_index],next_tape_index() | (uint64_t(cntsat) <<32), start); |
71 |
| - depth--; |
72 |
| - } |
73 |
| - |
74 |
| - really_inlineuint32_tnext_tape_index() { |
75 |
| -returnuint32_t(tape.next_tape_loc - parser.doc->tape.get()); |
| 41 | + depth++; |
| 42 | +if (depth >= parser.max_depth()) {log_error("Exceeded max depth!");return DEPTH_ERROR; } |
| 43 | + builder.start_array(*this); |
| 44 | + parser.is_array[depth] =true; |
| 45 | +return SUCCESS; |
76 | 46 | }
|
77 |
| - |
78 | 47 | really_inlinevoidend_object() {
|
79 |
| -log_end_value("object"); |
80 |
| -end_scope(internal::tape_type::START_OBJECT, internal::tape_type::END_OBJECT); |
| 48 | +builder.end_object(*this); |
| 49 | +depth--; |
81 | 50 | }
|
82 | 51 | really_inlinevoidend_array() {
|
83 |
| -log_end_value("array"); |
84 |
| -end_scope(internal::tape_type::START_ARRAY, internal::tape_type::END_ARRAY); |
| 52 | +builder.end_array(*this); |
| 53 | +depth--; |
85 | 54 | }
|
86 | 55 | really_inlinevoidend_document() {
|
87 |
| -log_end_value("document"); |
88 |
| -constexpruint32_t start_tape_index =0; |
89 |
| - tape.append(start_tape_index, internal::tape_type::ROOT); |
90 |
| -tape_writer::write(parser.doc->tape[start_tape_index],next_tape_index(), internal::tape_type::ROOT); |
| 56 | + builder.end_document(*this); |
91 | 57 | }
|
92 | 58 |
|
93 |
| - really_inlinevoidempty_container(internal::tape_type start, internal::tape_type end) { |
94 |
| -auto start_index =next_tape_index(); |
95 |
| - tape.append(start_index+2, start); |
96 |
| - tape.append(start_index, end); |
97 |
| - } |
98 | 59 | WARN_UNUSED really_inlineboolempty_object() {
|
99 | 60 | if (peek_next_char() =='}') {
|
100 | 61 | advance_char();
|
101 |
| -log_value("empty object"); |
102 |
| -empty_container(internal::tape_type::START_OBJECT, internal::tape_type::END_OBJECT); |
| 62 | + builder.empty_object(*this); |
103 | 63 | returntrue;
|
104 | 64 | }
|
105 | 65 | returnfalse;
|
106 | 66 | }
|
107 | 67 | WARN_UNUSED really_inlineboolempty_array() {
|
108 | 68 | if (peek_next_char() ==']') {
|
109 | 69 | advance_char();
|
110 |
| -log_value("empty array"); |
111 |
| -empty_container(internal::tape_type::START_ARRAY, internal::tape_type::END_ARRAY); |
| 70 | + builder.empty_array(*this); |
112 | 71 | returntrue;
|
113 | 72 | }
|
114 | 73 | returnfalse;
|
115 | 74 | }
|
116 | 75 |
|
117 |
| -// increment_count increments the count of keys in an object or values in an array. |
118 | 76 | really_inlinevoidincrement_count() {
|
119 |
| - parser.containing_scope[depth].count++;// we have a key value pair in the object at parser.depth - 1 |
120 |
| - } |
121 |
| - |
122 |
| - really_inlineuint8_t *on_start_string()noexcept { |
123 |
| -// we advance the point, accounting for the fact that we have a NULL termination |
124 |
| - tape.append(current_string_buf_loc - parser.doc->string_buf.get(), internal::tape_type::STRING); |
125 |
| -return current_string_buf_loc +sizeof(uint32_t); |
126 |
| - } |
127 |
| - |
128 |
| - really_inlinevoidon_end_string(uint8_t *dst)noexcept { |
129 |
| -uint32_t str_length =uint32_t(dst - (current_string_buf_loc +sizeof(uint32_t))); |
130 |
| -// TODO check for overflow in case someone has a crazy string (>=4GB?) |
131 |
| -// But only add the overflow check when the document itself exceeds 4GB |
132 |
| -// Currently unneeded because we refuse to parse docs larger or equal to 4GB. |
133 |
| -memcpy(current_string_buf_loc, &str_length,sizeof(uint32_t)); |
134 |
| -// NULL termination is still handy if you expect all your strings to |
135 |
| -// be NULL terminated? It comes at a small cost |
136 |
| - *dst =0; |
137 |
| - current_string_buf_loc = dst +1; |
| 77 | + builder.increment_count(*this); |
138 | 78 | }
|
139 | 79 |
|
140 | 80 | WARN_UNUSED really_inline error_codeparse_key(constuint8_t *key) {
|
141 |
| -returnparse_string(key,true); |
142 |
| - } |
143 |
| - WARN_UNUSED really_inline error_codeparse_string(constuint8_t *value,bool key =false) { |
144 |
| -log_value(key ?"key" :"string"); |
145 |
| -uint8_t *dst =on_start_string(); |
146 |
| - dst =stringparsing::parse_string(value, dst); |
147 |
| -if (dst ==nullptr) { |
148 |
| -log_error("Invalid escape in string"); |
149 |
| -return STRING_ERROR; |
150 |
| - } |
151 |
| -on_end_string(dst); |
152 |
| -return SUCCESS; |
| 81 | +return builder.parse_key(*this, key); |
| 82 | + } |
| 83 | + WARN_UNUSED really_inline error_codeparse_string(constuint8_t *value) { |
| 84 | +return builder.parse_string(*this, value); |
153 | 85 | }
|
154 |
| - |
155 | 86 | WARN_UNUSED really_inline error_codeparse_number(constuint8_t *value) {
|
156 |
| -log_value("number"); |
157 |
| -if (!numberparsing::parse_number(value, tape)) {log_error("Invalid number");return NUMBER_ERROR; } |
158 |
| -return SUCCESS; |
| 87 | +return builder.parse_number(*this, value); |
159 | 88 | }
|
160 |
| - |
161 |
| - really_inline error_codeparse_root_number(constuint8_t *value) { |
162 |
| -// |
163 |
| -// We need to make a copy to make sure that the string is space terminated. |
164 |
| -// This is not about padding the input, which should already padded up |
165 |
| -// to len + SIMDJSON_PADDING. However, we have no control at this stage |
166 |
| -// on how the padding was done. What if the input string was padded with nulls? |
167 |
| -// It is quite common for an input string to have an extra null character (C string). |
168 |
| -// We do not want to allow 9\0 (where \0 is the null character) inside a JSON |
169 |
| -// document, but the string "9\0" by itself is fine. So we make a copy and |
170 |
| -// pad the input with spaces when we know that there is just one input element. |
171 |
| -// This copy is relatively expensive, but it will almost never be called in |
172 |
| -// practice unless you are in the strange scenario where you have many JSON |
173 |
| -// documents made of single atoms. |
174 |
| -// |
175 |
| -uint8_t *copy =static_cast<uint8_t *>(malloc(remaining_len() + SIMDJSON_PADDING)); |
176 |
| -if (copy ==nullptr) { |
177 |
| -return MEMALLOC; |
178 |
| - } |
179 |
| -memcpy(copy, value,remaining_len()); |
180 |
| -memset(copy +remaining_len(),'', SIMDJSON_PADDING); |
181 |
| - error_code error =parse_number(copy); |
182 |
| -free(copy); |
183 |
| -return error; |
| 89 | + WARN_UNUSED really_inline error_codeparse_root_number(constuint8_t *value) { |
| 90 | +return builder.parse_root_number(*this, value); |
184 | 91 | }
|
185 |
| - |
186 | 92 | WARN_UNUSED really_inline error_codeparse_true_atom(constuint8_t *value) {
|
187 |
| -log_value("true"); |
188 |
| -if (!atomparsing::is_valid_true_atom(value)) {return T_ATOM_ERROR; } |
189 |
| - tape.append(0, internal::tape_type::TRUE_VALUE); |
190 |
| -return SUCCESS; |
| 93 | +return builder.parse_true_atom(*this, value); |
191 | 94 | }
|
192 |
| - |
193 | 95 | WARN_UNUSED really_inline error_codeparse_root_true_atom(constuint8_t *value) {
|
194 |
| -log_value("true"); |
195 |
| -if (!atomparsing::is_valid_true_atom(value,remaining_len())) {return T_ATOM_ERROR; } |
196 |
| - tape.append(0, internal::tape_type::TRUE_VALUE); |
197 |
| -return SUCCESS; |
| 96 | +return builder.parse_root_true_atom(*this, value); |
198 | 97 | }
|
199 |
| - |
200 | 98 | WARN_UNUSED really_inline error_codeparse_false_atom(constuint8_t *value) {
|
201 |
| -log_value("false"); |
202 |
| -if (!atomparsing::is_valid_false_atom(value)) {return F_ATOM_ERROR; } |
203 |
| - tape.append(0, internal::tape_type::FALSE_VALUE); |
204 |
| -return SUCCESS; |
| 99 | +return builder.parse_false_atom(*this, value); |
205 | 100 | }
|
206 |
| - |
207 | 101 | WARN_UNUSED really_inline error_codeparse_root_false_atom(constuint8_t *value) {
|
208 |
| -log_value("false"); |
209 |
| -if (!atomparsing::is_valid_false_atom(value,remaining_len())) {return F_ATOM_ERROR; } |
210 |
| - tape.append(0, internal::tape_type::FALSE_VALUE); |
211 |
| -return SUCCESS; |
| 102 | +return builder.parse_root_false_atom(*this, value); |
212 | 103 | }
|
213 |
| - |
214 | 104 | WARN_UNUSED really_inline error_codeparse_null_atom(constuint8_t *value) {
|
215 |
| -log_value("null"); |
216 |
| -if (!atomparsing::is_valid_null_atom(value)) {return N_ATOM_ERROR; } |
217 |
| - tape.append(0, internal::tape_type::NULL_VALUE); |
218 |
| -return SUCCESS; |
| 105 | +return builder.parse_null_atom(*this, value); |
219 | 106 | }
|
220 |
| - |
221 | 107 | WARN_UNUSED really_inline error_codeparse_root_null_atom(constuint8_t *value) {
|
222 |
| -log_value("null"); |
223 |
| -if (!atomparsing::is_valid_null_atom(value,remaining_len())) {return N_ATOM_ERROR; } |
224 |
| - tape.append(0, internal::tape_type::NULL_VALUE); |
225 |
| -return SUCCESS; |
| 108 | +return builder.parse_root_null_atom(*this, value); |
226 | 109 | }
|
227 | 110 |
|
228 | 111 | WARN_UNUSED really_inline error_codestart() {
|
@@ -266,12 +149,20 @@ struct structural_parser : structural_iterator {
|
266 | 149 | }
|
267 | 150 | };// struct structural_parser
|
268 | 151 |
|
269 |
| -#defineSIMDJSON_TRY(EXPR) {auto _err = (EXPR);if (_err) {return _err; } } |
| 152 | +}// namespace stage2 |
| 153 | +}// namespace SIMDJSON_IMPLEMENTATION |
| 154 | +}// unnamed namespace |
| 155 | + |
| 156 | +#include"generic/stage2/tape_builder.h" |
| 157 | + |
| 158 | +namespace {// Make everything here private |
| 159 | +namespaceSIMDJSON_IMPLEMENTATION { |
| 160 | +namespacestage2 { |
270 | 161 |
|
271 | 162 | template<bool STREAMING>
|
272 | 163 | WARN_UNUSEDstatic really_inline error_codeparse_structurals(dom_parser_implementation &dom_parser, dom::document &doc)noexcept {
|
273 | 164 | dom_parser.doc = &doc;
|
274 |
| - stage2::structural_parserparser(dom_parser, STREAMING ? dom_parser.next_structural_index :0); |
| 165 | + stage2::structural_parser<stage2::tape_builder>parser(dom_parser, STREAMING ? dom_parser.next_structural_index :0); |
275 | 166 | SIMDJSON_TRY( parser.start() );
|
276 | 167 |
|
277 | 168 | //
|
|