@@ -133,7 +133,11 @@ private function evaluateBracket(string $expr, mixed $value): array
133133return [];
134134 }
135135
136- if ('* ' ===$ expr ) {
136+ if (str_contains ($ expr ,', ' ) && (str_starts_with ($ trimmed =trim ($ expr ),', ' ) ||str_ends_with ($ trimmed ,', ' ))) {
137+ throw new JsonCrawlerException ($ expr ,'Expression cannot have leading or trailing commas ' );
138+ }
139+
140+ if ('* ' ===$ expr = JsonPathUtils::normalizeWhitespace ($ expr )) {
137141return array_values ($ value );
138142 }
139143
@@ -168,8 +172,7 @@ private function evaluateBracket(string $expr, mixed $value): array
168172return $ result ;
169173 }
170174
171- // start, end and step
172- if (preg_match ('/^(-?\d*):(-?\d*)(?::(-?\d+))?$/ ' ,$ expr ,$ matches )) {
175+ if (preg_match ('/^(-?\d*+)\s*+:\s*+(-?\d*+)(?:\s*+:\s*+(-?\d++))?$/ ' ,$ expr ,$ matches )) {
173176if (!array_is_list ($ value )) {
174177return [];
175178 }
@@ -217,14 +220,12 @@ private function evaluateBracket(string $expr, mixed $value): array
217220
218221// filter expressions
219222if (preg_match ('/^\?(.*)$/ ' ,$ expr ,$ matches )) {
220- $ filterExpr =$ matches [1 ];
221-
222- if (preg_match ('/^(\w+)\s*\([^()]*\)\s*([<>=!]+.*)?$/ ' ,$ filterExpr )) {
223+ if (preg_match ('/^(\w+)\s*\([^()]*\)\s*([<>=!]+.*)?$/ ' ,$ filterExpr =trim ($ matches [1 ]))) {
223224$ filterExpr ="( $ filterExpr) " ;
224225 }
225226
226227if (!str_starts_with ($ filterExpr ,'( ' )) {
227- throw new JsonCrawlerException ( $ expr , ' Invalid filter expression ' ) ;
228+ $ filterExpr = " ( $ filterExpr ) " ;
228229 }
229230
230231// remove outer filter parentheses
@@ -235,30 +236,30 @@ private function evaluateBracket(string $expr, mixed $value): array
235236
236237// comma-separated values, e.g. `['key1', 'key2', 123]` or `[0, 1, 'key']`
237238if (str_contains ($ expr ,', ' )) {
238- $ parts =$ this -> parseCommaSeparatedValues ($ expr );
239+ $ parts =JsonPathUtils:: parseCommaSeparatedValues ($ expr );
239240
240241$ result = [];
241- $ keysIndices =array_keys ($ value );
242- $ isList =array_is_list ($ value );
243242
244243foreach ($ partsas $ part ) {
245244$ part =trim ($ part );
246245
247- if (preg_match ('/^([ \'"])(.*)\1$/ ' ,$ part ,$ matches )) {
246+ if ('* ' ===$ part ) {
247+ $ result =array_merge ($ result ,array_values ($ value ));
248+ }elseif (preg_match ('/^(-?\d*+)\s*+:\s*+(-?\d*+)(?:\s*+:\s*+(-?\d++))?$/ ' ,$ part ,$ matches )) {
249+ // slice notation
250+ $ sliceResult =$ this ->evaluateBracket ($ part ,$ value );
251+ $ result =array_merge ($ result ,$ sliceResult );
252+ }elseif (preg_match ('/^([ \'"])(.*)\1$/ ' ,$ part ,$ matches )) {
248253$ key = JsonPathUtils::unescapeString ($ matches [2 ],$ matches [1 ]);
249254
250- if ($ isList ) {
255+ if (array_is_list ($ value )) {
256+ // for arrays, find ALL objects that contain this key
251257foreach ($ valueas $ item ) {
252258if (\is_array ($ item ) &&\array_key_exists ($ key ,$ item )) {
253259$ result [] =$ item ;
254- break ;
255260 }
256261 }
257-
258- continue ;// no results here
259- }
260-
261- if (\array_key_exists ($ key ,$ value )) {
262+ }elseif (\array_key_exists ($ key ,$ value )) {// for objects, get the value for this key
262263$ result [] =$ value [$ key ];
263264 }
264265 }elseif (preg_match ('/^-?\d+$/ ' ,$ part )) {
@@ -268,14 +269,14 @@ private function evaluateBracket(string $expr, mixed $value): array
268269$ index =\count ($ value ) +$ index ;
269270 }
270271
271- if ($ isList &&\array_key_exists ($ index ,$ value )) {
272+ if (array_is_list ( $ value ) &&\array_key_exists ($ index ,$ value )) {
272273$ result [] =$ value [$ index ];
273- continue ;
274- }
275-
276- // numeric index on a hashmap
277- if ( isset ( $ keysIndices [ $ index ]) && isset ( $ value [$ keysIndices [$ index ]])) {
278- $ result [] = $ value [ $ keysIndices [ $ index ]];
274+ } else {
275+ // numeric index on a hashmap
276+ $ keysIndices = array_keys ( $ value );
277+ if ( isset ( $ keysIndices [ $ index]) && isset ( $ value [ $ keysIndices [ $ index ]])) {
278+ $ result [] = $ value [$ keysIndices [$ index ]];
279+ }
279280 }
280281 }
281282 }
@@ -310,7 +311,31 @@ private function evaluateFilter(string $expr, mixed $value): array
310311
311312private function evaluateFilterExpression (string $ expr ,mixed $ context ):bool
312313 {
313- $ expr =trim ($ expr );
314+ $ expr = JsonPathUtils::normalizeWhitespace ($ expr );
315+
316+ // remove outer parentheses if they wrap the entire expression
317+ if (str_starts_with ($ expr ,'( ' ) &&str_ends_with ($ expr ,') ' )) {
318+ $ depth =0 ;
319+ $ isWrapped =true ;
320+ for ($ i =0 ;$ i <\strlen ($ expr ); ++$ i ) {
321+ if ('( ' ===$ expr [$ i ]) {
322+ ++$ depth ;
323+ }elseif (') ' ===$ expr [$ i ]) {
324+ --$ depth ;
325+ if (0 ===$ depth &&$ i <\strlen ($ expr ) -1 ) {
326+ $ isWrapped =false ;
327+ break ;
328+ }
329+ }
330+ }
331+ if ($ isWrapped ) {
332+ $ expr =trim (substr ($ expr ,1 , -1 ));
333+ }
334+ }
335+
336+ if (str_starts_with ($ expr ,'! ' )) {
337+ return !$ this ->evaluateFilterExpression (trim (substr ($ expr ,1 )),$ context );
338+ }
314339
315340if (str_contains ($ expr ,'&& ' )) {
316341$ parts =array_map ('trim ' ,explode ('&& ' ,$ expr ));
@@ -353,8 +378,8 @@ private function evaluateFilterExpression(string $expr, mixed $context): bool
353378 }
354379
355380// function calls
356- if (preg_match ('/^(\w+) \((.*)\)$/ ' ,$ expr ,$ matches )) {
357- $ functionName =$ matches [1 ];
381+ if (preg_match ('/^(\w++)\s*+ \((.*)\)$/ ' ,$ expr ,$ matches )) {
382+ $ functionName =trim ( $ matches [1 ]) ;
358383if (!isset (self ::RFC9535_FUNCTIONS [$ functionName ])) {
359384throw new JsonCrawlerException ($ expr ,\sprintf ('invalid function "%s" ' ,$ functionName ));
360385 }
@@ -369,8 +394,15 @@ private function evaluateFilterExpression(string $expr, mixed $context): bool
369394
370395private function evaluateScalar (string $ expr ,mixed $ context ):mixed
371396 {
372- if (is_numeric ($ expr )) {
373- return str_contains ($ expr ,'. ' ) ? (float )$ expr : (int )$ expr ;
397+ $ expr = JsonPathUtils::normalizeWhitespace ($ expr );
398+
399+ if (JsonPathUtils::isJsonNumber ($ expr )) {
400+ return str_contains ($ expr ,'. ' ) ||str_contains (strtolower ($ expr ),'e ' ) ? (float )$ expr : (int )$ expr ;
401+ }
402+
403+ // only validate tokens that look like standalone numbers
404+ if (preg_match ('/^[\d+\-.eE]+$/ ' ,$ expr ) &&preg_match ('/\d/ ' ,$ expr )) {
405+ throw new JsonCrawlerException ($ expr ,\sprintf ('Invalid number format "%s" ' ,$ expr ));
374406 }
375407
376408if ('@ ' ===$ expr ) {
@@ -404,9 +436,8 @@ private function evaluateScalar(string $expr, mixed $context): mixed
404436 }
405437
406438// function calls
407- if (preg_match ('/^(\w+)\((.*)\)$/ ' ,$ expr ,$ matches )) {
408- $ functionName =$ matches [1 ];
409- if (!isset (self ::RFC9535_FUNCTIONS [$ functionName ])) {
439+ if (preg_match ('/^(\w++)\((.*)\)$/ ' ,$ expr ,$ matches )) {
440+ if (!isset (self ::RFC9535_FUNCTIONS [$ functionName =trim ($ matches [1 ])])) {
410441throw new JsonCrawlerException ($ expr ,\sprintf ('invalid function "%s" ' ,$ functionName ));
411442 }
412443
@@ -416,31 +447,60 @@ private function evaluateScalar(string $expr, mixed $context): mixed
416447return null ;
417448 }
418449
419- private function evaluateFunction (string $ name ,string $ args ,array $ context ):mixed
450+ private function evaluateFunction (string $ name ,string $ args ,mixed $ context ):mixed
420451 {
421- $ args =array_map (
422- fn ($ arg ) =>$ this ->evaluateScalar (trim ($ arg ),$ context ),
423- explode (', ' ,$ args )
424- );
452+ $ argList = [];
453+ $ nodelistSizes = [];
454+ if ($ args =trim ($ args )) {
455+ $ args = JsonPathUtils::parseCommaSeparatedValues ($ args );
456+ foreach ($ argsas $ arg ) {
457+ $ arg =trim ($ arg );
458+ if (str_starts_with ($ arg ,'$ ' )) {// special handling for absolute paths
459+ $ results =$ this ->evaluate (new JsonPath ($ arg ));
460+ $ argList [] =$ results [0 ] ??null ;
461+ $ nodelistSizes [] =\count ($ results );
462+ }elseif (!str_starts_with ($ arg ,'@ ' )) {// special handling for @ to track nodelist size
463+ $ argList [] =$ this ->evaluateScalar ($ arg ,$ context );
464+ $ nodelistSizes [] =1 ;
465+ }elseif ('@ ' ===$ arg ) {
466+ $ argList [] =$ context ;
467+ $ nodelistSizes [] =1 ;
468+ }elseif (!\is_array ($ context )) {
469+ $ argList [] =null ;
470+ $ nodelistSizes [] =0 ;
471+ }elseif (str_starts_with ($ pathPart =substr ($ arg ,1 ),'[ ' )) {
472+ // handle bracket expressions like @['a','d']
473+ $ results =$ this ->evaluateBracket (substr ($ pathPart ,1 , -1 ),$ context );
474+ $ argList [] =$ results ;
475+ $ nodelistSizes [] =\count ($ results );
476+ }else {
477+ // handle dot notation like @.a
478+ $ results =$ this ->evaluateTokensOnDecodedData (JsonPathTokenizer::tokenize (new JsonPath ('$ ' .$ pathPart )),$ context );
479+ $ argList [] =$ results [0 ] ??null ;
480+ $ nodelistSizes [] =\count ($ results );
481+ }
482+ }
483+ }
425484
426- $ value =$ args [0 ] ??null ;
485+ $ value =$ argList [0 ] ??null ;
486+ $ nodelistSize =$ nodelistSizes [0 ] ??0 ;
427487
428488return match ($ name ) {
429489'length ' =>match (true ) {
430490\is_string ($ value ) =>mb_strlen ($ value ),
431491\is_array ($ value ) =>\count ($ value ),
432492default =>0 ,
433493 },
434- 'count ' =>\is_array ( $ value ) ? \count ( $ value ) : 0 ,
494+ 'count ' =>$ nodelistSize ,
435495'match ' =>match (true ) {
436- \is_string ($ value ) &&\is_string ($ args [1 ] ??null ) => (bool ) @preg_match (\sprintf ('/^%s$/ ' ,$ args [1 ]),$ value ),
496+ \is_string ($ value ) &&\is_string ($ argList [1 ] ??null ) => (bool ) @preg_match (\sprintf ('/^%s$/u ' ,$ this -> transformJsonPathRegex ( $ argList [1 ]) ),$ value ),
437497default =>false ,
438498 },
439499'search ' =>match (true ) {
440- \is_string ($ value ) &&\is_string ($ args [1 ] ??null ) => (bool ) @preg_match ("/ $ args [1 ]/ " ,$ value ),
500+ \is_string ($ value ) &&\is_string ($ argList [1 ] ??null ) => (bool ) @preg_match ("/ { $ this -> transformJsonPathRegex ( $ argList [1 ])} /u " ,$ value ),
441501default =>false ,
442502 },
443- 'value ' =>$ value ,
503+ 'value ' =>1 < $ nodelistSize ? null : ( 1 === $ nodelistSize ? ( \is_array ( $ value) ? ( $ value [ 0 ] ?? null ) : $ value ) : $ value ) ,
444504default =>null ,
445505 };
446506 }
@@ -474,43 +534,52 @@ private function compare(mixed $left, mixed $right, string $operator): bool
474534 };
475535 }
476536
477- private function parseCommaSeparatedValues (string $ expr ):array
537+ /*
538+ * Transform JSONPath regex patterns to comply with RFC 9535. The main issue is
539+ * that '.' should not match \r or \n but should match Unicode line
540+ * separators U+2028 and U+2029.
541+ */
542+ private function transformJsonPathRegex (string $ pattern ):string
478543 {
479- $ parts =[] ;
480- $ current ='' ;
481- $ inQuotes =false ;
482- $ quoteChar =null ;
544+ $ result ='' ;
545+ $ inCharClass =false ;
546+ $ escaped =false ;
547+ $ length =\strlen ( $ pattern ) ;
483548
484- for ($ i =0 ;$ i <\strlen ( $ expr ) ; ++$ i ) {
485- $ char =$ expr [$ i ];
549+ for ($ i =0 ;$ i <$ length ; ++$ i ) {
550+ $ char =$ pattern [$ i ];
486551
487- if ('\\' ===$ char &&$ i +1 <\strlen ($ expr )) {
488- $ current .=$ char .$ expr [++$ i ];
552+ if ($ escaped ) {
553+ $ result .=$ char ;
554+ $ escaped =false ;
489555continue ;
490556 }
491557
492- if ('" ' ===$ char ||"' " ===$ char ) {
493- if (!$ inQuotes ) {
494- $ inQuotes =true ;
495- $ quoteChar =$ char ;
496- }elseif ($ char ===$ quoteChar ) {
497- $ inQuotes =false ;
498- $ quoteChar =null ;
499- }
500- }elseif (!$ inQuotes &&', ' ===$ char ) {
501- $ parts [] =trim ($ current );
502- $ current ='' ;
558+ if ('\\' ===$ char ) {
559+ $ result .=$ char ;
560+ $ escaped =true ;
561+ continue ;
562+ }
503563
564+ if ('[ ' ===$ char && !$ inCharClass ) {
565+ $ inCharClass =true ;
566+ $ result .=$ char ;
504567continue ;
505568 }
506569
507- $ current .=$ char ;
508- }
570+ if ('] ' ===$ char &&$ inCharClass ) {
571+ $ inCharClass =false ;
572+ $ result .=$ char ;
573+ continue ;
574+ }
509575
510- if ('' !==$ current ) {
511- $ parts [] =trim ($ current );
576+ if ('. ' ===$ char && !$ inCharClass ) {
577+ $ result .='(?:[^\r\n]|\x{2028}|\x{2029}) ' ;
578+ }else {
579+ $ result .=$ char ;
580+ }
512581 }
513582
514- return $ parts ;
583+ return $ result ;
515584 }
516585}