@@ -133,7 +133,11 @@ private function evaluateBracket(string $expr, mixed $value): array
133133return [];
134134 }
135135
136- if ('* ' ===$ expr ) {
136+ if (str_contains ($ expr ,', ' ) && (str_starts_with ($ trimmed =trim ($ expr ),', ' ) ||str_ends_with ($ trimmed ,', ' ))) {
137+ throw new JsonCrawlerException ($ expr ,'Expression cannot have leading or trailing commas ' );
138+ }
139+
140+ if ('* ' ===$ expr = JsonPathUtils::normalizeWhitespace ($ expr )) {
137141return array_values ($ value );
138142 }
139143
@@ -168,8 +172,7 @@ private function evaluateBracket(string $expr, mixed $value): array
168172return $ result ;
169173 }
170174
171- // start, end and step
172- if (preg_match ('/^(-?\d*):(-?\d*)(?::(-?\d+))?$/ ' ,$ expr ,$ matches )) {
175+ if (preg_match ('/^(-?\d*+)\s*+:\s*+(-?\d*+)(?:\s*+:\s*+(-?\d++))?$/ ' ,$ expr ,$ matches )) {
173176if (!array_is_list ($ value )) {
174177return [];
175178 }
@@ -217,14 +220,12 @@ private function evaluateBracket(string $expr, mixed $value): array
217220
218221// filter expressions
219222if (preg_match ('/^\?(.*)$/ ' ,$ expr ,$ matches )) {
220- $ filterExpr =$ matches [1 ];
221-
222- if (preg_match ('/^(\w+)\s*\([^()]*\)\s*([<>=!]+.*)?$/ ' ,$ filterExpr )) {
223+ if (preg_match ('/^(\w+)\s*\([^()]*\)\s*([<>=!]+.*)?$/ ' ,$ filterExpr =trim ($ matches [1 ]))) {
223224$ filterExpr ="( $ filterExpr) " ;
224225 }
225226
226227if (!str_starts_with ($ filterExpr ,'( ' )) {
227- throw new JsonCrawlerException ( $ expr , ' Invalid filter expression ' ) ;
228+ $ filterExpr = " ( $ filterExpr ) " ;
228229 }
229230
230231// remove outer filter parentheses
@@ -235,30 +236,30 @@ private function evaluateBracket(string $expr, mixed $value): array
235236
236237// comma-separated values, e.g. `['key1', 'key2', 123]` or `[0, 1, 'key']`
237238if (str_contains ($ expr ,', ' )) {
238- $ parts =$ this -> parseCommaSeparatedValues ($ expr );
239+ $ parts =JsonPathUtils:: parseCommaSeparatedValues ($ expr );
239240
240241$ result = [];
241- $ keysIndices =array_keys ($ value );
242- $ isList =array_is_list ($ value );
243242
244243foreach ($ partsas $ part ) {
245244$ part =trim ($ part );
246245
247- if (preg_match ('/^([ \'"])(.*)\1$/ ' ,$ part ,$ matches )) {
246+ if ('* ' ===$ part ) {
247+ $ result =array_merge ($ result ,array_values ($ value ));
248+ }elseif (preg_match ('/^(-?\d*+)\s*+:\s*+(-?\d*+)(?:\s*+:\s*+(-?\d++))?$/ ' ,$ part ,$ matches )) {
249+ // slice notation
250+ $ sliceResult =$ this ->evaluateBracket ($ part ,$ value );
251+ $ result =array_merge ($ result ,$ sliceResult );
252+ }elseif (preg_match ('/^([ \'"])(.*)\1$/ ' ,$ part ,$ matches )) {
248253$ key = JsonPathUtils::unescapeString ($ matches [2 ],$ matches [1 ]);
249254
250- if ($ isList ) {
255+ if (array_is_list ($ value )) {
256+ // for arrays, find ALL objects that contain this key
251257foreach ($ valueas $ item ) {
252258if (\is_array ($ item ) &&\array_key_exists ($ key ,$ item )) {
253259$ result [] =$ item ;
254- break ;
255260 }
256261 }
257-
258- continue ;// no results here
259- }
260-
261- if (\array_key_exists ($ key ,$ value )) {
262+ }elseif (\array_key_exists ($ key ,$ value )) {// for objects, get the value for this key
262263$ result [] =$ value [$ key ];
263264 }
264265 }elseif (preg_match ('/^-?\d+$/ ' ,$ part )) {
@@ -268,14 +269,14 @@ private function evaluateBracket(string $expr, mixed $value): array
268269$ index =\count ($ value ) +$ index ;
269270 }
270271
271- if ($ isList &&\array_key_exists ($ index ,$ value )) {
272+ if (array_is_list ( $ value ) &&\array_key_exists ($ index ,$ value )) {
272273$ result [] =$ value [$ index ];
273- continue ;
274- }
275-
276- // numeric index on a hashmap
277- if ( isset ( $ keysIndices [ $ index ]) && isset ( $ value [$ keysIndices [$ index ]])) {
278- $ result [] = $ value [ $ keysIndices [ $ index ]];
274+ } else {
275+ // numeric index on a hashmap
276+ $ keysIndices = array_keys ( $ value );
277+ if ( isset ( $ keysIndices [ $ index]) && isset ( $ value [ $ keysIndices [ $ index ]])) {
278+ $ result [] = $ value [$ keysIndices [$ index ]];
279+ }
279280 }
280281 }
281282 }
@@ -310,7 +311,29 @@ private function evaluateFilter(string $expr, mixed $value): array
310311
311312private function evaluateFilterExpression (string $ expr ,mixed $ context ):bool
312313 {
313- $ expr =trim ($ expr );
314+ $ expr = JsonPathUtils::normalizeWhitespace ($ expr );
315+
316+ // remove outer parentheses if they wrap the entire expression
317+ if (str_starts_with ($ expr ,'( ' ) &&str_ends_with ($ expr ,') ' )) {
318+ $ depth =0 ;
319+ $ isWrapped =true ;
320+ $ i = -1 ;
321+ while (null !==$ char =$ expr [++$ i ] ??null ) {
322+ if ('( ' ===$ char ) {
323+ ++$ depth ;
324+ }elseif (') ' ===$ char &&0 === --$ depth &&isset ($ expr [$ i +1 ])) {
325+ $ isWrapped =false ;
326+ break ;
327+ }
328+ }
329+ if ($ isWrapped ) {
330+ $ expr =trim (substr ($ expr ,1 , -1 ));
331+ }
332+ }
333+
334+ if (str_starts_with ($ expr ,'! ' )) {
335+ return !$ this ->evaluateFilterExpression (trim (substr ($ expr ,1 )),$ context );
336+ }
314337
315338if (str_contains ($ expr ,'&& ' )) {
316339$ parts =array_map ('trim ' ,explode ('&& ' ,$ expr ));
@@ -353,8 +376,8 @@ private function evaluateFilterExpression(string $expr, mixed $context): bool
353376 }
354377
355378// function calls
356- if (preg_match ('/^(\w+) \((.*)\)$/ ' ,$ expr ,$ matches )) {
357- $ functionName =$ matches [1 ];
379+ if (preg_match ('/^(\w++)\s*+ \((.*)\)$/ ' ,$ expr ,$ matches )) {
380+ $ functionName =trim ( $ matches [1 ]) ;
358381if (!isset (self ::RFC9535_FUNCTIONS [$ functionName ])) {
359382throw new JsonCrawlerException ($ expr ,\sprintf ('invalid function "%s" ' ,$ functionName ));
360383 }
@@ -369,8 +392,15 @@ private function evaluateFilterExpression(string $expr, mixed $context): bool
369392
370393private function evaluateScalar (string $ expr ,mixed $ context ):mixed
371394 {
372- if (is_numeric ($ expr )) {
373- return str_contains ($ expr ,'. ' ) ? (float )$ expr : (int )$ expr ;
395+ $ expr = JsonPathUtils::normalizeWhitespace ($ expr );
396+
397+ if (JsonPathUtils::isJsonNumber ($ expr )) {
398+ return str_contains ($ expr ,'. ' ) ||str_contains (strtolower ($ expr ),'e ' ) ? (float )$ expr : (int )$ expr ;
399+ }
400+
401+ // only validate tokens that look like standalone numbers
402+ if (preg_match ('/^[\d+\-.eE]+$/ ' ,$ expr ) &&preg_match ('/\d/ ' ,$ expr )) {
403+ throw new JsonCrawlerException ($ expr ,\sprintf ('Invalid number format "%s" ' ,$ expr ));
374404 }
375405
376406if ('@ ' ===$ expr ) {
@@ -404,9 +434,8 @@ private function evaluateScalar(string $expr, mixed $context): mixed
404434 }
405435
406436// function calls
407- if (preg_match ('/^(\w+)\((.*)\)$/ ' ,$ expr ,$ matches )) {
408- $ functionName =$ matches [1 ];
409- if (!isset (self ::RFC9535_FUNCTIONS [$ functionName ])) {
437+ if (preg_match ('/^(\w++)\((.*)\)$/ ' ,$ expr ,$ matches )) {
438+ if (!isset (self ::RFC9535_FUNCTIONS [$ functionName =trim ($ matches [1 ])])) {
410439throw new JsonCrawlerException ($ expr ,\sprintf ('invalid function "%s" ' ,$ functionName ));
411440 }
412441
@@ -416,31 +445,60 @@ private function evaluateScalar(string $expr, mixed $context): mixed
416445return null ;
417446 }
418447
/**
 * Evaluates a JSONPath function call (length, count, match, search, value)
 * that appears inside a filter expression.
 *
 * Each argument is resolved to a value and, in parallel, to the size of the
 * nodelist it came from; count() and value() are answered from that size.
 *
 * @param string $name    function name as written in the expression
 * @param string $args    raw text found between the parentheses
 * @param mixed  $context current node ("@") the filter is evaluated against
 *
 * @return mixed the function result, or null for an unrecognised function name
 */
private function evaluateFunction(string $name, string $args, mixed $context): mixed
{
    $argList = [];
    $nodelistSizes = [];

    // Compare with '' explicitly: a plain truthiness test would treat the
    // literal argument "0" as "no arguments at all".
    $args = trim($args);
    if ('' !== $args) {
        foreach (JsonPathUtils::parseCommaSeparatedValues($args) as $arg) {
            $arg = trim($arg);

            if (str_starts_with($arg, '$')) {
                // absolute path: resolved through $this->evaluate()
                $results = $this->evaluate(new JsonPath($arg));
                $argList[] = $results[0] ?? null;
                $nodelistSizes[] = \count($results);
            } elseif (!str_starts_with($arg, '@')) {
                // literal or nested expression: counts as a single value
                $argList[] = $this->evaluateScalar($arg, $context);
                $nodelistSizes[] = 1;
            } elseif ('@' === $arg) {
                // the current node itself
                $argList[] = $context;
                $nodelistSizes[] = 1;
            } elseif (!\is_array($context)) {
                // a relative path cannot select anything below a non-array node
                $argList[] = null;
                $nodelistSizes[] = 0;
            } elseif (str_starts_with($pathPart = substr($arg, 1), '[')) {
                // bracket expressions like @['a','d']: strip "[" and "]" and keep the whole result list
                $results = $this->evaluateBracket(substr($pathPart, 1, -1), $context);
                $argList[] = $results;
                $nodelistSizes[] = \count($results);
            } else {
                // dot notation like @.a: re-rooted at "$" and evaluated against the current node
                $results = $this->evaluateTokensOnDecodedData(JsonPathTokenizer::tokenize(new JsonPath('$'.$pathPart)), $context);
                $argList[] = $results[0] ?? null;
                $nodelistSizes[] = \count($results);
            }
        }
    }

    $value = $argList[0] ?? null;
    $nodelistSize = $nodelistSizes[0] ?? 0;

    return match ($name) {
        'length' => match (true) {
            \is_string($value) => mb_strlen($value),
            \is_array($value) => \count($value),
            default => 0,
        },
        'count' => $nodelistSize,
        'match' => match (true) {
            // The pattern is wrapped in a group so that an alternation ("a|b") stays anchored on
            // both ends, and "D" stops "$" from also matching before a trailing newline.
            \is_string($value) && \is_string($argList[1] ?? null) => (bool) @preg_match(\sprintf('/^(?:%s)$/Du', $this->escapeRegexDelimiter($this->transformJsonPathRegex($argList[1]))), $value),
            default => false,
        },
        'search' => match (true) {
            \is_string($value) && \is_string($argList[1] ?? null) => (bool) @preg_match(\sprintf('/%s/u', $this->escapeRegexDelimiter($this->transformJsonPathRegex($argList[1]))), $value),
            default => false,
        },
        'value' => match (true) {
            // more than one node: no single value
            1 < $nodelistSize => null,
            // exactly one node held as an array: take its first element
            // NOTE(review): this also unwraps a single node whose own value is an array — confirm this is intended
            1 === $nodelistSize && \is_array($value) => $value[0] ?? null,
            default => $value,
        },
        default => null,
    };
}

/**
 * Escapes every unescaped "/" so the pattern can be placed between "/" delimiters.
 *
 * A slash counts as unescaped when it is preceded by an even number of
 * backslashes (including none); slashes that are already escaped are left alone.
 */
private function escapeRegexDelimiter(string $pattern): string
{
    return preg_replace('~(?<!\\\\)((?:\\\\\\\\)*+)/~', '$1\\/', $pattern) ?? $pattern;
}
@@ -474,43 +532,51 @@ private function compare(mixed $left, mixed $right, string $operator): bool
474532 };
475533 }
476534
/**
 * Rewrites a JSONPath regular expression so that it follows RFC 9535.
 *
 * Every "." that is neither escaped nor inside a character class is replaced
 * by a group that refuses \r and \n but accepts the Unicode line separators
 * U+2028 and U+2029. All other characters are copied through untouched.
 */
private function transformJsonPathRegex(string $pattern): string
{
    $transformed = '';
    $insideClass = false;
    $afterBackslash = false;

    for ($pos = 0, $length = \strlen($pattern); $pos < $length; ++$pos) {
        $byte = $pattern[$pos];

        // the character following a backslash is always copied verbatim
        if ($afterBackslash) {
            $afterBackslash = false;
            $transformed .= $byte;
            continue;
        }

        if ('\\' === $byte) {
            $afterBackslash = true;
            $transformed .= $byte;
            continue;
        }

        // inside [...] nothing is rewritten; only "]" changes the state
        if ($insideClass) {
            if (']' === $byte) {
                $insideClass = false;
            }
            $transformed .= $byte;
            continue;
        }

        if ('[' === $byte) {
            $insideClass = true;
            $transformed .= $byte;
            continue;
        }

        $transformed .= '.' === $byte ? '(?:[^\r\n]|\x{2028}|\x{2029})' : $byte;
    }

    return $transformed;
}
516582}