88 *
99 *
1010 * IDENTIFICATION
11- * $PostgreSQL: pgsql/src/backend/utils/adt/arrayfuncs.c,v 1.112 2004/09/16 03:15:52 neilc Exp $
11+ * $PostgreSQL: pgsql/src/backend/utils/adt/arrayfuncs.c,v 1.113 2004/09/27 01:39:02 tgl Exp $
1212 *
1313 *-------------------------------------------------------------------------
1414 */
7474#define RETURN_NULL (type ) do { *isNull = true; return (type) 0; } while (0)
7575
7676static int ArrayCount (char * str ,int * dim ,char typdelim );
77- static Datum * ReadArrayStr (char * arrayStr ,int nitems ,int ndim ,int * dim ,
77+ static Datum * ReadArrayStr (char * arrayStr ,const char * origStr ,
78+ int nitems ,int ndim ,int * dim ,
7879FmgrInfo * inputproc ,Oid typioparam ,int32 typmod ,
7980char typdelim ,
8081int typlen ,bool typbyval ,char typalign ,
@@ -325,7 +326,8 @@ array_in(PG_FUNCTION_ARGS)
325326ereport (ERROR ,
326327(errcode (ERRCODE_INVALID_TEXT_REPRESENTATION ),
327328errmsg ("missing left brace" )));
328- dataPtr = ReadArrayStr (p ,nitems ,ndim ,dim ,& my_extra -> proc ,typioparam ,
329+ dataPtr = ReadArrayStr (p ,string ,
330+ nitems ,ndim ,dim ,& my_extra -> proc ,typioparam ,
329331typmod ,typdelim ,typlen ,typbyval ,typalign ,
330332& nbytes );
331333nbytes += ARR_OVERHEAD (ndim );
@@ -371,7 +373,7 @@ ArrayCount(char *str, int *dim, char typdelim)
371373temp [MAXDIM ],
372374nelems [MAXDIM ],
373375nelems_last [MAXDIM ];
374- bool scanning_string = false;
376+ bool in_quotes = false;
375377bool eoArray = false;
376378bool empty_array = true;
377379char * ptr ;
@@ -443,14 +445,14 @@ ArrayCount(char *str, int *dim, char typdelim)
443445ereport (ERROR ,
444446 (errcode (ERRCODE_INVALID_TEXT_REPRESENTATION ),
445447errmsg ("malformed array literal: \"%s\"" ,str )));
446- scanning_string = !scanning_string ;
447- if (scanning_string )
448+ in_quotes = !in_quotes ;
449+ if (in_quotes )
448450parse_state = ARRAY_QUOTED_ELEM_STARTED ;
449451else
450452parse_state = ARRAY_QUOTED_ELEM_COMPLETED ;
451453break ;
452454case '{' :
453- if (!scanning_string )
455+ if (!in_quotes )
454456{
455457/*
456458 * A left brace can occur if no nesting has
@@ -476,7 +478,7 @@ ArrayCount(char *str, int *dim, char typdelim)
476478}
477479break ;
478480case '}' :
479- if (!scanning_string )
481+ if (!in_quotes )
480482{
481483/*
482484 * A right brace can occur after an element start,
@@ -520,7 +522,7 @@ ArrayCount(char *str, int *dim, char typdelim)
520522}
521523break ;
522524default :
523- if (!scanning_string )
525+ if (!in_quotes )
524526{
525527if (* ptr == typdelim )
526528{
@@ -595,16 +597,19 @@ ArrayCount(char *str, int *dim, char typdelim)
595597 * declaration. Unspecified elements are initialized to zero for fixed length
596598 * base types and to empty varlena structures for variable length base
597599 * types. (This is pretty bogus; NULL would be much safer.)
600+ *
598601 * result :
599602 * returns a palloc'd array of Datum representations of the array elements.
600603 * If element type is pass-by-ref, the Datums point to palloc'd values.
601604 * *nbytes is set to the amount of data space needed for the array,
602605 * including alignment padding but not including array header overhead.
603- * CAUTION: the contents of "arrayStr" may be modified!
606+ *
607+ * CAUTION: the contents of "arrayStr" will be modified!
604608 *---------------------------------------------------------------------------
605609 */
606610static Datum *
607611ReadArrayStr (char * arrayStr ,
612+ const char * origStr ,
608613int nitems ,
609614int ndim ,
610615int * dim ,
@@ -620,154 +625,158 @@ ReadArrayStr(char *arrayStr,
620625int i ,
621626nest_level = 0 ;
622627Datum * values ;
623- char * ptr ;
624- bool scanning_string = false;
628+ char * srcptr ;
629+ bool in_quotes = false;
625630bool eoArray = false;
631+ int totbytes ;
626632int indx [MAXDIM ],
627633prod [MAXDIM ];
628634
629635mda_get_prod (ndim ,dim ,prod );
630636values = (Datum * )palloc0 (nitems * sizeof (Datum ));
631637MemSet (indx ,0 ,sizeof (indx ));
632638
633- /* read array enclosed within {} */
634- ptr = arrayStr ;
639+ /*
640+ * We have to remove " and \ characters to create a clean item value
641+ * to pass to the datatype input routine. We overwrite each item
642+ * value in-place within arrayStr to do this. srcptr is the current
643+ * scan point, and dstptr is where we are copying to.
644+ *
645+ * We also want to suppress leading and trailing unquoted whitespace.
646+ * We use the leadingspace flag to suppress leading space. Trailing
647+ * space is tracked by using dstendptr to point to the last significant
648+ * output character.
649+ *
650+ * The error checking in this routine is mostly pro-forma, since we
651+ * expect that ArrayCount() already validated the string.
652+ */
653+ srcptr = arrayStr ;
635654while (!eoArray )
636655{
637656bool itemdone = false;
638- bool itemquoted = false;
639- int i = -1 ;
657+ bool leadingspace = true;
640658char * itemstart ;
641- char * eptr ;
659+ char * dstptr ;
660+ char * dstendptr ;
642661
643- /* skip leading whitespace */
644- while (isspace ((unsignedchar )* ptr ))
645- ptr ++ ;
646-
647- itemstart = ptr ;
662+ i = -1 ;
663+ itemstart = dstptr = dstendptr = srcptr ;
648664
649665while (!itemdone )
650666{
651- switch (* ptr )
667+ switch (* srcptr )
652668{
653669case '\0' :
654670/* Signal a premature end of the string */
655671ereport (ERROR ,
656672(errcode (ERRCODE_INVALID_TEXT_REPRESENTATION ),
657- errmsg ("malformed array literal: \"%s\"" ,arrayStr )));
673+ errmsg ("malformed array literal: \"%s\"" ,
674+ origStr )));
658675break ;
659676case '\\' :
660- {
661- char * cptr ;
662-
663- /* Crunch the string on top of the backslash. */
664- for ( cptr = ptr ; * cptr != '\0' ; cptr ++ )
665- * cptr = * ( cptr + 1 );
666- if ( * ptr == '\0' )
667- ereport ( ERROR ,
668- ( errcode ( ERRCODE_INVALID_TEXT_REPRESENTATION ),
669- errmsg ( "malformed array literal: \"%s\"" , arrayStr ))) ;
670- break ;
671- }
677+ /* Skip backslash, copy next character as-is. */
678+ srcptr ++ ;
679+ if ( * srcptr == '\0' )
680+ ereport ( ERROR ,
681+ ( errcode ( ERRCODE_INVALID_TEXT_REPRESENTATION ),
682+ errmsg ( "malformed array literal: \"%s\"" ,
683+ origStr )));
684+ * dstptr ++ = * srcptr ++ ;
685+ /* Treat the escaped character as non-whitespace */
686+ leadingspace = false ;
687+ dstendptr = dstptr ;
688+ break ;
672689case '\"' :
690+ in_quotes = !in_quotes ;
691+ if (in_quotes )
692+ leadingspace = false;
693+ else
673694{
674- char * cptr ;
675-
676- scanning_string = !scanning_string ;
677- if (scanning_string )
678- {
679- itemquoted = true;
680-
681- /*
682- * Crunch the string on top of the first
683- * quote.
684- */
685- for (cptr = ptr ;* cptr != '\0' ;cptr ++ )
686- * cptr = * (cptr + 1 );
687- /* Back up to not miss following character. */
688- ptr -- ;
689- }
690- break ;
695+ /*
696+ * Advance dstendptr when we exit in_quotes; this
697+ * saves having to do it in all the other in_quotes
698+ * cases.
699+ */
700+ dstendptr = dstptr ;
691701}
702+ srcptr ++ ;
703+ break ;
692704case '{' :
693- if (!scanning_string )
705+ if (!in_quotes )
694706{
695707if (nest_level >=ndim )
696708ereport (ERROR ,
697- (errcode (ERRCODE_INVALID_TEXT_REPRESENTATION ),
698- errmsg ("malformed array literal: \"%s\"" ,arrayStr )));
709+ (errcode (ERRCODE_INVALID_TEXT_REPRESENTATION ),
710+ errmsg ("malformed array literal: \"%s\"" ,
711+ origStr )));
699712nest_level ++ ;
700713indx [nest_level - 1 ]= 0 ;
701- /* skip leading whitespace */
702- while (isspace ((unsignedchar )* (ptr + 1 )))
703- ptr ++ ;
704- itemstart = ptr + 1 ;
714+ srcptr ++ ;
705715}
716+ else
717+ * dstptr ++ = * srcptr ++ ;
706718break ;
707719case '}' :
708- if (!scanning_string )
720+ if (!in_quotes )
709721{
710722if (nest_level == 0 )
711723ereport (ERROR ,
712- (errcode (ERRCODE_INVALID_TEXT_REPRESENTATION ),
713- errmsg ("malformed array literal: \"%s\"" ,arrayStr )));
724+ (errcode (ERRCODE_INVALID_TEXT_REPRESENTATION ),
725+ errmsg ("malformed array literal: \"%s\"" ,
726+ origStr )));
714727if (i == -1 )
715728i = ArrayGetOffset0 (ndim ,indx ,prod );
716729indx [nest_level - 1 ]= 0 ;
717730nest_level -- ;
718731if (nest_level == 0 )
719732eoArray = itemdone = true;
720733else
721- {
722- /*
723- * tricky coding: terminate item value string
724- * at first '}', but don't process it till we
725- * see a typdelim char or end of array. This
726- * handles case where several '}'s appear
727- * successively in a multidimensional array.
728- */
729- * ptr = '\0' ;
730734indx [nest_level - 1 ]++ ;
731- }
735+ srcptr ++ ;
732736}
737+ else
738+ * dstptr ++ = * srcptr ++ ;
733739break ;
734740default :
735- if (* ptr == typdelim && !scanning_string )
741+ if (in_quotes )
742+ * dstptr ++ = * srcptr ++ ;
743+ else if (* srcptr == typdelim )
736744{
737745if (i == -1 )
738746i = ArrayGetOffset0 (ndim ,indx ,prod );
739747itemdone = true;
740748indx [ndim - 1 ]++ ;
749+ srcptr ++ ;
750+ }
751+ else if (isspace ((unsignedchar )* srcptr ))
752+ {
753+ /*
754+ * If leading space, drop it immediately. Else,
755+ * copy but don't advance dstendptr.
756+ */
757+ if (leadingspace )
758+ srcptr ++ ;
759+ else
760+ * dstptr ++ = * srcptr ++ ;
761+ }
762+ else
763+ {
764+ * dstptr ++ = * srcptr ++ ;
765+ leadingspace = false;
766+ dstendptr = dstptr ;
741767}
742768break ;
743769}
744- if (!itemdone )
745- ptr ++ ;
746770}
747- * ptr ++ = '\0' ;
771+
772+ Assert (dstptr < srcptr );
773+ * dstendptr = '\0' ;
774+
748775if (i < 0 || i >=nitems )
749776ereport (ERROR ,
750777(errcode (ERRCODE_INVALID_TEXT_REPRESENTATION ),
751- errmsg ("malformed array literal: \"%s\"" ,arrayStr )));
752-
753- /*
754- * skip trailing whitespace
755- */
756- eptr = ptr - 1 ;
757- if (!itemquoted )
758- {
759- /* skip to last non-NULL, non-space, character */
760- while ((* eptr == '\0' )|| (isspace ((unsignedchar )* eptr )))
761- eptr -- ;
762- * (++ eptr )= '\0' ;
763- }
764- else
765- {
766- /* skip to last quote character */
767- while (* eptr != '"' )
768- eptr -- ;
769- * eptr = '\0' ;
770- }
778+ errmsg ("malformed array literal: \"%s\"" ,
779+ origStr )));
771780
772781values [i ]= FunctionCall3 (inputproc ,
773782CStringGetDatum (itemstart ),
@@ -780,7 +789,7 @@ ReadArrayStr(char *arrayStr,
780789 */
781790if (typlen > 0 )
782791{
783- * nbytes = nitems * att_align (typlen ,typalign );
792+ totbytes = nitems * att_align (typlen ,typalign );
784793if (!typbyval )
785794for (i = 0 ;i < nitems ;i ++ )
786795if (values [i ]== (Datum )0 )
@@ -789,36 +798,37 @@ ReadArrayStr(char *arrayStr,
789798else
790799{
791800Assert (!typbyval );
792- * nbytes = 0 ;
801+ totbytes = 0 ;
793802for (i = 0 ;i < nitems ;i ++ )
794803{
795804if (values [i ]!= (Datum )0 )
796805{
797806/* let's just make sure data is not toasted */
798807if (typlen == -1 )
799808values [i ]= PointerGetDatum (PG_DETOAST_DATUM (values [i ]));
800- * nbytes = att_addlength (* nbytes ,typlen ,values [i ]);
801- * nbytes = att_align (* nbytes ,typalign );
809+ totbytes = att_addlength (totbytes ,typlen ,values [i ]);
810+ totbytes = att_align (totbytes ,typalign );
802811}
803812else if (typlen == -1 )
804813{
805814/* dummy varlena value (XXX bogus, see notes above) */
806815values [i ]= PointerGetDatum (palloc (sizeof (int32 )));
807816VARATT_SIZEP (DatumGetPointer (values [i ]))= sizeof (int32 );
808- * nbytes += sizeof (int32 );
809- * nbytes = att_align (* nbytes ,typalign );
817+ totbytes += sizeof (int32 );
818+ totbytes = att_align (totbytes ,typalign );
810819}
811820else
812821{
813822/* dummy cstring value */
814823Assert (typlen == -2 );
815824values [i ]= PointerGetDatum (palloc (1 ));
816825* ((char * )DatumGetPointer (values [i ]))= '\0' ;
817- * nbytes += 1 ;
818- * nbytes = att_align (* nbytes ,typalign );
826+ totbytes += 1 ;
827+ totbytes = att_align (totbytes ,typalign );
819828}
820829}
821830}
831+ * nbytes = totbytes ;
822832return values ;
823833}
824834