88 *
99 *
1010 * IDENTIFICATION
11- * $PostgreSQL: pgsql/src/interfaces/libpq/fe-protocol3.c,v 1.27 2006/08/18 19:52:39 tgl Exp $
11+ * $PostgreSQL: pgsql/src/interfaces/libpq/fe-protocol3.c,v 1.28 2006/10/01 22:25:48 tgl Exp $
1212 *
1313 *-------------------------------------------------------------------------
1414 */
@@ -883,20 +883,25 @@ reportErrorPosition(PQExpBuffer msg, const char *query, int loc, int encoding)
883883#define MIN_RIGHT_CUT 10/* try to keep this far away from EOL */
884884
885885char * wquery ;
886- int clen ,
887- slen ,
886+ int slen ,
887+ cno ,
888888i ,
889- w ,
890889* qidx ,
891890* scridx ,
892891qoffset ,
893892scroffset ,
894893ibeg ,
895894iend ,
896895loc_line ;
897- bool beg_trunc ,
896+ bool mb_encoding ,
897+ beg_trunc ,
898898end_trunc ;
899899
900+ /* Convert loc from 1-based to 0-based; no-op if out of range */
901+ loc -- ;
902+ if (loc < 0 )
903+ return ;
904+
900905/* Need a writable copy of the query */
901906wquery = strdup (query );
902907if (wquery == NULL )
@@ -905,13 +910,13 @@ reportErrorPosition(PQExpBuffer msg, const char *query, int loc, int encoding)
905910/*
906911 * Each character might occupy multiple physical bytes in the string, and
907912 * in some Far Eastern character sets it might take more than one screen
908- * column as well. We compute the starting byte offset and starting
913+ * column as well. We compute the starting byte offset and starting
909914 * screen column of each logical character, and store these in qidx[] and
910915 * scridx[] respectively.
911916 */
912917
913918/* we need a safe allocation size... */
914- slen = strlen (query )+ 1 ;
919+ slen = strlen (wquery )+ 1 ;
915920
916921qidx = (int * )malloc (slen * sizeof (int ));
917922if (qidx == NULL )
@@ -927,79 +932,93 @@ reportErrorPosition(PQExpBuffer msg, const char *query, int loc, int encoding)
927932return ;
928933}
929934
935+ /* We can optimize a bit if it's a single-byte encoding */
936+ mb_encoding = (pg_encoding_max_length (encoding )!= 1 );
937+
938+ /*
939+ * Within the scanning loop, cno is the current character's logical number,
940+ * qoffset is its offset in wquery, and scroffset is its starting logical
941+ * screen column (all indexed from 0). "loc" is the logical character
942+ * number of the error location. We scan to determine loc_line (the
943+ * 1-based line number containing loc) and ibeg/iend (first character
944+ * number and last+1 character number of the line containing loc).
945+ * Note that qidx[] and scridx[] are filled only as far as iend.
946+ */
930947qoffset = 0 ;
931948scroffset = 0 ;
932- for (i = 0 ;query [qoffset ]!= '\0' ;i ++ )
933- {
934- qidx [i ]= qoffset ;
935- scridx [i ]= scroffset ;
936- w = pg_encoding_dsplen (encoding ,& query [qoffset ]);
937- /* treat control chars as width 1; see tab hack below */
938- if (w <=0 )
939- w = 1 ;
940- scroffset += w ;
941- qoffset += pg_encoding_mblen (encoding ,& query [qoffset ]);
942- }
943- qidx [i ]= qoffset ;
944- scridx [i ]= scroffset ;
945- clen = i ;
949+ loc_line = 1 ;
950+ ibeg = 0 ;
951+ iend = -1 ;/* -1 means not set yet */
946952
947- /* convert loc to zero-based offset in qidx/scridx arrays */
948- loc -- ;
949-
950- /* do we have something to show? */
951- if (loc >=0 && loc <=clen )
953+ for (cno = 0 ;wquery [qoffset ]!= '\0' ;cno ++ )
952954{
953- /* input line number of our syntax error. */
954- loc_line = 1 ;
955- /* first included char of extract. */
956- ibeg = 0 ;
957- /* last-plus-1 included char of extract. */
958- iend = clen ;
955+ char ch = wquery [qoffset ];
956+
957+ qidx [cno ]= qoffset ;
958+ scridx [cno ]= scroffset ;
959959
960960/*
961961 * Replace tabs with spaces in the writable copy. (Later we might
962962 * want to think about coping with their variable screen width, but
963963 * not today.)
964- *
965- * Extract line number and begin and end indexes of line containing
966- * error location.There will not be any newlines or carriage returns
967- * in the selected extract.
968964 */
969- for (i = 0 ;i < clen ;i ++ )
965+ if (ch == '\t' )
966+ wquery [qoffset ]= ' ' ;
967+
968+ /*
969+ * If end-of-line, count lines and mark positions. Each \r or \n counts
970+ * as a line except when \r \n appear together.
971+ */
972+ else if (ch == '\r' || ch == '\n' )
970973{
971- /* character length must be 1 or it's not ASCII */
972- if ((qidx [i + 1 ]- qidx [i ])== 1 )
974+ if (cno < loc )
973975{
974- if (wquery [qidx [i ]]== '\t' )
975- wquery [qidx [i ]]= ' ' ;
976- else if (wquery [qidx [i ]]== '\r' || wquery [qidx [i ]]== '\n' )
977- {
978- if (i < loc )
979- {
980- /*
981- * count lines before loc. Each \r or \n counts
982- * as a line except when \r \n appear together.
983- */
984- if (wquery [qidx [i ]]== '\r' ||
985- i == 0 ||
986- (qidx [i ]- qidx [i - 1 ])!= 1 ||
987- wquery [qidx [i - 1 ]]!= '\r' )
988- loc_line ++ ;
989- /* extract beginning = last line start before loc. */
990- ibeg = i + 1 ;
991- }
992- else
993- {
994- /* set extract end. */
995- iend = i ;
996- /* done scanning. */
997- break ;
998- }
999- }
976+ if (ch == '\r' ||
977+ cno == 0 ||
978+ wquery [qidx [cno - 1 ]]!= '\r' )
979+ loc_line ++ ;
980+ /* extract beginning = last line start before loc. */
981+ ibeg = cno + 1 ;
982+ }
983+ else
984+ {
985+ /* set extract end. */
986+ iend = cno ;
987+ /* done scanning. */
988+ break ;
1000989}
1001990}
1002991
992+ /* Advance */
993+ if (mb_encoding )
994+ {
995+ int w ;
996+
997+ w = pg_encoding_dsplen (encoding ,& wquery [qoffset ]);
998+ /* treat any non-tab control chars as width 1 */
999+ if (w <=0 )
1000+ w = 1 ;
1001+ scroffset += w ;
1002+ qoffset += pg_encoding_mblen (encoding ,& wquery [qoffset ]);
1003+ }
1004+ else
1005+ {
1006+ /* We assume wide chars only exist in multibyte encodings */
1007+ scroffset ++ ;
1008+ qoffset ++ ;
1009+ }
1010+ }
1011+ /* Fix up if we didn't find an end-of-line after loc */
1012+ if (iend < 0 )
1013+ {
1014+ iend = cno ;/* query length in chars, +1 */
1015+ qidx [iend ]= qoffset ;
1016+ scridx [iend ]= scroffset ;
1017+ }
1018+
1019+ /* Print only if loc is within computed query length */
1020+ if (loc <=cno )
1021+ {
10031022/* If the line extracted is too long, we truncate it. */
10041023beg_trunc = false;
10051024end_trunc = false;
@@ -1050,7 +1069,8 @@ reportErrorPosition(PQExpBuffer msg, const char *query, int loc, int encoding)
10501069scroffset = 0 ;
10511070for (;i < msg -> len ;i += pg_encoding_mblen (encoding ,& msg -> data [i ]))
10521071{
1053- w = pg_encoding_dsplen (encoding ,& msg -> data [i ]);
1072+ int w = pg_encoding_dsplen (encoding ,& msg -> data [i ]);
1073+
10541074if (w <=0 )
10551075w = 1 ;
10561076scroffset += w ;