@@ -160,6 +160,30 @@ SEXP pkgcache_read_raw(SEXP paths) {
160160return result ;
161161}
162162
163+ // `vlsize` is the total size of the value, without the trailing NL
164+ // `comment_size` is the total size of comment lines, including trailing NL
165+ static SEXP create_value (char * vl ,int vlsize ,int comment_size ) {
166+ SEXP val = Rf_mkCharLenCE (vl ,vlsize - comment_size ,CE_BYTES );
167+ if (comment_size == 0 )return val ;
168+
169+ char * src = vl ,* tgt = (char * )CHAR (val ),* end = tgt + vlsize - comment_size ;
170+
171+ // the value cannot start with a comment, comments begin at column zero
172+ while (tgt < end ) {
173+ if (* src == '\n' && * (src + 1 )== '#' ) {
174+ src ++ ;
175+ while (* src != '\n' )src ++ ;
176+ // do not skip the last newline, we'll copy this over in the next
177+ // iteration, if any, if the next line is not a comment
178+ }else {
179+ * tgt = * src ;
180+ tgt ++ ;
181+ src ++ ;
182+ }
183+ }
184+ return val ;
185+ }
186+
163187/* --------------------------------------------------------------------- */
164188
165189#define S_BG 0/* beginning of the file */
@@ -175,6 +199,8 @@ SEXP pkgcache_parse_description_raw(SEXP raw) {
175199char * kw = NULL ,* vl = NULL ;
176200int kwsize = 0 ,vlsize = 0 ;
177201int linum = 1 ;
202+ int comment_size = 0 ;
203+ int tail_comment = 0 ;
178204
179205SEXP result = PROTECT (allocVector (STRSXP ,200 ));
180206SEXP names = PROTECT (allocVector (STRSXP ,200 ));
@@ -185,6 +211,16 @@ SEXP pkgcache_parse_description_raw(SEXP raw) {
185211
186212/* -- at the beginning --------------------------------------------- */
187213case S_BG :
214+ /* skip comments and whitespace */
215+ while (* p == '#' || isspace (* p )) {
216+ if (* p == '\n' )linum ++ ;
217+ if (* p == '#' ) {
218+ while (* p != '\n' && * p != '\0' )p ++ ;
219+ linum ++ ;
220+ }
221+ if (* p != '\0' )p ++ ;
222+ }
223+
188224if (* p == ':' || * p == '\r' || * p == '\n' || * p == ' ' || * p == '\t' ) {
189225R_THROW_ERROR (
190226"Invalid DESCRIPTION file, must start with an "
@@ -223,6 +259,7 @@ SEXP pkgcache_parse_description_raw(SEXP raw) {
223259
224260/* --- within a value ---------------------------------------------- */
225261case S_VL :
262+ tail_comment = 0 ;
226263/* newline might be the end of the value, if no continuation. */
227264if (* p == '\n' ) {
228265state = S_NL ;
@@ -237,15 +274,27 @@ SEXP pkgcache_parse_description_raw(SEXP raw) {
237274
238275/* -- right after a newline ---------------------------------------- */
239276case S_NL :
277+ /* comment line? */
278+ if (* p == '#' ) {
279+ tail_comment = 1 ;
280+ char * cs = p ;
281+ while (* p != '\n' && * p != '\0' )p ++ ;
282+ // vlsize does not include trailing newlines
283+ vlsize = p - vl ;
284+ if (* p != '\0' )p ++ ;
285+ // comment_size does include trailing newlines
286+ comment_size += (p - cs );
287+
240288/* maybe a continuation line */
241- if (* p == ' ' || * p == '\t' ) {
289+ } else if (* p == ' ' || * p == '\t' ) {
242290state = S_WS ;
243291p ++ ;
244292
245293/* otherwise we can save the field, and start parsing the next one */
246294 }else {
247- SET_STRING_ELT (result ,ridx ,Rf_mkCharLenCE (vl ,vlsize ,CE_BYTES ));
295+ SET_STRING_ELT (result ,ridx ,create_value (vl ,vlsize ,comment_size ));
248296SET_STRING_ELT (names ,ridx ,Rf_mkCharLenCE (kw ,kwsize ,CE_NATIVE ));
297+ comment_size = 0 ;
249298ridx ++ ;
250299kw = p ;
251300state = S_KW ;
@@ -277,10 +326,11 @@ SEXP pkgcache_parse_description_raw(SEXP raw) {
277326if (state == S_KW ) {
278327R_THROW_ERROR ("DESCRIPTION file ended while parsing a key" );
279328 }else if (state != S_BG ) {
280- /* Strip the trailing newline(s) */
281- while (p - 1 > start && * (p - 1 )== '\n' )p -- ;
329+ if (tail_comment )p -- ;
330+ /* Strip the trailing newline(s), need to ignore the last comment(s) */
331+ while (p - 1 > start && * (p - 1 )== '\n' )p -- ;
282332vlsize = p - vl ;
283- SET_STRING_ELT (result ,ridx ,Rf_mkCharLenCE (vl ,vlsize ,CE_BYTES ));
333+ SET_STRING_ELT (result ,ridx ,create_value (vl ,vlsize ,comment_size ));
284334SET_STRING_ELT (names ,ridx ,Rf_mkCharLenCE (kw ,kwsize ,CE_NATIVE ));
285335ridx ++ ;
286336 }
@@ -321,14 +371,19 @@ SEXP pkgcache_parse_packages_raw(SEXP raw) {
321371char tail = p [len - 1 ];
322372p [len - 1 ]= '\0' ;
323373
324- /* Skip whitespace first, check for empty file */
374+ /* Skip comments and whitespace */
375+ while (* p == '#' || isspace (* p )) {
376+ if (* p == '#' )while (* p != '\n' && * p != '\0' )p ++ ;
377+ if (* p != '\0' )p ++ ;
378+ }
379+ const char * begin = p ;
325380
326- while ( * p == '\n' || * p == '\r' ) p ++ ;
381+ /* Check for empty file.*/
327382if (* p == '\0' )return R_NilValue ;
328383
329- /* This is faster than manual search, because strchr is optimized.
330- It is also faster than strstr, for this special case of a two
331- character pattern. */
384+ /*Count the number of packages. This is faster than manual search,
385+ because strchr is optimized. It is also faster than strstr, for this
386+ special case of a two character pattern. */
332387
333388for (;;) {
334389p = strchr (p ,'\n' );
@@ -348,6 +403,8 @@ SEXP pkgcache_parse_packages_raw(SEXP raw) {
348403char * kw = NULL ,* vl = NULL ;
349404int kwsize = 0 ,vlsize = 0 ;
350405int linum = 1 ;
406+ int comment_size = 0 ;
407+ int tail_comment = 0 ;
351408int max_cols = 1000 ;
352409
353410SEXP nms = PROTECT (allocVector (STRSXP ,max_cols ));
@@ -357,7 +414,7 @@ SEXP pkgcache_parse_packages_raw(SEXP raw) {
357414hash_create (& table ,nms ,cols ,tab ,max_cols ,npkgs );
358415int npkg = 0 ;
359416
360- p = (char * )RAW ( raw ) ;
417+ p = (char * )begin ;
361418while (* p != '\0' ) {
362419switch (state ) {
363420
@@ -402,6 +459,7 @@ SEXP pkgcache_parse_packages_raw(SEXP raw) {
402459
403460/* --- within a value ---------------------------------------------- */
404461case S_VL :
462+ tail_comment = 0 ;
405463/* newline might be the end of the value, if no continuation. */
406464if (* p == '\n' ) {
407465state = S_NL ;
@@ -416,16 +474,27 @@ SEXP pkgcache_parse_packages_raw(SEXP raw) {
416474
417475/* -- right after a newline ---------------------------------------- */
418476case S_NL :
477+ /* comment line, ignore */
478+ if (* p == '#' ) {
479+ tail_comment = 1 ;
480+ char * cs = p ;
481+ while (* p != '\n' && * p != '\0' )p ++ ;
482+ // vlsize does not include trailing newlines
483+ vlsize = p - vl ;
484+ if (* p != '\0' )p ++ ;
485+ // comment_size does include trailing newlines
486+ comment_size += (p - cs );
419487
420488/* maybe a continuation line */
421- if (* p == ' ' || * p == '\t' ) {
489+ } else if (* p == ' ' || * p == '\t' ) {
422490state = S_WS ;
423491p ++ ;
424492
425493/* end of field */
426494 }else {
427495/* Save field */
428- SEXP val = PROTECT (mkCharLenCE (vl ,vlsize ,CE_BYTES ));
496+ SEXP val = PROTECT (create_value (vl ,vlsize ,comment_size ));
497+ comment_size = 0 ;
429498hash_update (& table ,kw ,kwsize ,npkg ,val ,/* err */ 1 );
430499UNPROTECT (1 );
431500
@@ -484,6 +553,8 @@ SEXP pkgcache_parse_packages_raw(SEXP raw) {
484553vlsize = p - vl ;
485554p = (char * )RAW (raw );
486555p [len - 1 ]= tail ;
556+ /* if ended with a comment, then need to drop last \n */
557+ if (tail_comment )tail = '\n' ;
487558if (state == S_VL && tail != '\n' )vlsize ++ ;
488559/* if the tail is a \n, we don't need that. We also drop \r, which
489560 is possibly not correct, but in practice better */
@@ -493,7 +564,7 @@ SEXP pkgcache_parse_packages_raw(SEXP raw) {
493564R_THROW_ERROR ("PACKAGES file ended while parsing a key" );
494565 }else if (state != S_BG ) {
495566/* Save field */
496- SEXP val = PROTECT (mkCharLenCE (vl ,vlsize ,CE_BYTES ));
567+ SEXP val = PROTECT (create_value (vl ,vlsize ,comment_size ));
497568hash_update (& table ,kw ,kwsize ,npkg ,val ,/* err= */ 1 );
498569UNPROTECT (1 );
499570 }
@@ -543,6 +614,8 @@ SEXP pkgcache_parse_descriptions(SEXP paths, SEXP lowercase) {
543614kwsize = 0 ;
544615vlsize = 0 ;
545616linum = 1 ;
617+ int comment_size = 0 ;
618+ int tail_comment = 0 ;
546619
547620int len = LENGTH (raw );
548621char * p = (char * )RAW (raw );
@@ -553,6 +626,16 @@ SEXP pkgcache_parse_descriptions(SEXP paths, SEXP lowercase) {
553626switch (state ) {
554627/* -- at the beginning ------------------------------------------- */
555628case S_BG :
629+ /* skip comments and whitespace */
630+ while (* p == '#' || isspace (* p )) {
631+ if (* p == '\n' )linum ++ ;
632+ if (* p == '#' ) {
633+ while (* p != '\n' && * p != '\0' )p ++ ;
634+ linum ++ ;
635+ }
636+ if (* p != '\0' )p ++ ;
637+ }
638+
556639if (* p == ':' || * p == '\r' || * p == '\n' || * p == ' ' || * p == '\t' ) {
557640SET_STRING_ELT (
558641errors ,
@@ -606,6 +689,7 @@ SEXP pkgcache_parse_descriptions(SEXP paths, SEXP lowercase) {
606689
607690/* --- within a value -------------------------------------------- */
608691case S_VL :
692+ tail_comment = 0 ;
609693if (* p == '\n' ) {
610694state = S_NL ;
611695vlsize = p - vl ;
@@ -620,14 +704,26 @@ SEXP pkgcache_parse_descriptions(SEXP paths, SEXP lowercase) {
620704
621705/* -- right after a newline -------------------------------------- */
622706case S_NL :
707+ /* comment line? */
708+ if (* p == '#' ) {
709+ tail_comment = 1 ;
710+ char * cs = p ;
711+ while (* p != '\n' && * p != '\0' )p ++ ;
712+ // vlsize does not include trailing newlines
713+ vlsize = p - vl ;
714+ if (* p != '\0' )p ++ ;
715+ // comment_size does include trailing newlines
716+ comment_size += (p - cs );
717+
623718/* maybe a continuation line */
624- if (* p == ' ' || * p == '\t' ) {
719+ } else if (* p == ' ' || * p == '\t' ) {
625720state = S_WS ;
626721p ++ ;
627722
628723/* otherwise we can save the field, and start parsing the next one */
629724 }else {
630- SEXP val = PROTECT (mkCharLenCE (vl ,vlsize ,CE_BYTES ));
725+ SEXP val = PROTECT (create_value (vl ,vlsize ,comment_size ));
726+ comment_size = 0 ;
631727hash_update (& table ,kw ,kwsize ,npkg ,val ,1 );
632728UNPROTECT (1 );
633729
@@ -663,7 +759,12 @@ SEXP pkgcache_parse_descriptions(SEXP paths, SEXP lowercase) {
663759vlsize = p - vl ;
664760p = (char * )RAW (raw );
665761p [len - 1 ]= tail ;
762+ /* if ended with a comment, then need to drop last \n */
763+ if (tail_comment )tail = '\n' ;
666764if (state == S_VL && tail != '\n' )vlsize ++ ;
765+ /* if the tail is a \n, we don't need that. We also drop \r, which
766+ is possibly not correct, but in practice better */
767+ if (state == S_NL && (tail == '\n' || tail == '\r' ))vlsize -- ;
667768
668769if (state == S_KW ) {
669770SET_STRING_ELT (
@@ -679,7 +780,7 @@ SEXP pkgcache_parse_descriptions(SEXP paths, SEXP lowercase) {
679780
680781 }else {
681782/* Save field */
682- SEXP val = PROTECT (mkCharLenCE (vl ,vlsize ,CE_BYTES ));
783+ SEXP val = PROTECT (create_value (vl ,vlsize ,comment_size ));
683784hash_update (& table ,kw ,kwsize ,npkg ,val ,/* err = */ 1 );
684785UNPROTECT (1 );
685786 }