@@ -205,32 +205,6 @@ gm_attach_file <- function(mime, filename, type = NULL, id = NULL, ...) {
205205 )
206206}
207207
208- header_encode <- function (x ) {
209- x <- enc2utf8(unlist(strsplit(as.character(x )," , ?" )))
210-
211- # this won't deal with <> used in quotes, but I think it is rare enough that
212- # is ok
213- m <- rematch2 :: re_match(x ," ^(?<phrase>[^<]*?)(?: *<(?<addr_spec>[^>]+)>)?$" )
214- res <- character (length(x ))
215-
216- # simple addresses contain no <>, so we don't need to do anything further
217- simple <- ! nzchar(m $ addr_spec )
218- res [simple ]<- m $ phrase [simple ]
219-
220- # complex addresses may need to be base64-encoded
221- needs_encoding <- Encoding(m $ phrase )!= " unknown"
222- res [needs_encoding ]<- sprintf(
223- " =?utf-8?B?%s?=" ,
224- vcapply(m $ phrase [needs_encoding ],encode_base64 )
225- )
226- res [! needs_encoding ]<- m $ phrase [! needs_encoding ]
227-
228- # Add the addr_spec onto non-simple examples
229- res [! simple ]<- sprintf(" %s <%s>" ,res [! simple ],m $ addr_spec [! simple ])
230-
231- paste0(res ,collapse = " ," )
232- }
233-
234208# ' Convert a mime object to character representation
235209# '
236210# ' This function converts a mime object into a character vector
@@ -240,8 +214,9 @@ header_encode <- function(x) {
240214# ' @param ... further arguments ignored
241215# ' @export
242216as.character.mime <- function (x ,newline = " \r\n " ,... ) {
243- # encode headers
244- x $ header <- lapply(x $ header ,header_encode )
217+ for (i in seq_along(x $ header )) {
218+ x $ header [[i ]]<- encode_header(names(x $ header )[i ],x $ header [[i ]])
219+ }
245220
246221# Check if we need nested structure ((text + HTML) + attachments)
247222has_both_bodies <- exists_list(x $ parts ,TEXT_PART )&&
@@ -364,3 +339,123 @@ with_defaults <- function(defaults, ...) {
364339missing <- setdiff(names(defaults ), names(args ))
365340 c(defaults [missing ],args )
366341}
342+
343+ # Header encoding helpers ------------------------------------------------------
344+ #
345+ # In general, the Gmail API requires following RFC 2822 Internet Message Format
346+ # https://datatracker.ietf.org/doc/html/rfc2822
347+ #
348+ # Then, within that, non-ASCII text in headers is addressed in RFC 2047 MIME
349+ # Part Three: Message Header Extensions for Non-ASCII Text
350+ # https://datatracker.ietf.org/doc/html/rfc2047
351+ #
352+ # Refactoring the header processing was motivated by
353+ # https://github.com/r-lib/gmailr/issues/193
354+
355+ # Strategy: Divide headers into address headers vs. everything else.
356+ #
357+ # Use existing helper to encode address headers, as it was clearly written for
358+ # that use case.
359+ #
360+ # Use a new helper for other headers, that can deal with "folding" (see the RFC)
361+ # long-ish, non-ASCII text, e.g. in the Subject.
362+
363+ encode_header <- function (name ,value ) {
364+ address_headers <- c(
365+ " To" ,
366+ " From" ,
367+ " Cc" ,
368+ " Bcc" ,
369+ " Reply-To" ,
370+ " Sender" ,
371+ " Resent-To" ,
372+ " Resent-From" ,
373+ " Resent-Cc" ,
374+ " Resent-Bcc" ,
375+ " Resent-Sender"
376+ )
377+
378+ fun <- if (name %in% address_headers ) {
379+ header_encode_address
380+ }else {
381+ header_encode_text
382+ }
383+ fun(value )
384+ }
385+
386+ # Pre-existing helper now renamed to reflect its motivating use case.
387+ # - May contain multiple comma-separated addresses
388+ # - Each address may have the format "Name" <email@example.com>
389+ # - Only the "Name" part needs encoding, not the email address
390+ header_encode_address <- function (x ) {
391+ x <- enc2utf8(unlist(strsplit(as.character(x )," , ?" )))
392+
393+ # this won't deal with <> used in quotes, but I think it is rare enough that
394+ # is ok
395+ m <- rematch2 :: re_match(x ," ^(?<phrase>[^<]*?)(?: *<(?<addr_spec>[^>]+)>)?$" )
396+ res <- character (length(x ))
397+
398+ # simple addresses contain no <>, so we don't need to do anything further
399+ simple <- ! nzchar(m $ addr_spec )
400+ res [simple ]<- m $ phrase [simple ]
401+
402+ # complex addresses may need to be base64-encoded
403+ needs_encoding <- Encoding(m $ phrase )!= " unknown"
404+ res [needs_encoding ]<- sprintf(
405+ " =?utf-8?B?%s?=" ,
406+ vcapply(m $ phrase [needs_encoding ],encode_base64 )
407+ )
408+ res [! needs_encoding ]<- m $ phrase [! needs_encoding ]
409+
410+ # Add the addr_spec onto non-simple examples
411+ res [! simple ]<- sprintf(" %s <%s>" ,res [! simple ],m $ addr_spec [! simple ])
412+
413+ paste0(res ,collapse = " ," )
414+ }
415+
416+ # New helper for a generic "text" header
417+ # - Single value (not comma-separated)
418+ # - May contain long Unicode text that exceeds RFC 2047's 75-character limit
419+ # - Must be "folded" into multiple encoded-words if too long
420+ header_encode_text <- function (x ) {
421+ if (length(x )== 0 || is.null(x )) {
422+ return (x )
423+ }
424+
425+ x <- enc2utf8(as.character(x ))
426+
427+ # Pass pure ASCII through unchanged
428+ if (Encoding(x )== " unknown" ) {
429+ return (x )
430+ }
431+
432+ # First, get a single base64-encoded string
433+ b64_full <- encode_base64(x ,line_length = 0L ,newline = " " )
434+ b64_len <- nchar(b64_full )
435+
436+ # encoded-word = "=?" charset "?" encoding "?" encoded-text "?="
437+ # charset is utf-8
438+ # encoding is "B" (as opposed to "Q"), as in "BASE64"
439+ encode_word <- function (b64 ) sprintf(" =?utf-8?B?%s?=" ,b64 )
440+
441+ # RFC 2047: "An 'encoded-word' may not be more than 75 characters long,
442+ # including 'charset', 'encoding', 'encoded-text', and delimiters."
443+ # Format: =?utf-8?B?<encoded-text>?=
444+ # The formalities account for 12 characters, which leaves up to 63 characters
445+ # for the encoded text. However, base64 works in 4-character groups, so we
446+ # must use a multiple of 4: the largest is 60.
447+ max_b64_per_word <- 60
448+
449+ # Return as single encoded-word, if possible
450+ if (b64_len < = max_b64_per_word ) {
451+ return (encode_word(b64_full ))
452+ }
453+
454+ # Otherwise, split into multiple encoded-words
455+ starts <- seq(1L ,b64_len ,by = max_b64_per_word )
456+ stops <- c(starts [- 1 ]- 1L ,b64_len )
457+ encoded_words <- encode_word(substring(b64_full ,starts ,stops ))
458+
459+ # Join multiple encoded-words with CRLF SPACE per RFC 2047
460+ paste0(encoded_words ,collapse = " \r\n " )
461+ }