NotificationsYou must be signed in to change notification settings
Fork56
Star236

Commitd4b5d41

authored

Fold text headers according to RFC 2047 (#211)

* Pass the arguments of encode_base64() along
* Handle long-ish `Subject` with non-ASCII characters
* Harden against R 4.1 on Windows

1 parent38a6f3e commitd4b5d41Copy full SHA for d4b5d41

File tree

4 files changed

+207

-39

lines changed

4 files changed

+207

-39

lines changed

`‎NEWS.md‎`

Lines changed: 2 additions & 0 deletions

Original file line number	Diff line number	Diff line change
`@@ -5,6 +5,8 @@`
`5`	`5`	- Previously: `c("condition", "error", "gmail_error")`
`6`	`6`	- Now: `c("gmailr_error", "gargle_error_request_failed", "http_error_{XXX}", "gargle_error", "rlang_error", "error", "condition")`
`7`	`7`
	`8`	+* Text headers, such as `Subject`, are now properly prepared as per RFC 2047, fixing a problem with long-ish headers that contain non-ASCII characters (#193).
	`9`	`+`
`8`	`10`	`## Deprecations`
`9`	`11`
`10`	`12`	* Functions that lack the `gm_` prefix have been removed, concluding a deprecation process that kicked off with gmailr 1.0.0 (released 2019-08-23). These functions were hard deprecated in gmailr 2.0.0 (released 2023-06-30). This eliminates many name conflicts with other packages (including the base package).

`‎R/gm_mime.R‎`

Lines changed: 123 additions & 28 deletions

Original file line number	Diff line number	Diff line change
`@@ -205,32 +205,6 @@ gm_attach_file <- function(mime, filename, type = NULL, id = NULL, ...) {`
`205`	`205`	`)`
`206`	`206`	`}`
`207`	`207`
`208`		`-header_encode<-function(x) {`
`209`		`-x<- enc2utf8(unlist(strsplit(as.character(x),", ?")))`
`210`		`-`
`211`		`-# this won't deal with <> used in quotes, but I think it is rare enough that`
`212`		`-# is ok`
`213`		`-m<-rematch2::re_match(x,"^(?<phrase>[^<]*?)(?: *<(?<addr_spec>[^>]+)>)?$")`
`214`		`-res<-character(length(x))`
`215`		`-`
`216`		`-# simple addresses contain no <>, so we don't need to do anything further`
`217`		`-simple<-!nzchar(m$addr_spec)`
`218`		`-res[simple]<-m$phrase[simple]`
`219`		`-`
`220`		`-# complex addresses may need to be base64-encoded`
`221`		`-needs_encoding<- Encoding(m$phrase)!="unknown"`
`222`		`-res[needs_encoding]<- sprintf(`
`223`		`-"=?utf-8?B?%s?=",`
`224`		`- vcapply(m$phrase[needs_encoding],encode_base64)`
`225`		`- )`
`226`		`-res[!needs_encoding]<-m$phrase[!needs_encoding]`
`227`		`-`
`228`		`-# Add the addr_spec onto non-simple examples`
`229`		`-res[!simple]<- sprintf("%s <%s>",res[!simple],m$addr_spec[!simple])`
`230`		`-`
`231`		`- paste0(res,collapse=", ")`
`232`		`-}`
`233`		`-`
`234`	`208`	`#' Convert a mime object to character representation`
`235`	`209`	`#'`
`236`	`210`	`#' This function converts a mime object into a character vector`
`@@ -240,8 +214,9 @@ header_encode <- function(x) {`
`240`	`214`	`#' @param ... further arguments ignored`
`241`	`215`	`#' @export`
`242`	`216`	`as.character.mime<-function(x,newline="\r\n",...) {`
`243`		`-# encode headers`
`244`		`-x$header<- lapply(x$header,header_encode)`
	`217`	`+for (i in seq_along(x$header)) {`
	`218`	`+x$header[[i]]<- encode_header(names(x$header)[i],x$header[[i]])`
	`219`	`+ }`
`245`	`220`
`246`	`221`	`# Check if we need nested structure ((text + HTML) + attachments)`
`247`	`222`	`has_both_bodies<- exists_list(x$parts,TEXT_PART)&&`
`@@ -364,3 +339,123 @@ with_defaults <- function(defaults, ...) {`
`364`	`339`	`missing<- setdiff(names(defaults), names(args))`
`365`	`340`	`c(defaults[missing],args)`
`366`	`341`	`}`
	`342`	`+`
	`343`	`+# Header encoding helpers ------------------------------------------------------`
	`344`	`+#`
	`345`	`+# In general, the Gmail API requires following RFC 2822 Internet Message Format`
	`346`	`+# https://datatracker.ietf.org/doc/html/rfc2822`
	`347`	`+#`
	`348`	`+# Then, within that, non-ASCII text in headers is addressed in RFC 2047 MIME`
	`349`	`+# Part Three: Message Header Extensions for Non-ASCII Text`
	`350`	`+# https://datatracker.ietf.org/doc/html/rfc2047`
	`351`	`+#`
	`352`	`+# Refactoring the header processing was motivated by`
	`353`	`+# https://github.com/r-lib/gmailr/issues/193`
	`354`	`+`
	`355`	`+# Strategy: Divide headers into address headers vs. everything else.`
	`356`	`+#`
	`357`	`+# Use existing helper to encode address headers, as it was clearly written for`
	`358`	`+# that use case.`
	`359`	`+#`
	`360`	`+# Use a new helper for other headers, that can deal with "folding" (see the RFC)`
	`361`	`+# long-ish, non-ASCII text, e.g. in the Subject.`
	`362`	`+`
	`363`	`+encode_header<-function(name,value) {`
	`364`	`+address_headers<- c(`
	`365`	`+"To",`
	`366`	`+"From",`
	`367`	`+"Cc",`
	`368`	`+"Bcc",`
	`369`	`+"Reply-To",`
	`370`	`+"Sender",`
	`371`	`+"Resent-To",`
	`372`	`+"Resent-From",`
	`373`	`+"Resent-Cc",`
	`374`	`+"Resent-Bcc",`
	`375`	`+"Resent-Sender"`
	`376`	`+ )`
	`377`	`+`
	`378`	`+fun<-if (name%in%address_headers) {`
	`379`	`+header_encode_address`
	`380`	`+ }else {`
	`381`	`+header_encode_text`
	`382`	`+ }`
	`383`	`+ fun(value)`
	`384`	`+}`
	`385`	`+`
	`386`	`+# Pre-existing helper now renamed to reflect its motivating use case.`
	`387`	`+# - May contain multiple comma-separated addresses`
	`388`	`+# - Each address may have the format "Name" <email@example.com>`
	`389`	`+# - Only the "Name" part needs encoding, not the email address`
	`390`	`+header_encode_address<-function(x) {`
	`391`	`+x<- enc2utf8(unlist(strsplit(as.character(x),", ?")))`
	`392`	`+`
	`393`	`+# this won't deal with <> used in quotes, but I think it is rare enough that`
	`394`	`+# is ok`
	`395`	`+m<-rematch2::re_match(x,"^(?<phrase>[^<]*?)(?: *<(?<addr_spec>[^>]+)>)?$")`
	`396`	`+res<-character(length(x))`
	`397`	`+`
	`398`	`+# simple addresses contain no <>, so we don't need to do anything further`
	`399`	`+simple<-!nzchar(m$addr_spec)`
	`400`	`+res[simple]<-m$phrase[simple]`
	`401`	`+`
	`402`	`+# complex addresses may need to be base64-encoded`
	`403`	`+needs_encoding<- Encoding(m$phrase)!="unknown"`
	`404`	`+res[needs_encoding]<- sprintf(`
	`405`	`+"=?utf-8?B?%s?=",`
	`406`	`+ vcapply(m$phrase[needs_encoding],encode_base64)`
	`407`	`+ )`
	`408`	`+res[!needs_encoding]<-m$phrase[!needs_encoding]`
	`409`	`+`
	`410`	`+# Add the addr_spec onto non-simple examples`
	`411`	`+res[!simple]<- sprintf("%s <%s>",res[!simple],m$addr_spec[!simple])`
	`412`	`+`
	`413`	`+ paste0(res,collapse=", ")`
	`414`	`+}`
	`415`	`+`
	`416`	`+# New helper for a generic "text" header`
	`417`	`+# - Single value (not comma-separated)`
	`418`	`+# - May contain long Unicode text that exceeds RFC 2047's 75-character limit`
	`419`	`+# - Must be "folded" into multiple encoded-words if too long`
	`420`	`+header_encode_text<-function(x) {`
	`421`	`+if (length(x)==0 || is.null(x)) {`
	`422`	`+return(x)`
	`423`	`+ }`
	`424`	`+`
	`425`	`+x<- enc2utf8(as.character(x))`
	`426`	`+`
	`427`	`+# Pass pure ASCII through unchanged`
	`428`	`+if (Encoding(x)=="unknown") {`
	`429`	`+return(x)`
	`430`	`+ }`
	`431`	`+`
	`432`	`+# First, get a single base64-encoded string`
	`433`	`+b64_full<- encode_base64(x,line_length=0L,newline="")`
	`434`	`+b64_len<- nchar(b64_full)`
	`435`	`+`
	`436`	`+# encoded-word = "=?" charset "?" encoding "?" encoded-text "?="`
	`437`	`+# charset is utf-8`
	`438`	`+# encoding is "B" (as opposed to "Q"), as in "BASE64"`
	`439`	`+encode_word<-function(b64) sprintf("=?utf-8?B?%s?=",b64)`
	`440`	`+`
	`441`	`+# RFC 2047: "An 'encoded-word' may not be more than 75 characters long,`
	`442`	`+# including 'charset', 'encoding', 'encoded-text', and delimiters."`
	`443`	`+# Format: =?utf-8?B?<encoded-text>?=`
	`444`	`+# The formalities account for 12 characters, which leaves up to 63 characters`
	`445`	`+# for the encoded text. However, base64 works in 4-character groups, so we`
	`446`	`+# must use a multiple of 4: the largest is 60.`
	`447`	`+max_b64_per_word<-60`
	`448`	`+`
	`449`	`+# Return as single encoded-word, if possible`
	`450`	`+if (b64_len<=max_b64_per_word) {`
	`451`	`+return(encode_word(b64_full))`
	`452`	`+ }`
	`453`	`+`
	`454`	`+# Otherwise, split into multiple encoded-words`
	`455`	`+starts<- seq(1L,b64_len,by=max_b64_per_word)`
	`456`	`+stops<- c(starts[-1]-1L,b64_len)`
	`457`	`+encoded_words<- encode_word(substring(b64_full,starts,stops))`
	`458`	`+`
	`459`	`+# Join multiple encoded-words with CRLF SPACE per RFC 2047`
	`460`	`+ paste0(encoded_words,collapse="\r\n ")`
	`461`	`+}`

`‎R/utils.R‎`

Lines changed: 2 additions & 2 deletions

Original file line number	Diff line number	Diff line change
`@@ -171,9 +171,9 @@ encode_base64 <- function(x, line_length = 76L, newline = "\r\n") {`
`171`	`171`	`}`
`172`	`172`
`173`	`173`	`if (is.raw(x)) {`
`174`		`- base64encode(x,76L,newline)`
	`174`	`+ base64encode(x,line_length,newline)`
`175`	`175`	`}else {`
`176`		`- base64encode(charToRaw(x),76L,"\r\n")`
	`176`	`+ base64encode(charToRaw(x),line_length,newline)`
`177`	`177`	`}`
`178`	`178`	`}`
`179`	`179`

`‎tests/testthat/test-gm_mime.R‎`

Lines changed: 80 additions & 9 deletions

Original file line number	Diff line number	Diff line change
`@@ -4,39 +4,39 @@ test_that("MIME - Basic functions", {`
`4`	`4`	`expect_true(length(msg$header)>0)`
`5`	`5`
`6`	`6`	`rv<- gm_to(msg,"adam@ali.as")`
`7`		`- expect_equal(header_encode(rv$header$To),"adam@ali.as")`
	`7`	`+ expect_equal(header_encode_address(rv$header$To),"adam@ali.as")`
`8`	`8`
`9`	`9`	`rv<- gm_from(msg,"bob@ali.as")`
`10`		`- expect_equal(header_encode(rv$header$From),"bob@ali.as")`
	`10`	`+ expect_equal(header_encode_address(rv$header$From),"bob@ali.as")`
`11`	`11`
`12`	`12`	`rv<- gm_to(msg, c("adam@ali.as","another@ali.as","bob@ali.as"))`
`13`	`13`	`expect_equal(`
`14`		`-header_encode(rv$header$To),`
	`14`	`+header_encode_address(rv$header$To),`
`15`	`15`	`"adam@ali.as, another@ali.as, bob@ali.as"`
`16`	`16`	`)`
`17`	`17`
`18`	`18`	`rv<- gm_cc(msg, c("adam@ali.as","another@ali.as","bob@ali.as"))`
`19`	`19`	`expect_equal(`
`20`		`-header_encode(rv$header$Cc),`
	`20`	`+header_encode_address(rv$header$Cc),`
`21`	`21`	`"adam@ali.as, another@ali.as, bob@ali.as"`
`22`	`22`	`)`
`23`	`23`
`24`	`24`	`rv<- gm_bcc(msg, c("adam@ali.as","another@ali.as","bob@ali.as"))`
`25`	`25`	`expect_equal(`
`26`		`-header_encode(rv$header$Bcc),`
	`26`	`+header_encode_address(rv$header$Bcc),`
`27`	`27`	`"adam@ali.as, another@ali.as, bob@ali.as"`
`28`	`28`	`)`
`29`	`29`	`})`
`30`	`30`
`31`		`-test_that("header_encode encodes non-ascii values as base64", {`
`32`		`- expect_equal(header_encode("f\U00F6\U00F6"),"=?utf-8?B?ZsO2w7Y=?=")`
	`31`	`+test_that("header_encode_address encodes non-ascii values as base64", {`
	`32`	`+ expect_equal(header_encode_address("f\U00F6\U00F6"),"=?utf-8?B?ZsO2w7Y=?=")`
`33`	`33`
`34`	`34`	`expect_equal(`
`35`		`-header_encode('"f\U00F6\U00F6 b\U00Er1" <baz@qux.com>'),`
	`35`	`+header_encode_address('"f\U00F6\U00F6 b\U00Er1" <baz@qux.com>'),`
`36`	`36`	`"=?utf-8?B?ImbDtsO2IGIOcjEi?= <baz@qux.com>"`
`37`	`37`	`)`
`38`	`38`
`39`		`-res<-header_encode(`
	`39`	`+res<-header_encode_address(`
`40`	`40`	`c(`
`41`	`41`	`'"f\U00F6\U00F6 b\U00E1r" <baz@qux.com>',`
`42`	`42`	`'"foo bar" <foo.bar@baz.com>',`
`@@ -248,3 +248,74 @@ test_that("trailing whitespace", {`
`248`	`248`	`quoted_printable_encode("foo\t\n\t")`
`249`	`249`	`)`
`250`	`250`	`})`
	`251`	`+`
	`252`	`+test_that("header_encode_text() passes ASCII-only text through", {`
	`253`	`+ascii_subject<-"This is a plain ASCII subject"`
	`254`	`+result<- header_encode_text(ascii_subject)`
	`255`	`+ expect_equal(result,ascii_subject)`
	`256`	`+`
	`257`	`+long_ascii<- strrep("a",100)`
	`258`	`+result<- header_encode_text(long_ascii)`
	`259`	`+ expect_equal(result,long_ascii)`
	`260`	`+})`
	`261`	`+`
	`262`	`+test_that("header_encode_text() encodes short Unicode text", {`
	`263`	`+# Short subject with Unicode that fits in single encoded-word`
	`264`	`+short_unicode<-"Hello\u00E1\u00E9\u00ED\u00F3\u00FA"`
	`265`	`+result<- header_encode_text(short_unicode)`
	`266`	`+`
	`267`	`+# Should not contain CRLF (no folding)`
	`268`	`+ expect_no_match(result,"\r\n",fixed=TRUE)`
	`269`	`+# Should be a single encoded-word`
	`270`	`+ expect_match(result,"^=[?]utf-8[?]B[?][A-Za-z0-9+/=]+[?]=$")`
	`271`	`+# Should be within RFC 2047 limit`
	`272`	`+ expect_lte(nchar(result),75)`
	`273`	`+})`
	`274`	`+`
	`275`	`+# https://github.com/r-lib/gmailr/issues/193`
	`276`	`+test_that("header_encode_text() folds long non-ASCII text", {`
	`277`	`+long_subject<- paste0("\u00E1", strrep("a",54),"\u00E1")`
	`278`	`+result<- header_encode_text(long_subject)`
	`279`	`+`
	`280`	`+# Should contain CRLF SPACE (folded into multiple encoded-words)`
	`281`	`+ expect_match(result,"\r\n ",fixed=TRUE)`
	`282`	`+`
	`283`	`+# Each line should be an encoded-word within RFC 2047 limit`
	`284`	`+lines<- strsplit(result,"\r\n ",fixed=TRUE)[[1]]`
	`285`	`+ expect_gt(length(lines),1)`
	`286`	`+for (line in lines) {`
	`287`	`+ expect_lte(nchar(line),75)`
	`288`	`+ expect_match(line,"^=[?]utf-8[?]B[?][A-Za-z0-9+/=]+[?]=$")`
	`289`	`+ }`
	`290`	`+})`
	`291`	`+`
	`292`	`+test_that("header_encode_text() roundtrip: encode then decode", {`
	`293`	`+# this is to make sure we break up the encoded-text in chunks of 4 characters`
	`294`	`+original<-"\U0001F389\U0001F38A\U0001F388 C\u00E9l\u00E9bration extraordinaire\u00E0 Z\u00FCrich!\U0001F973\U0001F382\U0001F37E Join us for a tr\u00E8s sp\u00E9cial soir\u00E9e!\U0001F942\U0001F377\U0001F95C"`
	`295`	`+encoded<- header_encode_text(original)`
	`296`	`+`
	`297`	`+encoded_words<- strsplit(encoded,"\r\n ",fixed=TRUE)[[1]]`
	`298`	`+encoded_text<- sub("[?]=$","", sub("^=[?]utf-8[?]B[?]","",encoded_words))`
	`299`	`+`
	`300`	`+# Decode each chunk separately (to verify each is valid base64), then concatenate`
	`301`	`+decoded<- rawToChar(unlist(lapply(encoded_text,base64decode)))`
	`302`	`+ Encoding(decoded)<-"UTF-8"`
	`303`	`+ expect_equal(decoded,original)`
	`304`	`+})`
	`305`	`+`
	`306`	`+test_that("gm_subject() uses proper encoding in full MIME message", {`
	`307`	`+# Long subject - should be folded`
	`308`	`+long_subject<- paste0("\u00E1", strrep("a",100),"\u00E1")`
	`309`	`+msg_long<- gm_mime() |>`
	`310`	`+ gm_to("test@example.com") |>`
	`311`	`+ gm_subject(long_subject) |>`
	`312`	`+ gm_text_body("Body")`
	`313`	`+`
	`314`	`+msg_long_chr<- as.character(msg_long)`
	`315`	`+`
	`316`	`+# The subject should span multiple lines with proper folding`
	`317`	`+ expect_match(`
	`318`	`+msg_long_chr,`
	`319`	`+"Subject: =[?]utf-8[?]B[?][A-Za-z0-9+/=]+[?]=\r\n =[?]utf-8[?]B[?]"`
	`320`	`+ )`
	`321`	`+})`

0 commit comments

Comments

(0)

Movatterモバイル変換

Navigation Menu

Search code, repositories, users, issues, pull requests...

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Commitd4b5d41

File tree

4 files changed

4 files changed

`‎NEWS.md‎`

`‎R/gm_mime.R‎`

`‎R/utils.R‎`

`‎tests/testthat/test-gm_mime.R‎`

0 commit comments