Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit d4b5d41

Browse files
authored
Fold text headers according to RFC 2047 (#211)
* Pass the arguments of encode_base64() along
* Handle long-ish `Subject` with non-ASCII characters
* Harden against R 4.1 on Windows
1 parent 38a6f3e commit d4b5d41

File tree

4 files changed

+207
-39
lines changed

4 files changed

+207
-39
lines changed

‎NEWS.md‎

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55
- Previously: `c("condition", "error", "gmail_error")`
66
- Now: `c("gmailr_error", "gargle_error_request_failed", "http_error_{XXX}", "gargle_error", "rlang_error", "error", "condition")`
77

8+
* Text headers, such as `Subject`, are now properly prepared as per RFC 2047, fixing a problem with long-ish headers that contain non-ASCII characters (#193).
9+
810
## Deprecations
911

1012
* Functions that lack the `gm_` prefix have been removed, concluding a deprecation process that kicked off with gmailr 1.0.0 (released 2019-08-23). These functions were hard deprecated in gmailr 2.0.0 (released 2023-06-30). This eliminates many name conflicts with other packages (including the base package).

‎R/gm_mime.R‎

Lines changed: 123 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -205,32 +205,6 @@ gm_attach_file <- function(mime, filename, type = NULL, id = NULL, ...) {
205205
)
206206
}
207207

208-
header_encode<-function(x) {
209-
x<- enc2utf8(unlist(strsplit(as.character(x),", ?")))
210-
211-
# this won't deal with <> used in quotes, but I think it is rare enough that
212-
# is ok
213-
m<-rematch2::re_match(x,"^(?<phrase>[^<]*?)(?: *<(?<addr_spec>[^>]+)>)?$")
214-
res<-character(length(x))
215-
216-
# simple addresses contain no <>, so we don't need to do anything further
217-
simple<-!nzchar(m$addr_spec)
218-
res[simple]<-m$phrase[simple]
219-
220-
# complex addresses may need to be base64-encoded
221-
needs_encoding<- Encoding(m$phrase)!="unknown"
222-
res[needs_encoding]<- sprintf(
223-
"=?utf-8?B?%s?=",
224-
vcapply(m$phrase[needs_encoding],encode_base64)
225-
)
226-
res[!needs_encoding]<-m$phrase[!needs_encoding]
227-
228-
# Add the addr_spec onto non-simple examples
229-
res[!simple]<- sprintf("%s <%s>",res[!simple],m$addr_spec[!simple])
230-
231-
paste0(res,collapse=",")
232-
}
233-
234208
#' Convert a mime object to character representation
235209
#'
236210
#' This function converts a mime object into a character vector
@@ -240,8 +214,9 @@ header_encode <- function(x) {
240214
#' @param ... further arguments ignored
241215
#' @export
242216
as.character.mime<-function(x,newline="\r\n",...) {
243-
# encode headers
244-
x$header<- lapply(x$header,header_encode)
217+
for (i in seq_along(x$header)) {
218+
x$header[[i]]<- encode_header(names(x$header)[i],x$header[[i]])
219+
}
245220

246221
# Check if we need nested structure ((text + HTML) + attachments)
247222
has_both_bodies<- exists_list(x$parts,TEXT_PART)&&
@@ -364,3 +339,123 @@ with_defaults <- function(defaults, ...) {
364339
missing<- setdiff(names(defaults), names(args))
365340
c(defaults[missing],args)
366341
}
342+
343+
# Header encoding helpers ------------------------------------------------------
344+
#
345+
# In general, the Gmail API requires following RFC 2822 Internet Message Format
346+
# https://datatracker.ietf.org/doc/html/rfc2822
347+
#
348+
# Then, within that, non-ASCII text in headers is addressed in RFC 2047 MIME
349+
# Part Three: Message Header Extensions for Non-ASCII Text
350+
# https://datatracker.ietf.org/doc/html/rfc2047
351+
#
352+
# Refactoring the header processing was motivated by
353+
# https://github.com/r-lib/gmailr/issues/193
354+
355+
# Strategy: Divide headers into address headers vs. everything else.
356+
#
357+
# Use existing helper to encode address headers, as it was clearly written for
358+
# that use case.
359+
#
360+
# Use a new helper for other headers, that can deal with "folding" (see the RFC)
361+
# long-ish, non-ASCII text, e.g. in the Subject.
362+
363+
encode_header<-function(name,value) {
364+
address_headers<- c(
365+
"To",
366+
"From",
367+
"Cc",
368+
"Bcc",
369+
"Reply-To",
370+
"Sender",
371+
"Resent-To",
372+
"Resent-From",
373+
"Resent-Cc",
374+
"Resent-Bcc",
375+
"Resent-Sender"
376+
)
377+
378+
fun<-if (name%in%address_headers) {
379+
header_encode_address
380+
}else {
381+
header_encode_text
382+
}
383+
fun(value)
384+
}
385+
386+
# Pre-existing helper now renamed to reflect its motivating use case.
387+
# - May contain multiple comma-separated addresses
388+
# - Each address may have the format "Name" <email@example.com>
389+
# - Only the "Name" part needs encoding, not the email address
390+
header_encode_address<-function(x) {
391+
x<- enc2utf8(unlist(strsplit(as.character(x),", ?")))
392+
393+
# this won't deal with <> used in quotes, but I think it is rare enough that
394+
# is ok
395+
m<-rematch2::re_match(x,"^(?<phrase>[^<]*?)(?: *<(?<addr_spec>[^>]+)>)?$")
396+
res<-character(length(x))
397+
398+
# simple addresses contain no <>, so we don't need to do anything further
399+
simple<-!nzchar(m$addr_spec)
400+
res[simple]<-m$phrase[simple]
401+
402+
# complex addresses may need to be base64-encoded
403+
needs_encoding<- Encoding(m$phrase)!="unknown"
404+
res[needs_encoding]<- sprintf(
405+
"=?utf-8?B?%s?=",
406+
vcapply(m$phrase[needs_encoding],encode_base64)
407+
)
408+
res[!needs_encoding]<-m$phrase[!needs_encoding]
409+
410+
# Add the addr_spec onto non-simple examples
411+
res[!simple]<- sprintf("%s <%s>",res[!simple],m$addr_spec[!simple])
412+
413+
paste0(res,collapse=",")
414+
}
415+
416+
# New helper for a generic "text" header
417+
# - Single value (not comma-separated)
418+
# - May contain long Unicode text that exceeds RFC 2047's 75-character limit
419+
# - Must be "folded" into multiple encoded-words if too long
420+
header_encode_text<-function(x) {
421+
if (length(x)==0|| is.null(x)) {
422+
return(x)
423+
}
424+
425+
x<- enc2utf8(as.character(x))
426+
427+
# Pass pure ASCII through unchanged
428+
if (Encoding(x)=="unknown") {
429+
return(x)
430+
}
431+
432+
# First, get a single base64-encoded string
433+
b64_full<- encode_base64(x,line_length=0L,newline="")
434+
b64_len<- nchar(b64_full)
435+
436+
# encoded-word = "=?" charset "?" encoding "?" encoded-text "?="
437+
# charset is utf-8
438+
# encoding is "B" (as opposed to "Q"), as in "BASE64"
439+
encode_word<-function(b64) sprintf("=?utf-8?B?%s?=",b64)
440+
441+
# RFC 2047: "An 'encoded-word' may not be more than 75 characters long,
442+
# including 'charset', 'encoding', 'encoded-text', and delimiters."
443+
# Format: =?utf-8?B?<encoded-text>?=
444+
# The formalities account for 12 characters, which leaves up to 63 characters
445+
# for the encoded text. However, base64 works in 4-character groups, so we
446+
# must use a multiple of 4: the largest is 60.
447+
max_b64_per_word<-60
448+
449+
# Return as single encoded-word, if possible
450+
if (b64_len<=max_b64_per_word) {
451+
return(encode_word(b64_full))
452+
}
453+
454+
# Otherwise, split into multiple encoded-words
455+
starts<- seq(1L,b64_len,by=max_b64_per_word)
456+
stops<- c(starts[-1]-1L,b64_len)
457+
encoded_words<- encode_word(substring(b64_full,starts,stops))
458+
459+
# Join multiple encoded-words with CRLF SPACE per RFC 2047
460+
paste0(encoded_words,collapse="\r\n")
461+
}

‎R/utils.R‎

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -171,9 +171,9 @@ encode_base64 <- function(x, line_length = 76L, newline = "\r\n") {
171171
}
172172

173173
if (is.raw(x)) {
174-
base64encode(x,76L,newline)
174+
base64encode(x,line_length,newline)
175175
}else {
176-
base64encode(charToRaw(x),76L,"\r\n")
176+
base64encode(charToRaw(x),line_length,newline)
177177
}
178178
}
179179

‎tests/testthat/test-gm_mime.R‎

Lines changed: 80 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -4,39 +4,39 @@ test_that("MIME - Basic functions", {
44
expect_true(length(msg$header)>0)
55

66
rv<- gm_to(msg,"adam@ali.as")
7-
expect_equal(header_encode(rv$header$To),"adam@ali.as")
7+
expect_equal(header_encode_address(rv$header$To),"adam@ali.as")
88

99
rv<- gm_from(msg,"bob@ali.as")
10-
expect_equal(header_encode(rv$header$From),"bob@ali.as")
10+
expect_equal(header_encode_address(rv$header$From),"bob@ali.as")
1111

1212
rv<- gm_to(msg, c("adam@ali.as","another@ali.as","bob@ali.as"))
1313
expect_equal(
14-
header_encode(rv$header$To),
14+
header_encode_address(rv$header$To),
1515
"adam@ali.as, another@ali.as, bob@ali.as"
1616
)
1717

1818
rv<- gm_cc(msg, c("adam@ali.as","another@ali.as","bob@ali.as"))
1919
expect_equal(
20-
header_encode(rv$header$Cc),
20+
header_encode_address(rv$header$Cc),
2121
"adam@ali.as, another@ali.as, bob@ali.as"
2222
)
2323

2424
rv<- gm_bcc(msg, c("adam@ali.as","another@ali.as","bob@ali.as"))
2525
expect_equal(
26-
header_encode(rv$header$Bcc),
26+
header_encode_address(rv$header$Bcc),
2727
"adam@ali.as, another@ali.as, bob@ali.as"
2828
)
2929
})
3030

31-
test_that("header_encode encodes non-ascii values as base64", {
32-
expect_equal(header_encode("f\U00F6\U00F6"),"=?utf-8?B?ZsO2w7Y=?=")
31+
test_that("header_encode_address encodes non-ascii values as base64", {
32+
expect_equal(header_encode_address("f\U00F6\U00F6"),"=?utf-8?B?ZsO2w7Y=?=")
3333

3434
expect_equal(
35-
header_encode('"f\U00F6\U00F6 b\U00Er1" <baz@qux.com>'),
35+
header_encode_address('"f\U00F6\U00F6 b\U00Er1" <baz@qux.com>'),
3636
"=?utf-8?B?ImbDtsO2IGIOcjEi?= <baz@qux.com>"
3737
)
3838

39-
res<-header_encode(
39+
res<-header_encode_address(
4040
c(
4141
'"f\U00F6\U00F6 b\U00E1r" <baz@qux.com>',
4242
'"foo bar" <foo.bar@baz.com>',
@@ -248,3 +248,74 @@ test_that("trailing whitespace", {
248248
quoted_printable_encode("foo\t\n\t")
249249
)
250250
})
251+
252+
test_that("header_encode_text() passes ASCII-only text through", {
253+
ascii_subject<-"This is a plain ASCII subject"
254+
result<- header_encode_text(ascii_subject)
255+
expect_equal(result,ascii_subject)
256+
257+
long_ascii<- strrep("a",100)
258+
result<- header_encode_text(long_ascii)
259+
expect_equal(result,long_ascii)
260+
})
261+
262+
test_that("header_encode_text() encodes short Unicode text", {
263+
# Short subject with Unicode that fits in single encoded-word
264+
short_unicode<-"Hello\u00E1\u00E9\u00ED\u00F3\u00FA"
265+
result<- header_encode_text(short_unicode)
266+
267+
# Should not contain CRLF (no folding)
268+
expect_no_match(result,"\r\n",fixed=TRUE)
269+
# Should be a single encoded-word
270+
expect_match(result,"^=[?]utf-8[?]B[?][A-Za-z0-9+/=]+[?]=$")
271+
# Should be within RFC 2047 limit
272+
expect_lte(nchar(result),75)
273+
})
274+
275+
# https://github.com/r-lib/gmailr/issues/193
276+
test_that("header_encode_text() folds long non-ASCII text", {
277+
long_subject<- paste0("\u00E1", strrep("a",54),"\u00E1")
278+
result<- header_encode_text(long_subject)
279+
280+
# Should contain CRLF SPACE (folded into multiple encoded-words)
281+
expect_match(result,"\r\n",fixed=TRUE)
282+
283+
# Each line should be an encoded-word within RFC 2047 limit
284+
lines<- strsplit(result,"\r\n",fixed=TRUE)[[1]]
285+
expect_gt(length(lines),1)
286+
for (line in lines) {
287+
expect_lte(nchar(line),75)
288+
expect_match(line,"^=[?]utf-8[?]B[?][A-Za-z0-9+/=]+[?]=$")
289+
}
290+
})
291+
292+
test_that("header_encode_text() roundtrip: encode then decode", {
293+
# this is to make sure we break up the encoded-text in chunks of 4 characters
294+
original<-"\U0001F389\U0001F38A\U0001F388 C\u00E9l\u00E9bration extraordinaire\u00E0 Z\u00FCrich!\U0001F973\U0001F382\U0001F37E Join us for a tr\u00E8s sp\u00E9cial soir\u00E9e!\U0001F942\U0001F377\U0001F95C"
295+
encoded<- header_encode_text(original)
296+
297+
encoded_words<- strsplit(encoded,"\r\n",fixed=TRUE)[[1]]
298+
encoded_text<- sub("[?]=$","", sub("^=[?]utf-8[?]B[?]","",encoded_words))
299+
300+
# Decode each chunk separately (to verify each is valid base64), then concatenate
301+
decoded<- rawToChar(unlist(lapply(encoded_text,base64decode)))
302+
Encoding(decoded)<-"UTF-8"
303+
expect_equal(decoded,original)
304+
})
305+
306+
test_that("gm_subject() uses proper encoding in full MIME message", {
307+
# Long subject - should be folded
308+
long_subject<- paste0("\u00E1", strrep("a",100),"\u00E1")
309+
msg_long<- gm_mime()|>
310+
gm_to("test@example.com")|>
311+
gm_subject(long_subject)|>
312+
gm_text_body("Body")
313+
314+
msg_long_chr<- as.character(msg_long)
315+
316+
# The subject should span multiple lines with proper folding
317+
expect_match(
318+
msg_long_chr,
319+
"Subject: =[?]utf-8[?]B[?][A-Za-z0-9+/=]+[?]=\r\n =[?]utf-8[?]B[?]"
320+
)
321+
})

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp