Dear Luke Tierney, Thank you for the reply and apologies for not getting back to you earlier.
On Fri, 22 Sep 2023 16:14:58 -0500 (CDT) luke-tier...@uiowa.edu wrote: > I think it would be best to modify errorcall so errorcall_cpy is not > necessary. As things are now it is just too easy to forget that > sometimes errorcall_cpy should be used (and this has lead to some bugs > recently). At the end of this e-mail is a large patch that makes errorcall() and warningcall() safer by processing the format arguments before calling any R APIs. It's fairly invasive because it rewires all error() / warning() / errorcall() / warningcall() processing into two common subroutines that call R_vsnprintf() as soon as possible and keep a single message buffer afterwards. It passes make check-devel. I am open to other, less invasive ways to implement this change. Additionally, I fixed a problem detected by a static analyser: given unsigned msg_len, max(msg_len - strlen(head), 0) can only be 0 if msg_len == strlen(head). That's because both msg_len and size_t end up being promoted to unsigned in order to compute the subtraction. I couldn't come up with a sufficiently laconic alternative, so I left the ternary operator in for now. >> The only solution to the latter problem is an EncodeChar() variant >> that allocates its memory dynamically. Would R_alloc() be >> acceptable in this context? With errors, the allocation stack would >> be quickly reset (except when withCallingHandlers() is in effect?), >> but with warnings, the code would have to restore it manually every >> time. > > Or allow/require a buffer to be provided. So replacing the calls like > > CHAR(PRINTNAME(sym)) > > with > > EncodeSymbol(sym, buf, buf_size) I can also implement this. Does this mean replacing every occasion of EncodeChar(PRINTNAME(sym)) with EncodeSymbol(sym, <temporary>, sizeof <temporary>)? Index: src/main/envir.c =================================================================== --- src/main/envir.c (revision 85251) +++ src/main/envir.c (working copy) @@ -1582,7 +1582,7 @@ } rho = ENCLOS(rho); } - errorcall_cpy(call, + errorcall(call, _("could not find function \"%s\""), EncodeChar(PRINTNAME(symbol))); /* NOT REACHED */ @@ -3924,7 +3924,7 @@ SEXP nsname = PROTECT(callR1(R_getNamespaceNameSymbol, ns)); if (TYPEOF(nsname) != STRSXP || LENGTH(nsname) != 1) errorcall(call, "bad value returned by `getNamespaceName'"); - errorcall_cpy(call, + errorcall(call, _("'%s' is not an exported object from 'namespace:%s'"), EncodeChar(PRINTNAME(name)), CHAR(STRING_ELT(nsname, 0))); Index: src/main/errors.c =================================================================== --- src/main/errors.c (revision 85251) +++ src/main/errors.c (working copy) @@ -58,7 +58,7 @@ /* Different values of inError are used to indicate different places in the error handling: -inError = 1: In internal error handling, e.g. `verrorcall_dflt`, others. +inError = 1: In internal error handling, e.g. `errorcall_dflt`, others. inError = 2: Writing traceback inError = 3: In user error handler (i.e. options(error=handler)) */ @@ -387,32 +387,45 @@ return c ? c->call : R_NilValue; } -void warning(const char *format, ...) +/* declarations for internal condition handling */ + +static void signalError(SEXP call, const char *msg); +static void signalWarning(SEXP call, const char *msg); +NORET static void invokeRestart(SEXP, SEXP); + +static void impl_vwarning(SEXP call, Rboolean immediate, const char *format, va_list ap) { char buf[BUFSIZE], *p; - va_list(ap); - va_start(ap, format); size_t psize; int pval; psize = min(BUFSIZE, R_WarnLength+1); pval = Rvsnprintf_mbcs(buf, psize, format, ap); - va_end(ap); p = buf + strlen(buf) - 1; if(strlen(buf) > 0 && *p == '\n') *p = '\0'; RprintTrunc(buf, pval >= psize); - SEXP call = PROTECT(getCurrentCall()); - warningcall(call, "%s", buf); - UNPROTECT(1); + + // must not call into R before stringifying the format arguments + int nprotect = 0; + if (!call) { + call = PROTECT(getCurrentCall()); + ++nprotect; + } + if (immediate) immediateWarning = 1; + signalWarning(call, buf); + if (immediate) immediateWarning = 0; + UNPROTECT(nprotect); } -/* declarations for internal condition handling */ +void warning(const char *format, ...) +{ + va_list(ap); + va_start(ap, format); + impl_vwarning(NULL, FALSE, format, ap); + va_end(ap); +} -static void vsignalError(SEXP call, const char *format, va_list ap); -static void vsignalWarning(SEXP call, const char *format, va_list ap); -NORET static void invokeRestart(SEXP, SEXP); - static void reset_inWarning(void *data) { inWarning = 0; @@ -437,12 +450,11 @@ return nc; } -static void vwarningcall_dflt(SEXP call, const char *format, va_list ap) +static void warningcall_dflt(SEXP call, const char *msg) { int w; SEXP names, s; const char *dcall; - char buf[BUFSIZE]; RCNTXT *cptr; RCNTXT cntxt; size_t psize; @@ -480,11 +492,8 @@ inWarning = 1; if(w >= 2) { /* make it an error */ - psize = min(BUFSIZE, R_WarnLength+1); - pval = Rvsnprintf_mbcs(buf, psize, format, ap); - RprintTrunc(buf, pval >= psize); inWarning = 0; /* PR#1570 */ - errorcall(call, _("(converted from warning) %s"), buf); + errorcall(call, _("(converted from warning) %s"), msg); } else if(w == 1) { /* print as they happen */ char *tr; @@ -492,18 +501,16 @@ dcall = CHAR(STRING_ELT(deparse1s(call), 0)); } else dcall = ""; psize = min(BUFSIZE, R_WarnLength+1); - pval = Rvsnprintf_mbcs(buf, psize, format, ap); - RprintTrunc(buf, pval >= psize); if(dcall[0] == '\0') REprintf(_("Warning:")); else { REprintf(_("Warning in %s :"), dcall); if(!(noBreakWarning || - ( mbcslocale && 18 + wd(dcall) + wd(buf) <= LONGWARN) || - (!mbcslocale && 18 + strlen(dcall) + strlen(buf) <= LONGWARN))) + ( mbcslocale && 18 + wd(dcall) + wd(msg) <= LONGWARN) || + (!mbcslocale && 18 + strlen(dcall) + strlen(msg) <= LONGWARN))) REprintf("\n "); } - REprintf(" %s\n", buf); + REprintf(" %s\n", msg); if(R_ShowWarnCalls && call != R_NilValue) { tr = R_ConciseTraceback(call, 0); if (strlen(tr)) {REprintf(_("Calls:")); REprintf(" %s\n", tr);} @@ -512,10 +519,11 @@ else if(w == 0) { /* collect them */ if(!R_CollectWarnings) setupwarnings(); if(R_CollectWarnings < R_nwarnings) { + char buf[BUFSIZE]; SET_VECTOR_ELT(R_Warnings, R_CollectWarnings, call); psize = min(BUFSIZE, R_WarnLength+1); - pval = Rvsnprintf_mbcs(buf, psize, format, ap); - RprintTrunc(buf, pval >= psize); + Rstrncpy(buf, msg, psize); + RprintTrunc(buf, strlen(msg) >= psize); if(R_ShowWarnCalls && call != R_NilValue) { char *tr = R_ConciseTraceback(call, 0); size_t nc = strlen(tr); @@ -535,20 +543,12 @@ inWarning = 0; } -static void warningcall_dflt(SEXP call, const char *format,...) -{ - va_list(ap); - va_start(ap, format); - vwarningcall_dflt(call, format, ap); - va_end(ap); -} - void warningcall(SEXP call, const char *format, ...) { va_list(ap); va_start(ap, format); - vsignalWarning(call, format, ap); + impl_vwarning(call, FALSE, format, ap); va_end(ap); } @@ -555,12 +555,9 @@ void warningcall_immediate(SEXP call, const char *format, ...) { va_list(ap); - - immediateWarning = 1; va_start(ap, format); - vsignalWarning(call, format, ap); + impl_vwarning(call, TRUE, format, ap); va_end(ap); - immediateWarning = 0; } static void cleanup_PrintWarnings(void *data) @@ -741,7 +738,7 @@ /* Construct newline terminated error message, write it to global errbuf, and possibly display with REprintf. */ NORET static void -verrorcall_dflt(SEXP call, const char *format, va_list ap) +errorcall_dflt(SEXP call, const char *msg) { if (allowedConstsChecks > 0) { allowedConstsChecks--; @@ -758,7 +755,7 @@ REprintf(_("Error during wrapup: ")); /* this does NOT try to print the call since that could cause a cascade of error calls */ - Rvsnprintf_mbcs(errbuf, sizeof(errbuf), format, ap); + Rstrncpy(errbuf, msg, sizeof(errbuf)); REprintf("%s\n", errbuf); } if (R_Warnings != R_NilValue) { @@ -801,7 +798,7 @@ } const char *dcall = CHAR(STRING_ELT(deparse1s(call), 0)); - Rsnprintf_mbcs(tmp2, BUFSIZE, "%s", head); + Rstrncpy(tmp2, head, BUFSIZE); if (skip != NA_INTEGER) { PROTECT(srcloc = GetSrcLoc(R_GetCurrentSrcref(skip))); protected++; @@ -811,7 +808,7 @@ dcall, CHAR(STRING_ELT(srcloc, 0))); } - Rvsnprintf_mbcs(tmp, max(msg_len - strlen(head), 0), format, ap); + Rstrncpy(tmp, msg, msg_len > strlen(head) ? msg_len - strlen(head) : 0); if (strlen(tmp2) + strlen(tail) + strlen(tmp) < BUFSIZE) { if(len) Rsnprintf_mbcs(errbuf, BUFSIZE, _("Error in %s (from %s) : "), @@ -839,15 +836,15 @@ } ERRBUFCAT(tmp); } else { - Rsnprintf_mbcs(errbuf, BUFSIZE, _("Error: ")); + Rstrncpy(errbuf, _("Error: "), BUFSIZE); ERRBUFCAT(tmp); } UNPROTECT(protected); } else { - Rsnprintf_mbcs(errbuf, BUFSIZE, _("Error: ")); + Rstrncpy(errbuf, _("Error: "), BUFSIZE); p = errbuf + strlen(errbuf); - Rvsnprintf_mbcs(p, max(msg_len - strlen(errbuf), 0), format, ap); + Rstrncpy(p, msg, msg_len > strlen(errbuf) ? msg_len - strlen(errbuf) : 0); } /* Approximate truncation detection, may produce false positives. Assumes R_MB_CUR_MAX > 0. Note: approximation is fine, as the string may include @@ -892,55 +889,44 @@ inError = oldInError; } -NORET static void errorcall_dflt(SEXP call, const char *format,...) -{ - va_list(ap); +NORET static void do_verrorcall(SEXP call, const char *format, va_list ap) { + // must be careful to process the format arguments before calling into R + char buf[BUFSIZE]; + Rvsnprintf_mbcs(buf, min(BUFSIZE, R_WarnLength), format, ap); - va_start(ap, format); - verrorcall_dflt(call, format, ap); - va_end(ap); -} - -NORET void errorcall(SEXP call, const char *format,...) -{ - va_list(ap); - if (call == R_CurrentExpression) /* behave like error( */ call = getCurrentCall(); - va_start(ap, format); - vsignalError(call, format, ap); - va_end(ap); + signalError(call, buf); if (R_ErrorHook != NULL) { - char buf[BUFSIZE]; void (*hook)(SEXP, char *) = R_ErrorHook; R_ErrorHook = NULL; /* to avoid recursion */ - va_start(ap, format); - Rvsnprintf_mbcs(buf, min(BUFSIZE, R_WarnLength), format, ap); - va_end(ap); hook(call, buf); } + errorcall_dflt(call, buf); +} + +NORET void errorcall(SEXP call, const char *format,...) +{ + va_list(ap); va_start(ap, format); - verrorcall_dflt(call, format, ap); + do_verrorcall(call, format, ap); va_end(ap); } -/* Like errorcall, but copies all data for the error message into a buffer - before doing anything else. */ -attribute_hidden -NORET void errorcall_cpy(SEXP call, const char *format, ...) +void error(const char *format, ...) { - char buf[BUFSIZE]; - va_list(ap); va_start(ap, format); - Rvsnprintf_mbcs(buf, BUFSIZE, format, ap); + /* R_CurrentExpression will be processed as if it was getCurrentCall(), but + not before stringifying the format arguments. It's important to process + the format arguments first because calling into R may invalidate some of + the pointers (e.g. those returned by EncodeChar()). */ + do_verrorcall(R_CurrentExpression, format, ap); va_end(ap); - - errorcall(call, "%s", buf); } // geterrmessage(): Return (the global) 'errbuf' as R string @@ -953,17 +939,6 @@ return res; } -void error(const char *format, ...) -{ - char buf[BUFSIZE]; - - va_list(ap); - va_start(ap, format); - Rvsnprintf_mbcs(buf, min(BUFSIZE, R_WarnLength), format, ap); - va_end(ap); - errorcall(getCurrentCall(), "%s", buf); -} - static void try_jump_to_restart(void) { SEXP list; @@ -1834,9 +1809,8 @@ return R_NilValue; } -static void vsignalWarning(SEXP call, const char *format, va_list ap) +static void signalWarning(SEXP call, const char *msg) { - char buf[BUFSIZE]; SEXP hooksym, hcall, qcall, qfun; hooksym = install(".signalSimpleWarning"); @@ -1846,13 +1820,12 @@ PROTECT(qfun); PROTECT(qcall = LCONS(qfun, LCONS(call, R_NilValue))); PROTECT(hcall = LCONS(qcall, R_NilValue)); - Rvsnprintf_mbcs(buf, BUFSIZE - 1, format, ap); - hcall = LCONS(mkString(buf), hcall); + hcall = LCONS(mkString(msg), hcall); PROTECT(hcall = LCONS(hooksym, hcall)); evalKeepVis(hcall, R_GlobalEnv); UNPROTECT(4); } - else vwarningcall_dflt(call, format, ap); + else warningcall_dflt(call, msg); } NORET static void gotoExitingHandler(SEXP cond, SEXP call, SEXP entry) @@ -1865,20 +1838,18 @@ findcontext(CTXT_FUNCTION, rho, result); } -static void vsignalError(SEXP call, const char *format, va_list ap) +static void signalError(SEXP call, const char *msg) { /* This function does not protect or restore the old handler stack. On return R_HandlerStack will be R_NilValue (unless R_RestartToken is encountered). */ - char localbuf[BUFSIZE]; SEXP list; - Rvsnprintf_mbcs(localbuf, BUFSIZE - 1, format, ap); while ((list = findSimpleErrorHandler()) != R_NilValue) { char *buf = errbuf; SEXP entry = CAR(list); R_HandlerStack = CDR(list); - Rstrncpy(buf, localbuf, BUFSIZE); + Rstrncpy(buf, msg, BUFSIZE); /* Rvsnprintf(buf, BUFSIZE - 1, format, ap);*/ if (IS_CALLING_ENTRY(entry)) { if (ENTRY_HANDLER(entry) == R_RestartToken) { @@ -1950,7 +1921,7 @@ if (TYPEOF(msg) == STRSXP && LENGTH(msg) > 0) msgstr = translateChar(STRING_ELT(msg, 0)); else error(_("error message not a string")); - errorcall_dflt(ecall, "%s", msgstr); + errorcall_dflt(ecall, msgstr); } else { SEXP hcall = LCONS(h, LCONS(cond, R_NilValue)); @@ -2075,7 +2046,7 @@ const char *msg = translateChar(STRING_ELT(CAR(args), 0)); SEXP ecall = CADR(args); - warningcall_dflt(ecall, "%s", msg); + warningcall_dflt(ecall, msg); return R_NilValue; } @@ -2088,7 +2059,7 @@ const char *msg = translateChar(STRING_ELT(CAR(args), 0)); SEXP ecall = CADR(args); - errorcall_dflt(ecall, "%s", msg); + errorcall_dflt(ecall, msg); } Index: src/main/eval.c =================================================================== --- src/main/eval.c (revision 85251) +++ src/main/eval.c (working copy) @@ -1153,7 +1153,7 @@ else tmp = findVar(e, rho); if (tmp == R_UnboundValue) - errorcall_cpy(getLexicalCall(rho), + errorcall(getLexicalCall(rho), _("object '%s' not found"), EncodeChar(PRINTNAME(e))); /* if ..d is missing then ddfindVar will signal */ @@ -3473,7 +3473,7 @@ code more consistent. */ } else if (isSymbol(CAR(el)) && R_isMissing(CAR(el), rho)) { /* It was missing */ - errorcall_cpy(call, + errorcall(call, _("'%s' is missing"), EncodeChar(PRINTNAME(CAR(el)))); #endif @@ -5574,7 +5574,7 @@ NORET static void UNBOUND_VARIABLE_ERROR(SEXP symbol, SEXP rho) { - errorcall_cpy(getLexicalCall(rho), + errorcall(getLexicalCall(rho), _("object '%s' not found"), EncodeChar(PRINTNAME(symbol))); } Index: src/main/printutils.c =================================================================== --- src/main/printutils.c (revision 85251) +++ src/main/printutils.c (working copy) @@ -879,9 +879,7 @@ The pointer returned by EncodeChar points into an internal buffer which is overwritten by subsequent calls to EncodeChar/EncodeString. It is the responsibility of the caller to copy the result before - any subsequent call to EncodeChar/EncodeString may happen. Note that - particularly it is NOT safe to pass the result of EncodeChar as 3rd - argument to errorcall (errorcall_cpy can be used instead). */ + any subsequent call to EncodeChar/EncodeString may happen. */ //attribute_hidden const char *EncodeChar(SEXP x) { -- Best regards, Ivan ______________________________________________ R-devel@r-project.org mailing list https://stat.ethz.ch/mailman/listinfo/r-devel