r-lib · MichaelChirico · Mar 5, 2025 · Mar 6, 2025 · Mar 6, 2025 · Mar 6, 2025
diff --git a/.dev/ast_fuzz_test.R b/.dev/ast_fuzz_test.R
@@ -0,0 +1,130 @@
+# Fuzz testing for lint consistency
+#
+# We have often encountered issues where we handle
+#   equivalent R constructs inconsistently, e.g.,
+#   function(...) should almost always match the same
+#   rules as \(...), and '<-' assignment should almost
+#   always be equivalent to '='.
+#
+# Here, we seek to enforce that (under eventual consistency)
+#   by randomly altering the contents of files encountered
+#   under expect_lint() to swap known equivalencies.
+
+library(xml2)
+
+expect_lint_file <- "R/expect_lint.R"
+
+original <- readLines(expect_lint_file)
+expected_line <- "file <- maybe_write_content(file, content)"
+expected_line_idx <- grep(expected_line, original, fixed = TRUE)
+if (length(expected_line_idx) != 1L) {
+  stop(sprintf(
+    "Please update this workflow -- need exactly one hit for line '%s' in file '%s'.",
+    expected_line, expect_lint_file
+  ))
+}
+writeLines(
+  c(
+    head(original, expected_line_idx-1L),
+    # overwrite original exit hook to always delete the fuzzed file
+    "  on.exit({reset_lang(old_lang); unlink(file)})",
+    "  file <- maybe_fuzz_content(file, content)",
+    tail(original, -expected_line_idx),
+    readLines(".dev/maybe_fuzz_content.R")
+  ),
+  expect_lint_file
+)
+# Not useful in CI but good when running locally.
+withr::defer({
+  writeLines(original, expect_lint_file)
+  pkgload::load_all()
+})
+
+pkgload::load_all()
+
+# beware lazy eval: originally tried adding a withr::defer() in each iteration, but
+#   this effectively only runs the last 'defer' expression as the names are only
+#   evaluated at run-time. So instead keep track of all edits in this object.
+# this approach to implementing 'nofuzz' feels painfully manual, but I couldn't
+#   figure out how else to get 'testthat' to give us what we need -- the failures
+#   object in the reporter is frustratingly inconsistent in whether the trace
+#   exists, and even if it does, we'd have to text-mangle to get the corresponding
+#   file names out. Also, the trace 'srcref' happens under keep.source=FALSE,
+#   so we lose any associated comments anyway. even that would not solve the issue
+#   of getting top-level exclusions done for 'nofuzz start|end' ranges, except
+#   maybe if it enabled us to reuse lintr's own exclude() system.
+# therefore we take this approach: pass over the test suite first and comment out
+#   any tests/units that have been marked 'nofuzz'. restore later.
+test_restorations <- list()
+for (test_file in list.files("tests/testthat", pattern = "^test-", full.names = TRUE)) {
+  xml <- read_xml(xmlparsedata::xml_parse_data(parse(test_file, keep.source = TRUE)))
+  # parent::* to catch top-level comments (exprlist). matches one-line nofuzz and start/end ranges.
+  nofuzz_lines <- xml_find_all(xml, "//COMMENT[contains(text(), 'nofuzz')]/parent::*")
+  if (length(nofuzz_lines) == 0L) next
+
+  test_original <- test_lines <- readLines(test_file)
+
+  for (nofuzz_line in nofuzz_lines) {
+    comments <- xml_find_all(nofuzz_line, "COMMENT[contains(text(), 'nofuzz')]")
+    comment_text <- xml_text(comments)
+    # handle start/end ranges first.
+    start_idx <- grep("nofuzz start", comment_text, fixed = TRUE)
+    end_idx <- grep("nofuzz end", comment_text, fixed = TRUE)
+    if (length(start_idx) != length(end_idx) || any(end_idx < start_idx)) {
+      stop(sprintf(
+        "Mismatched '# nofuzz start' (%s), '# nofuzz end' (%s) in %s",
+        toString(start_idx), toString(end_idx), test_file
+      ))
+    }
+
+    comment_ranges <- Map(`:`,
+      as.integer(xml_attr(comments[start_idx], "line1")),
+      as.integer(xml_attr(comments[end_idx], "line1"))
+    )
+    for (comment_range in comment_ranges) {
+      test_lines[comment_range] <- paste("#", test_lines[comment_range])
+    }
+
+    if (length(start_idx) > 0L && !any(!start_idx & !end_idx)) next
+
+    # NB: one-line tests line expect_lint(...) # nofuzz are not supported,
+    #   since the comment will attach to the parent test_that() & thus comment
+    #   out the whole unit. Easiest solution is just to spread out those few tests for now.
+    comment_range <- as.integer(xml_attr(nofuzz_line, "line1")):as.integer(xml_attr(nofuzz_line, "line2"))
+    test_lines[comment_range] <- paste("#", test_lines[comment_range])
+  }
+
+  writeLines(test_lines, test_file)
+  test_restorations <- c(test_restorations, list(list(file = test_file, lines = test_original)))
+}
+withr::defer(for (restoration in test_restorations) writeLines(restoration$lines, restoration$file))
+
+# for some reason, 'report <- test_dir(...)' did not work -- the resulting object is ~empty.
+#   even 'report <- test_local(...)', which does return an object, lacks any information about
+#   which tests failed (all reports are about successful or skipped tests). probably this is not
+#   the best approach but documentation was not very helpful.
+reporter <- testthat::SummaryReporter$new()
+testthat::test_local(reporter = reporter, stop_on_failure = FALSE)
+
+failures <- reporter$failures$as_list()
+# ignore any test that failed for expected reasons, e.g. some known lint metadata changes
+#   about line numbers or the contents of the line. this saves us having to pepper tons of
+#   'nofuzz' comments throughout the suite, as well as getting around the difficulty of injecting
+#   'expect_lint()' with new code to ignore these attributes (this latter we might explore later).
+valid_failure <- vapply(
+  failures,
+  function(failure) {
+    if (grepl('(column_number|ranges|line) .* did not match', failure$message)) {
+      return(TRUE)
+    }
+    FALSE
+  },
+  logical(1L)
+)
+if (!all(valid_failure)) {
+  failures <- failures[!valid_failure]
+  names(failures) <- vapply(failures, `[[`, "test", FUN.VALUE = character(1L))
+  cat("Some fuzzed tests failed unexpectedly!\n")
+  print(failures)
+  stop("Use # nofuzz [start|end] to mark false positives or fix any bugs.")
+}
diff --git a/.dev/maybe_fuzz_content.R b/.dev/maybe_fuzz_content.R
@@ -0,0 +1,62 @@
+maybe_fuzz_content <- function(file, lines) {
+  if (is.null(file)) {
+    new_file <- tempfile()
+    con <- file(new_file, encoding = "UTF-8")
+    writeLines(lines, con = con, sep = "\n")
+    close(con)
+  } else {
+    new_file <- tempfile(fileext = paste0(".", tools::file_ext(file)))
+    file.copy(file, new_file, copy.mode = FALSE)
+  }
+
+  apply_fuzzers(new_file)
+
+  new_file
+}
+
+function_lambda_fuzzer <- function(pd, lines) {
+  fun_tokens <- c(`'\\\\'` = "\\", `FUNCTION` = "function")
+  fun_idx <- which(pd$token %in% names(fun_tokens))
+  n_fun <- length(fun_idx)
+
+  if (n_fun == 0L) {
+    return(invisible())
+  }
+
+  pd$new_text <- NA_character_
+  pd$new_text[fun_idx] <- sample(fun_tokens, n_fun, replace = TRUE)
+
+  for (ii in rev(fun_idx)) {
+    if (pd$text[ii] == pd$new_text[ii]) next
+    # Tried, with all rex(), hit a bug: https://github.com/r-lib/rex/issues/96
+    ptn = paste0("^(.{", pd$col1[ii] - 1L, "})", rex::rex(pd$text[ii]))
+    lines[pd$line1[ii]] <- rex::re_substitutes(lines[pd$line1[ii]], ptn, paste0("\\1", rex::rex(pd$new_text[ii])))
+  }
+  lines
+}
+
+# we could also consider just passing any test where no fuzzing takes place,
+#   i.e. letting the other GHA handle whether unfuzzed tests pass as expected.
+apply_fuzzers <- function(f) {
+  # skip errors for e.g. Rmd files, and ignore warnings.
+  #   We could use get_source_expressions(), but with little benefit & much slower.
+  pd <- tryCatch(getParseData(suppressWarnings(parse(f, keep.source = TRUE))), error = identity)
+  if (inherits(pd, "error")) {
+    return(invisible())
+  }
+
+  reparse <- FALSE
+  lines <- readLines(f)
+  for (fuzzer in list(function_lambda_fuzzer)) {
+    if (reparse) {
+      pd <- getParseData(parse(f, keep.source = TRUE))
+      lines <- readLines(f)
+    }
+    updated_lines <- fuzzer(pd, lines)
+    reparse <- !is.null(updated_lines)
+    if (!reparse) next # skip some I/O if we can
+    writeLines(updated_lines, f)
+  }
+
+  invisible()
+}
diff --git a/.github/workflows/ast-fuzz.yaml b/.github/workflows/ast-fuzz.yaml
@@ -0,0 +1,30 @@
+# Randomly change some code & ensure lint equivalency is maintained
+on:
+  push:
+    branches: [main]
+  # TODO before merging: remove this. Only kept to demonstrate during review.
+  pull_request:
+    branches: [main]
+
+name: ast-fuzz
+
+jobs:
+  repo-meta-tests:
+    runs-on: ubuntu-latest
+    env:
+      GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: r-lib/actions/setup-r@v2
+        with:
+          r-version: "release"
+          use-public-rspm: true
+
+      - uses: r-lib/actions/setup-r-dependencies@v2
+
+      - name: Ensure equivalent code generates equivalent lints
+        run: |
+          callr::rscript(".dev/ast_fuzz_test.R")
+        shell: Rscript {0}
diff --git a/NEWS.md b/NEWS.md
@@ -53,6 +53,10 @@
 ### Lint accuracy fixes: removing false negatives
 
 * `todo_comment_linter()` finds comments inside {roxygen2} markup comments (#2447, @MichaelChirico).
+* Linters with logic around function declarations consistently include the R 4.0.0 shorthand `\()` (#2818, continuation of earlier #2190, @MichaelChirico).
+  + `library_call_linter()`
+  + `terminal_close_linter()`
+  + `unnecessary_lambda_linter()`
 
 ## Notes
 

diff --git a/R/expect_lint.R b/R/expect_lint.R
@@ -47,15 +47,8 @@ expect_lint <- function(content, checks, ..., file = NULL, language = "en") {
   old_lang <- set_lang(language)
   on.exit(reset_lang(old_lang))
 
-  if (is.null(file)) {
-    file <- tempfile()
-    on.exit(unlink(file), add = TRUE)
-    local({
-      con <- base::file(file, encoding = "UTF-8")
-      on.exit(close(con))
-      writeLines(content, con = con, sep = "\n")
-    })
-  }
+  if (is.null(file)) on.exit(unlink(file), add = TRUE)
+  file <- maybe_write_content(file, content)
 
   lints <- lint(file, ...)
   n_lints <- length(lints)
@@ -121,6 +114,17 @@ expect_no_lint <- function(content, ..., file = NULL, language = "en") {
   expect_lint(content, NULL, ..., file = file, language = language)
 }
 
+maybe_write_content <- function(file, lines) {
+  if (!is.null(file)) {
+    return(file)
+  }
+  tmp <- tempfile()
+  con <- file(tmp, encoding = "UTF-8")
+  on.exit(close(con))
+  writeLines(lines, con = con, sep = "\n")
+  tmp
+}
+
 #' Test that the package is lint free
 #'
 #' This function is a thin wrapper around lint_package that simply tests there are no

diff --git a/R/library_call_linter.R b/R/library_call_linter.R
@@ -111,7 +111,7 @@ library_call_linter <- function(allow_preamble = TRUE) {
       expr[2][STR_CONST]
       or (
         SYMBOL_SUB[text() = 'character.only']
-        and not(ancestor::expr[FUNCTION])
+        and not(ancestor::expr[FUNCTION or OP-LAMBDA])
       )
     ]
   ")
@@ -122,7 +122,7 @@ library_call_linter <- function(allow_preamble = TRUE) {
   //SYMBOL_FUNCTION_CALL[{ xp_text_in_table(bad_indirect_funs) }]
     /parent::expr
     /parent::expr[
-      not(ancestor::expr[FUNCTION])
+      not(ancestor::expr[FUNCTION or OP-LAMBDA])
       and expr[{ call_symbol_cond }]
     ]
   ")

diff --git a/R/terminal_close_linter.R b/R/terminal_close_linter.R
@@ -39,7 +39,7 @@
 #' @export
 terminal_close_linter <- make_linter_from_xpath(
   xpath = "
-  //FUNCTION
+  (//FUNCTION | //OP-LAMBDA)
     /following-sibling::expr
     /expr[last()][
       expr/SYMBOL_FUNCTION_CALL[text() = 'close']

diff --git a/R/unnecessary_lambda_linter.R b/R/unnecessary_lambda_linter.R
@@ -93,7 +93,7 @@ unnecessary_lambda_linter <- function(allow_comparison = FALSE) {
   # NB: this includes 0+3 and TRUE+FALSE, which are also fine.
   inner_comparison_xpath <- glue("
   parent::expr
-    /expr[FUNCTION]
+    /expr[FUNCTION or OP-LAMBDA]
     /expr[
       ({ xp_or(infix_metadata$xml_tag[infix_metadata$comparator]) })
       and expr[

diff --git a/tests/testthat/test-exclusions.R b/tests/testthat/test-exclusions.R
@@ -142,7 +142,7 @@ test_that("#1442: is_excluded_files works if no global exclusions are specified"
   )
 
   # 3 lints: assignment_linter(), quotes_linter() and line_length_linter()
-  expect_lint(
+  expect_lint( # nofuzz
     file = file.path(tmp, "bad.R"),
     checks = list(
       list(linter = "assignment_linter", line_number = 1L),

diff --git a/tests/testthat/test-get_source_expressions.R b/tests/testthat/test-get_source_expressions.R
@@ -105,9 +105,13 @@ test_that("Multi-byte character truncated by parser is ignored", {
 })
 
 test_that("Can read non UTF-8 file", {
+  withr::local_options(list(lintr.linter_file = tempfile()))
   proj_dir <- test_path("dummy_projects", "project")
   withr::local_dir(proj_dir)
-  expect_no_lint(file = "cp1252.R", linters = list())
+  expect_no_lint( # nofuzz
+    file = "cp1252.R",
+    linters = list()
+  )
 })
 
 test_that("Warns if encoding is misspecified, Pt. 1", {

diff --git a/tests/testthat/test-indentation_linter.R b/tests/testthat/test-indentation_linter.R
@@ -152,6 +152,7 @@ test_that("indentation linter flags improper closing curly braces", {
   )
 })
 
+# nofuzz start
 test_that("function argument indentation works in tidyverse-style", {
   linter <- indentation_linter()
   expect_no_lint(
@@ -355,6 +356,7 @@ test_that("function argument indentation works in always-hanging-style", {
     linter
   )
 })
+# nofuzz end
 
 test_that("indentation with operators works", {
   linter <- indentation_linter()

diff --git a/tests/testthat/test-library_call_linter.R b/tests/testthat/test-library_call_linter.R
@@ -221,6 +221,10 @@ test_that("skips allowed usages of library()/character.only=TRUE", {
   expect_no_lint("library(data.table)", linter)
   expect_no_lint("function(pkg) library(pkg, character.only = TRUE)", linter)
   expect_no_lint("function(pkgs) sapply(pkgs, require, character.only = TRUE)", linter)
+
+  skip_if_not_r_version("4.1.0")
+  expect_no_lint("\\(pkg) library(pkg, character.only = TRUE)", linter)
+  expect_no_lint("\\(pkgs) sapply(pkgs, require, character.only = TRUE)", linter)
 })
 
 test_that("blocks disallowed usages of strings in library()/require()", {