-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathocfiles.R
87 lines (75 loc) · 4.15 KB
/
ocfiles.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
##' Load metadata and location of files of ocean colour data products.
##'
##' This function loads the latest cache of stored files for
##' NASA ocean colour products.
##' @param time.resolution daily or monthly files?
##' @param product choice of ocean colour sensor
##' @param varname which variable (or set of variables)
##' @param type which level of data
##' @param bz2.rm ignore files that are compressed
##' @param ext which extension, e.g. "nc" or "main"
##' @param ... reserved for future use, currently ignored
##' @export
##' @return data.frame of \code{file} and \code{date}
ocfiles <- function(time.resolution = c("daily", "weekly", "monthly", "weekly32"),
product = c("MODISA", "SeaWiFS", "VIIRS"),
varname = c("CHL", "RRS", "POC", "KD490", "NPP_PAR", "SNPP_CHL", "SNPP_RRS"),
type = c("L3m", "L3b"),
bz2.rm = TRUE,
ext = c("nc", "main"),
...) {
#ftx <- .allfilelist()
ftx <- dplyr::transmute(allfiles() %>% dplyr::filter(stringr::str_detect(file, "oceandata.sci.gsfc.nasa.gov")), fullname = fs::path(root, file))$fullname
time.resolution <- match.arg(time.resolution)
product <- match.arg(product)
ext <- match.arg(ext)
varname <- varname[1L]
type <- match.arg(type)
time <- switch(time.resolution,
daily = "DAY",
weekly = "8D",
monthly = "MO",
weekly32 = "R32")
## NOTE file naming changed from October 2019 (date varied from data set to data set), see https://oceancolor.gsfc.nasa.gov/docs/filenaming-convention/
## old name e.g. A2020366.L3b_DAY_RRS.nc becomes AQUA_MODIS.20201231.L3b.DAY.RRS.nc
## - platform from abbreviation ("A") to full name ("AQUA_MODIS")
## - YYYYDOY to YYYYMMDD
## - separator between file name components is "." not "_"
## first look for files using the new naming convention
prod <- switch(product, MODISA = "AQUA_MODIS", SeaWiFS = "SEASTAR_SEAWIFS_GAC", MODIST = "TERRA_MODIS", CZCS = "NIMBUS7_CZCS", VIIRS = "SNPP_VIIRS")
## note that there is also JPSS1_VIIRS
mtag <- paste0(prod, ".*", paste(type, time, varname, sep = "\\."), ".*\\.", ext) ## separator is "." with new naming
cfiles <- grep(mtag, ftx, value = TRUE)
if (length(cfiles) > 0) {
dates <- sub("\\..+", "", sub("^[^\\.]+\\.", "", basename(cfiles))) ## discard before leading separator, and after second (keep date component)
dates <- as.POSIXct(strptime(dates, "%Y%m%d", tz = "GMT"))
} else {
## did not find any new files, try old file naming
prod <- switch(product, MODISA = "^A", SeaWiFS = "^S", VIIRS = "^V")
## don't forget those ST93c files!
## see here: http://oceancolor.gsfc.nasa.gov/DOCS/FormatChange.html
## Note: ST92 is the test set designation for the SeaWiFS test run, similarly
## AT108 and AT109 are the MODIS-Aqua test set designators. These designations
## will NOT be part of the reprocessing filenames.
mtag <- sprintf(paste0("%s.*\\.", ext), paste(type, time, varname, sep = "_"))
##print(mtag)
##cfiles1 <- sapply(product, function(x) file.path("oceandata.sci.gsfc.nasa.gov", x)
cfiles2 <- grep(mtag, ftx, value = TRUE)
cfiles3 <- cfiles2[grep(prod, basename(cfiles2))]
cfiles <- if (bz2.rm) grep(paste0(ext, "$"), cfiles3, value = TRUE) else cfiles3
if (length(cfiles) < 1) stop("no files found for ", paste(product, varname, type, time.resolution, collapse = ", "))
tokens <- .filetok(basename(cfiles))
dates <- as.POSIXct(strptime(paste0(tokens$year, tokens$jday), "%Y%j", tz = "GMT"))
}
if (length(cfiles) < 1) stop("no files found for ", paste(product, varname, type, time.resolution, collapse = ", "))
tibble::tibble(fullname = cfiles, date = dates)[order(dates), ]
}
## This function is from roc
.filetok <- function(x) {
sensortok <- substr(x, 1, 1)
yeartok <- substr(x, 2, 5)
jdaytok <- substr(x, 6, 8)
## Note: Aquarius is *versioned* so we need some extra handling here
## or we'll just smash them all together (might be ok since the files have the version name)
list(sensor = sensortok, year = yeartok, jday = jdaytok)
}