Skip to content

Commit cb57d2e

Browse files
-Fixes moment matrix calculation when there are continuous disallowed combinations, including those that vary depending on the categorical level, by integrating over points generated evenly across a convex hull of the continuous factors for that particular factor level combination.
1 parent 7e00961 commit cb57d2e

5 files changed

+304
-2
lines changed
+220
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,220 @@
1+
#' @title Compute convex hull in half-space form
2+
#' @param points A matrix or data frame of numeric coordinates, size n x d
3+
#' @keywords internal
4+
#'
5+
#' @return A list with elements:
6+
#' - A: a matrix of size m x d (m = number of facets)
7+
#' - b: a length-m vector
8+
#' so that x is inside the hull if and only if A %*% x + b >= 0 (for all rows).
9+
#' - volume: Total volume of the convex hull
10+
convhull_halfspace = function(points) {
11+
pts_mat = as.matrix(points)
12+
13+
ch = geometry::convhulln(pts_mat, options = "Fa", output.options = "n")
14+
chvol = geometry::convhulln(pts_mat, options = "Fa", output.options = "FA")
15+
16+
hull_facets = ch$hull
17+
facet_norms = ch$normals
18+
19+
m = nrow(facet_norms) # number of facets
20+
d = ncol(facet_norms) # dimension
21+
22+
A = facet_norms[,seq_len(d-1), drop=FALSE]
23+
b = facet_norms[,d]
24+
25+
list(
26+
A = A,
27+
b = b,
28+
volume = chvol$vol
29+
)
30+
}
31+
32+
#' @title Interpolate points within a convex hull
33+
#'
34+
#' @param points A numeric matrix (n x d).
35+
#' @param ch_halfspace Convex hull. Output of `convhull_halfspace`
36+
#' @param n_samples_per_dimension Number of samples per dimension (pre filtering) to take
37+
#'
38+
#' @keywords internal
39+
#'
40+
#' @return A list with:
41+
#' - data: The interpolated points
42+
#' - on_edge: A logical vector indicating whether the points
43+
#'
44+
interpolate_convex_hull = function(points, ch_halfspace, n_samples_per_dimension = 11) {
45+
stopifnot(is.matrix(points), ncol(points) >= 2)
46+
47+
facet_normals = ch_halfspace$A # same number of rows as hull_facets
48+
facet_offsets = ch_halfspace$b # same number of rows as hull_facets
49+
50+
ranges = apply(points, 2, range)
51+
52+
eg_list = list()
53+
for(i in seq_len(ncol(ranges))) {
54+
eg_list[[i]] = seq(ranges[1,i], ranges[2,i], length.out=n_samples_per_dimension)
55+
}
56+
numeric_eg = do.call(expand.grid,args = eg_list)
57+
filter_pts_edge = rep(FALSE, nrow(numeric_eg))
58+
filter_pts = rep(TRUE, nrow(numeric_eg))
59+
60+
for(i in seq_len(nrow(facet_normals))) {
61+
vec_single = facet_normals[i,]
62+
proj_pts = apply(numeric_eg, 1, \(x) sum(x*vec_single) + facet_offsets[i])
63+
filter_pts = filter_pts & proj_pts < 1e-8
64+
filter_pts_edge = filter_pts_edge | abs(proj_pts) < 1e-8
65+
}
66+
return(list(data=numeric_eg[which(filter_pts),,drop=FALSE],
67+
on_edge = filter_pts_edge[filter_pts]))
68+
}
69+
70+
#' @title Approximate continuous moment matrix over a numeric bounding box
71+
#'
72+
#' @description Treats the min–max range of each numeric column as a bounding box
73+
#' and samples points uniformly to approximate the integral of the outer product of
74+
#' the model terms, weighted by whether the point
75+
#' is on the edge.
76+
#'
77+
#' @param data Candidate set
78+
#' @param formula Default `~ .`. Model formula specifying the terms.
79+
#' @param n_samples_per_dimension Default `100`. Number of samples to take per dimension when interpolating inside
80+
#' the convex hull.
81+
#'
82+
#' @keywords internal
83+
#' @return A matrix of size `p x p` (where `p` is the number of columns in the model matrix),
84+
#' approximating the continuous moment matrix.
85+
#'
86+
gen_momentsmatrix_continuous = function(
87+
formula = ~ .,
88+
candidate_set,
89+
n_samples_per_dimension
90+
) {
91+
# Identify numeric columns (continuous factors)
92+
is_numeric_col = vapply(candidate_set, is.numeric, logical(1))
93+
numeric_cols = names(candidate_set)[is_numeric_col]
94+
factor_cols = names(candidate_set)[!is_numeric_col]
95+
96+
if (length(numeric_cols) == 0) {
97+
stop("No numeric columns found in data. Cannot compute continuous bounding box.")
98+
}
99+
# Detect any disallowed combinations
100+
unique_vals = prod(vapply(candidate_set, \(x) {length(unique(x))}, FUN.VALUE = integer(1)))
101+
any_disallowed = unique_vals != nrow(candidate_set)
102+
103+
# Simple if all numeric: just integrate over the region.
104+
if(length(factor_cols) == 0) {
105+
sub_candidate_set = as.matrix(candidate_set)
106+
ch = convhull_halfspace(sub_candidate_set)
107+
if (ch$volume <= 0) {
108+
next
109+
}
110+
vol = ch$volume
111+
interp_ch = interpolate_convex_hull(as.matrix(sub_candidate_set), ch, n_samples_per_dimension = n_samples_per_dimension)
112+
new_pts_ch = interp_ch$data
113+
114+
colnames(new_pts_ch) = numeric_cols
115+
interp_df = as.data.frame(new_pts_ch)
116+
117+
# Now build model matrix
118+
Xsub = model.matrix(formula, data = interp_df)
119+
120+
w = rep(1, nrow(Xsub))
121+
w[interp_ch$on_edge] = 0.5
122+
# average subregion moment
123+
Xsub_w = apply(Xsub,2,\(x) x*sqrt(w))
124+
# M_sub = crossprod(Xsub) / sum(w)
125+
126+
M_sub = crossprod(Xsub_w) / sum(w)
127+
128+
# Weighted accumulation
129+
if (is.null(M_acc)) {
130+
M_acc = vol * M_sub
131+
} else {
132+
M_acc = M_acc + vol * M_sub
133+
}
134+
total_weight = total_weight + vol
135+
#Scale by the intercept
136+
if(colnames(M)[1] == "(Intercept)") {
137+
M = M / M[1,1]
138+
}
139+
return(M)
140+
} else {
141+
# For categorical factors with disallowed combinations, we need to account for the
142+
# reduced domain of the integral. We'll calculate a moment matrix as above for each
143+
# factor level combination, weigh it by the total number of points, and sum it. That
144+
# will give us the average prediction variance for the constrained region.
145+
unique_combos = unique(candidate_set[,factor_cols,drop = FALSE])
146+
147+
get_contrasts_from_candset = function(candset) {
148+
csn = colnames(candset)[!unlist(lapply(candset, is.numeric))]
149+
lapply(csn,
150+
\(x) {
151+
setNames(
152+
list(skpr::contr.simplex),
153+
x[[1]])
154+
}) |> unlist()
155+
}
156+
157+
# We'll accumulate a weighted sum of sub-matrices
158+
M_acc = NULL
159+
total_weight = 0
160+
161+
for (r in seq_len(nrow(unique_combos))) {
162+
combo_row = unique_combos[r, , drop=FALSE]
163+
164+
# subset of 'candidate_set' that matches this combo
165+
is_match = TRUE
166+
for (fc in factor_cols) {
167+
is_match = is_match & (candidate_set[[fc]] == combo_row[[fc]])
168+
}
169+
sub_candidate_set = candidate_set[is_match, , drop=FALSE]
170+
sub_candidate_set = sub_candidate_set[,is_numeric_col]
171+
# If no rows => disallowed or doesn't appear => skip
172+
if (!nrow(sub_candidate_set)) {
173+
next
174+
}
175+
176+
# Calculate the convex hull and sample points
177+
ch = convhull_halfspace(sub_candidate_set)
178+
if (ch$volume <= 0) {
179+
next
180+
}
181+
vol = ch$volume
182+
interp_ch = interpolate_convex_hull(as.matrix(sub_candidate_set), ch, n_samples_per_dimension = n_samples_per_dimension)
183+
new_pts_ch = interp_ch$data
184+
185+
colnames(new_pts_ch) = numeric_cols
186+
interp_df = as.data.frame(new_pts_ch)
187+
188+
# Pin the factor columns at this combo
189+
for (fc in factor_cols) {
190+
interp_df[[fc]] = combo_row[[fc]]
191+
}
192+
193+
# Now build model matrix
194+
Xsub = model.matrix(formula, data = interp_df, contrasts.arg = get_contrasts_from_candset(candidate_set))
195+
196+
w = rep(1, nrow(Xsub))
197+
w[interp_ch$on_edge] = 0.5
198+
# average subregion moment
199+
Xsub_w = apply(Xsub,2,\(x) x*sqrt(w))
200+
# M_sub = crossprod(Xsub) / sum(w)
201+
202+
M_sub = crossprod(Xsub_w) / sum(w)
203+
204+
# Weighted accumulation
205+
if (is.null(M_acc)) {
206+
M_acc = vol * M_sub
207+
} else {
208+
M_acc = M_acc + vol * M_sub
209+
}
210+
total_weight = total_weight + vol
211+
}
212+
213+
M = M_acc / total_weight
214+
#Scale by the intercept
215+
if(colnames(M)[1] == "(Intercept)") {
216+
M = M / M[1,1]
217+
}
218+
return(M)
219+
}
220+
}

R/gen_design.R

+7-2
Original file line numberDiff line numberDiff line change
@@ -921,8 +921,13 @@ gen_design = function(candidateset, model, trials,
921921
factors = colnames(candidatesetmm)
922922
levelvector = sapply(lapply(candidateset, unique), length)
923923
classvector = sapply(candidateset, inherits, c("factor", "character"))
924-
925-
mm = gen_momentsmatrix(factors, levelvector, classvector)
924+
if(all(classvector)) {
925+
mm = gen_momentsmatrix(factors, levelvector, classvector)
926+
} else {
927+
mm = gen_momentsmatrix_continuous(formula = model,
928+
data = candidatesetnormalized,
929+
n_samples_per_dimension = 100)
930+
}
926931
if (!parallel) {
927932
if(!is.null(getOption("skpr_progress"))) {
928933
progress = getOption("skpr_progress")

man/convhull_halfspace.Rd

+22
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/gen_momentsmatrix_continuous.Rd

+31
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/interpolate_convex_hull.Rd

+24
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)