Spaces:

cjerzak
/

fastrerandomize

Running

App Files Files

xet

Community

cjerzak committed on Mar 30

Commit

9443444

verified ·

1 Parent(s): f2cc599

Update app.R

Browse files

Files changed (1) hide show

app.R +165 -31

app.R CHANGED Viewed

@@ -27,6 +27,7 @@ library(parallel)         # For detecting CPU cores
 # ---------------------------------------------------------
 # HELPER FUNCTIONS (BASE R)
 # ---------------------------------------------------------
 # 1) Compute Hotelling's T^2 in base R
 baseR_hotellingT2 <- function(X, W) {
   # For a single assignment W:
@@ -52,12 +53,11 @@ baseR_hotellingT2 <- function(X, W) {
 }
 # 2) Generate randomizations in base R, filtering by acceptance probability
-#    using T^2 and keep the best (lowest) fraction.
 baseR_generate_randomizations <- function(n_units, n_treated, X, accept_prob, random_type,
                                           max_draws, batch_size) {
   # For safety, check if exact enumerations will explode:
-  # If random_type == "exact", we do combn(n_units, n_treated), which might be huge
   if (random_type == "exact") {
     n_comb_total <- choose(n_units, n_treated)
     if (n_comb_total > 1e6) {
@@ -136,21 +136,134 @@ baseR_generate_randomizations <- function(n_units, n_treated, X, accept_prob, ra
   list(randomizations = assignment_mat_accepted, balance = T2vals_accepted)
 }
-# 3) Base R randomization test: difference in means
-baseR_randomization_test <- function(obsW, obsY, allW) {
-  # obs diff in means
-  n1 <- sum(obsW)
-  n0 <- length(obsW) - n1
-  obs_diff <- mean(obsY[obsW == 1]) - mean(obsY[obsW == 0])
   # for each candidate assignment, compute diff in means on obsY
-  diffs <- apply(allW, 1, function(w) {
-    mean(obsY[w == 1]) - mean(obsY[w == 0])
-  })
   # p-value = fraction whose absolute diff >= observed
-  pval <- mean(abs(diffs) >= abs(obs_diff))
-  list(p_value = pval, tau_obs = obs_diff)
 }
 # ---------------------------------------------------------
@@ -310,7 +423,7 @@ ui <- dashboardPage(
           box(width = 8, title = "Test Results", status = "info", solidHeader = TRUE,
-              # First row: p-value and observed effect
               fluidRow(
                 column(width = 6, valueBoxOutput("pvalue_box", width = 12)),
                 column(width = 6, valueBoxOutput("tauobs_box", width = 12))
@@ -322,7 +435,19 @@ ui <- dashboardPage(
                 column(width = 6, valueBoxOutput("baseR_test_time_box", width = 12))
               ),
               uiOutput("fi_text"),
               br(),
               plotOutput("test_plot", height = "280px")
           )
@@ -390,7 +515,6 @@ server <- function(input, output, session) {
            "Number treated cannot exceed total units.")
     )
-    # ------------------ COMPUTING RESULTS TOGGLE ------------------
     withProgress(message = "Computing results...", value = 0, {
       # =========== 1) fastrerandomize generation timing ===========
@@ -500,7 +624,6 @@ server <- function(input, output, session) {
   # Hardware info (CPU cores, GPU note)
   output$hardware_info <- renderUI({
     num_cores <- detectCores(logical = TRUE)
-    # Basic note about GPU (this can be expanded if you have specialized checks)
     HTML(paste(
       "<strong>System Hardware Info:</strong><br/>",
       "Number of CPU cores detected:", num_cores, "<br/>",
@@ -517,8 +640,6 @@ server <- function(input, output, session) {
   observeEvent(input$simulateY_btn, {
     req(RerandResult())
     rr <- RerandResult()
-    # We'll just use the first accepted randomization as the "observed" assignment
     if (is.null(rr$randomizations) || nrow(rr$randomizations) < 1) {
       showNotification("No accepted randomizations found. Cannot simulate Y for the 'observed' assignment.", type = "error")
       return(NULL)
@@ -564,7 +685,6 @@ server <- function(input, output, session) {
   baseR_test_time <- reactiveVal(NULL)
   observeEvent(input$run_randtest_btn, {
-    # ------------------ COMPUTING RESULTS TOGGLE ------------------
     withProgress(message = "Computing results...", value = 0, {
       req(RerandResult())
@@ -599,7 +719,6 @@ server <- function(input, output, session) {
       fastrand_test_time(difftime(t1_testfast, t0_testfast, units = "secs"))
       # =========== 2) base R randomization test timing ===========
-      # We must also have the base R set of randomizations
       req(RerandResult_base())
       rr_base <- RerandResult_base()
       if (is.null(rr_base$randomizations) || nrow(rr_base$randomizations) < 1) {
@@ -613,7 +732,8 @@ server <- function(input, output, session) {
         baseR_randomization_test(
           obsW    = obsW,
           obsY    = obsY,
-          allW    = rr_base$randomizations
         )
       }, error = function(e) e)
       t1_testbase <- Sys.time()
@@ -632,18 +752,18 @@ server <- function(input, output, session) {
   output$pvalue_box <- renderValueBox({
     rt <- RandTestResult()
     if (is.null(rt)) {
-      valueBox("---", "p-value", icon = icon("question"), color = "blue")
     } else {
-      valueBox(round(rt$p_value, 4), "p-value", icon = icon("list-check"), color = "purple")
     }
   })
   output$tauobs_box <- renderValueBox({
     rt <- RandTestResult()
     if (is.null(rt)) {
-      valueBox("---", "Observed Effect", icon = icon("question"), color = "maroon")
     } else {
-      valueBox(round(rt$tau_obs, 4), "Observed Effect", icon = icon("bullseye"), color = "maroon")
     }
   })
@@ -668,7 +788,7 @@ server <- function(input, output, session) {
     }
   })
-  # If we have a fiducial interval, display it
   output$fi_text <- renderUI({
     rt <- RandTestResult()
     if (is.null(rt) || is.null(rt$FI)) {
@@ -678,29 +798,43 @@ server <- function(input, output, session) {
     fi_upper <- round(rt$FI[2], 4)
     tagList(
-      strong("Fiducial Interval (95%):"),
       p(sprintf("[%.4f, %.4f]", fi_lower, fi_upper))
     )
   })
   # A simple plot for the randomization distribution (for demonstration).
-  # In this minimal example, we do not store the entire distribution in 'randomization_test',
   # so we simply show the observed effect as a point.
   output$test_plot <- renderPlot({
     rt <- RandTestResult()
     if (is.null(rt)) {
-      # no test run yet
       plot.new()
       title("No test results yet.")
       return(NULL)
     }
-    # Just display the observed effect
     obs_val <- rt$tau_obs
     ggplot(data.frame(x = obs_val, y = 0), aes(x, y)) +
       geom_point(size=4, color="red") +
       xlim(c(obs_val - abs(obs_val)*2 - 1, obs_val + abs(obs_val)*2 + 1)) +
-      labs(title = "Observed Treatment Effect",
            x = "Effect Size", y = "") +
       theme_minimal(base_size = 14) +
       geom_vline(xintercept = 0, linetype="dashed", color="gray40")

 # ---------------------------------------------------------
 # HELPER FUNCTIONS (BASE R)
 # ---------------------------------------------------------
 # 1) Compute Hotelling's T^2 in base R
 baseR_hotellingT2 <- function(X, W) {
   # For a single assignment W:
 }
 # 2) Generate randomizations in base R, filtering by acceptance probability
+#    using T^2 and keep the best (lowest) fraction.
 baseR_generate_randomizations <- function(n_units, n_treated, X, accept_prob, random_type,
                                           max_draws, batch_size) {
   # For safety, check if exact enumerations will explode:
   if (random_type == "exact") {
     n_comb_total <- choose(n_units, n_treated)
     if (n_comb_total > 1e6) {
   list(randomizations = assignment_mat_accepted, balance = T2vals_accepted)
 }
# Helper: difference in mean outcomes between the two arms.
#
# Args:
#   Y: numeric vector of outcomes.
#   W: assignment vector, same length as Y; entries equal to 1 mark treated
#      units and entries equal to 0 mark control units.
#
# Returns:
#   mean(Y | W == 1) - mean(Y | W == 0). The result is NaN when either arm
#   is empty, because the mean of a zero-length vector is NaN.
diff_in_means <- function(Y, W) {
  mean(Y[W == 1]) - mean(Y[W == 0])
}
# Helper: difference in means that would be observed under the alternative
# assignment `Wprime` if the treatment effect were exactly `tau` for every unit.
#
# Args:
#   Wprime: candidate 0/1 assignment vector (one permutation).
#   obsY:   outcomes observed under the realised assignment.
#   obsW:   realised 0/1 assignment vector.
#   tau:    hypothesised constant treatment effect.
#
# Returns:
#   The difference in means (via diff_in_means) of the relabelled outcomes.
compute_diff_at_tau_for_oneW <- function(Wprime, obsY, obsW, tau) {
  # Remove the hypothesised effect from the units that were actually treated
  # to recover every unit's outcome under control.
  Y0 <- obsY - obsW * tau
  # Under Wprime, a unit shows Y0 if it is in control and Y0 + tau if treated.
  Yprime <- Y0
  treated <- Wprime == 1
  Yprime[treated] <- Y0[treated] + tau
  diff_in_means(Yprime, Wprime)
}
# 3a) Base R randomization test on the difference in means, with an optional
#     fiducial interval.
#
# Args:
#   obsW:   realised 0/1 assignment vector.
#   obsY:   observed outcomes.
#   allW:   candidate assignments, one per row (a plain vector is treated as
#           a single assignment).
#   findFI: if TRUE, also compute a fiducial interval via
#           baseR_find_fiducial_interval(). NULL, NA or FALSE skip it.
#   alpha:  significance level forwarded to the fiducial-interval search.
#
# Returns:
#   list(p_value, tau_obs, FI); FI is NULL unless findFI is TRUE.
baseR_randomization_test <- function(obsW, obsY, allW, findFI = FALSE, alpha = 0.05) {
  # A single accepted assignment may have been dropped to a vector; apply()
  # needs a matrix, so restore the one-row shape.
  if (is.null(dim(allW))) {
    allW <- matrix(allW, nrow = 1)
  }

  # Observed diff in means
  tau_obs <- diff_in_means(obsY, obsW)

  # For each candidate assignment, compute diff in means on obsY
  diffs <- apply(allW, 1, function(w) diff_in_means(obsY, w))

  # p-value = fraction whose absolute diff >= observed
  pval <- mean(abs(diffs) >= abs(tau_obs))

  # Optionally compute a fiducial interval. The flag typically comes from a
  # UI input that can be NULL, so normalise it instead of letting `if` fail
  # on a zero-length or NA condition.
  FI <- NULL
  if (isTRUE(as.logical(findFI))) {
    FI <- baseR_find_fiducial_interval(obsW, obsY, allW, tau_obs, alpha = alpha)
  }

  list(p_value = pval, tau_obs = tau_obs, FI = FI)
}
# 3b) Fiducial interval for the base R randomization test, mirroring the
#     approach in fastrerandomize:
#     1) Find a wide lower and upper bracket via stochastic updates driven by
#        randomly drawn candidate assignments.
#     2) Grid-search around that bracket for the values of tau that are not
#        rejected, and report the smallest and largest accepted value.
#
# Args:
#   obsW:              realised 0/1 assignment vector.
#   obsY:              observed outcomes.
#   allW:              candidate assignments, one per row (a plain vector is
#                      treated as a single assignment).
#   tau_obs:           observed difference in means.
#   alpha:             significance level; a tau is accepted when both tail
#                      fractions exceed alpha / 2.
#   c_initial:         currently unused; kept so existing callers keep working.
#   n_search_attempts: number of stochastic bracket updates.
#
# Returns:
#   Numeric vector c(lower, upper), or c(NA, NA) when there are no candidate
#   assignments, tau_obs is not finite, or no grid value is accepted.
baseR_find_fiducial_interval <- function(obsW, obsY, allW, tau_obs, alpha = 0.05, c_initial = 2,
                                         n_search_attempts = 500) {
  # A single assignment may arrive as a plain vector; treat it as one row.
  if (is.null(dim(allW))) {
    allW <- matrix(allW, nrow = 1)
  }
  n_allW <- nrow(allW)

  # Nothing to sample from, or no usable centre for the bracket.
  if (n_allW < 1 || !isTRUE(is.finite(tau_obs))) {
    return(c(NA_real_, NA_real_))
  }

  # Initial bracket. abs() keeps lower <= upper when tau_obs is negative
  # (tau_obs - 3 * tau_obs would otherwise land above tau_obs).
  half_width <- 3 * abs(tau_obs)
  lowerBound_est <- tau_obs - half_width
  upperBound_est <- tau_obs + half_width

  z_alpha <- qnorm(1 - alpha)
  k <- 2 / (z_alpha * (2 * pi)^(-1/2) * exp(-z_alpha^2 / 2))

  # For each iteration, pick one random assignment from allW, see what
  # difference in means each current bound implies, and update the bracket
  # with a step size that shrinks as 1 / step_t.
  for (step_t in seq_len(n_search_attempts)) {
    idx <- sample.int(n_allW, 1)
    Wprime <- allW[idx, ]

    # ~~~~~ update lowerBound ~~~~~
    tau_at_step_lower <- compute_diff_at_tau_for_oneW(Wprime, obsY, obsW, lowerBound_est)
    delta <- tau_obs - tau_at_step_lower
    if (tau_at_step_lower < tau_obs) {
      # move lowerBound up
      lowerBound_est <- lowerBound_est + k * delta * (alpha/2) / step_t
    } else {
      # move it down
      lowerBound_est <- lowerBound_est - k * (-delta) * (1 - alpha/2) / step_t
    }

    # ~~~~~ update upperBound ~~~~~
    tau_at_step_upper <- compute_diff_at_tau_for_oneW(Wprime, obsY, obsW, upperBound_est)
    delta2 <- tau_at_step_upper - tau_obs
    if (tau_at_step_upper > tau_obs) {
      # move upperBound down
      upperBound_est <- upperBound_est - k * delta2 * (alpha/2) / step_t
    } else {
      # move it up
      upperBound_est <- upperBound_est + k * (-delta2) * (1 - alpha/2) / step_t
    }
  }

  # Grid search over a padded version of the bracket. The padding is additive
  # so it always widens the range: multiplying a negative upper estimate by 2
  # would pull the grid's upper edge *below* the estimate. For upper
  # estimates >= 1 the upper edge is still exactly twice the estimate.
  bracket <- range(lowerBound_est, upperBound_est)
  grid_lower <- bracket[1] - 1
  grid_upper <- bracket[2] + max(abs(bracket[2]), 1)
  tau_seq <- seq(grid_lower, grid_upper, length.out = 100)

  # A tau is "accepted" (not rejected) when the smaller of
  #    fraction(distribution_of(tau) >= tau_obs)
  #    fraction(distribution_of(tau) <= tau_obs)
  # exceeds alpha / 2, i.e. a two-sided test at level alpha.
  accepted <- logical(length(tau_seq))
  for (i in seq_along(tau_seq)) {
    tau_pseudo <- tau_seq[i]
    # Difference in means for every candidate assignment if the true
    # effect were tau_pseudo.
    diffs_pseudo <- apply(allW, 1, function(wp) {
      compute_diff_at_tau_for_oneW(wp, obsY, obsW, tau_pseudo)
    })
    frac_ge <- mean(diffs_pseudo >= tau_obs)
    frac_le <- mean(diffs_pseudo <= tau_obs)
    accepted[i] <- (min(frac_ge, frac_le) > alpha / 2)
  }

  # which() drops NA entries, so an undefined comparison counts as "not
  # accepted" instead of breaking the emptiness check.
  accepted_idx <- which(accepted)
  if (length(accepted_idx) == 0) {
    # No value accepted => degenerate case; signal it with NAs.
    return(c(NA_real_, NA_real_))
  }
  c(min(tau_seq[accepted_idx]), max(tau_seq[accepted_idx]))
}
 # ---------------------------------------------------------
           box(width = 8, title = "Test Results", status = "info", solidHeader = TRUE,
+              # First row: p-value and observed effect (fastrerandomize)
               fluidRow(
                 column(width = 6, valueBoxOutput("pvalue_box", width = 12)),
                 column(width = 6, valueBoxOutput("tauobs_box", width = 12))
                 column(width = 6, valueBoxOutput("baseR_test_time_box", width = 12))
               ),
+              # Show fastrerandomize FI
               uiOutput("fi_text"),
+              # Now show Base R results in a separate row
+              tags$hr(),
+              fluidRow(
+                column(width = 6, valueBoxOutput("pvalue_box_baseR", width = 12)),
+                column(width = 6, valueBoxOutput("tauobs_box_baseR", width = 12))
+              ),
+              fluidRow(
+                column(width = 12, uiOutput("fi_text_baseR"))
+              ),
               br(),
               plotOutput("test_plot", height = "280px")
           )
            "Number treated cannot exceed total units.")
     )
     withProgress(message = "Computing results...", value = 0, {
       # =========== 1) fastrerandomize generation timing ===========
   # Hardware info (CPU cores, GPU note)
   output$hardware_info <- renderUI({
     num_cores <- detectCores(logical = TRUE)
     HTML(paste(
       "<strong>System Hardware Info:</strong><br/>",
       "Number of CPU cores detected:", num_cores, "<br/>",
   observeEvent(input$simulateY_btn, {
     req(RerandResult())
     rr <- RerandResult()
     if (is.null(rr$randomizations) || nrow(rr$randomizations) < 1) {
       showNotification("No accepted randomizations found. Cannot simulate Y for the 'observed' assignment.", type = "error")
       return(NULL)
   baseR_test_time <- reactiveVal(NULL)
   observeEvent(input$run_randtest_btn, {
     withProgress(message = "Computing results...", value = 0, {
       req(RerandResult())
       fastrand_test_time(difftime(t1_testfast, t0_testfast, units = "secs"))
       # =========== 2) base R randomization test timing ===========
       req(RerandResult_base())
       rr_base <- RerandResult_base()
       if (is.null(rr_base$randomizations) || nrow(rr_base$randomizations) < 1) {
         baseR_randomization_test(
           obsW    = obsW,
           obsY    = obsY,
+          allW    = rr_base$randomizations,
+          findFI  = input$findFI  # if user wants the FI, do so
         )
       }, error = function(e) e)
       t1_testbase <- Sys.time()
   output$pvalue_box <- renderValueBox({
     rt <- RandTestResult()
     if (is.null(rt)) {
+      valueBox("---", "p-value (fastrerandomize)", icon = icon("question"), color = "blue")
     } else {
+      valueBox(round(rt$p_value, 4), "p-value (fastrerandomize)", icon = icon("list-check"), color = "purple")
     }
   })
   output$tauobs_box <- renderValueBox({
     rt <- RandTestResult()
     if (is.null(rt)) {
+      valueBox("---", "Observed Effect (fastrerandomize)", icon = icon("question"), color = "maroon")
     } else {
+      valueBox(round(rt$tau_obs, 4), "Observed Effect (fastrerandomize)", icon = icon("bullseye"), color = "maroon")
     }
   })
     }
   })
+  # If we have a fiducial interval from fastrerandomize, display it
   output$fi_text <- renderUI({
     rt <- RandTestResult()
     if (is.null(rt) || is.null(rt$FI)) {
     fi_upper <- round(rt$FI[2], 4)
     tagList(
+      strong("Fiducial Interval (fastrerandomize, 95%):"),
+      p(sprintf("[%.4f, %.4f]", fi_lower, fi_upper))
+    )
+  })
+  # If we have a fiducial interval from base R, display it
+  output$fi_text_baseR <- renderUI({
+    rt <- RandTestResult_base()
+    if (is.null(rt) || is.null(rt$FI)) {
+      return(NULL)
+    }
+    fi_lower <- round(rt$FI[1], 4)
+    fi_upper <- round(rt$FI[2], 4)
+    tagList(
+      strong("Fiducial Interval (base R, 95%):"),
       p(sprintf("[%.4f, %.4f]", fi_lower, fi_upper))
     )
   })
   # A simple plot for the randomization distribution (for demonstration).
+  # In this app, we do not store the entire distribution from either method,
   # so we simply show the observed effect as a point.
   output$test_plot <- renderPlot({
     rt <- RandTestResult()
     if (is.null(rt)) {
       plot.new()
       title("No test results yet.")
       return(NULL)
     }
+    # Just display the observed effect from fastrerandomize
     obs_val <- rt$tau_obs
     ggplot(data.frame(x = obs_val, y = 0), aes(x, y)) +
       geom_point(size=4, color="red") +
       xlim(c(obs_val - abs(obs_val)*2 - 1, obs_val + abs(obs_val)*2 + 1)) +
+      labs(title = "Observed Treatment Effect (fastrerandomize)",
            x = "Effect Size", y = "") +
       theme_minimal(base_size = 14) +
       geom_vline(xintercept = 0, linetype="dashed", color="gray40")