#!/usr/local/bin/Rscript
# StatTrainer
# by Curtis Miller
#
# This is a Shiny web application. You can run the application by clicking
# the 'Run App' button above.
#
# Find out more about building applications with Shiny here:
#
#    http://shiny.rstudio.com/
#
# Every test has its own navigation entry. Each entry explains the procedure,
# shows a randomly generated dataset ("Data" tab) and the matching R analysis
# ("Analysis" tab). Input/output IDs follow the pattern <role>_<suffix>, where
# <suffix> identifies the test (pm, pmt, dp, du, pp, gf, id, aov) and <role> is
# one of gen (regenerate button), alt (alternative hypothesis), c (confidence
# level in percent), h0 / hA (hypotheses), data* (dataset) and test (analysis).

library(shiny)
library(BSDA)
library(gtools)

# UI helpers ----

# Radio buttons for choosing the direction of the alternative hypothesis.
# The values are the strings accepted by the `alternative` argument of
# t.test() and z.test().
alt_hypothesis_input <- function(id) {
  radioButtons(
    id, "Alternative Hypothesis",
    choices = list(
      "Less Than" = "less",
      "Not Equal To" = "two.sided",
      "Greater Than" = "greater"
    ),
    selected = "two.sided"
  )
}

# Numeric input for the confidence level, expressed in percent.
conf_level_input <- function(id) {
  numericInput(id, "Confidence Level",
    value = 95, min = 1, max = 99.9, step = 0.1
  )
}

# Build the "Data" / "Analysis" tab pair shared by every test.
#
# suffix              test suffix used to derive the input/output IDs
# data_view           UI element(s) that display the generated dataset
# inference_controls  TRUE adds the alternative-hypothesis radio buttons and
#                     the confidence-level input (tests with a direction)
# extra_info          optional UI shown above the hypotheses in the sidebar
practice_tabs <- function(suffix, data_view, inference_controls = TRUE,
                          extra_info = NULL) {
  id <- function(role) paste0(role, "_", suffix)
  # NULL children are dropped by the tag builders, so the conditional
  # controls simply vanish when inference_controls is FALSE.
  tabsetPanel(
    tabPanel(
      "Data",
      sidebarLayout(
        sidebarPanel(
          actionButton(id("gen"), "Regenerate"),
          hr(),
          if (inference_controls) alt_hypothesis_input(id("alt")),
          if (inference_controls) hr(),
          extra_info,
          htmlOutput(id("h0")),
          htmlOutput(id("hA"))
        ),
        mainPanel(data_view)
      )
    ),
    tabPanel(
      "Analysis",
      verbatimTextOutput(id("test")),
      if (inference_controls) conf_level_input(id("c"))
    )
  )
}

# Bullet list describing the p-value under each of the three alternatives.
#
# lhs, rhs  LaTeX for the two sides of the hypothesis (e.g. "\\mu", "\\mu_0")
# stat      symbol of the observed test statistic (e.g. "z")
# rv        symbol of the reference random variable (e.g. "Z")
# rv_note   plain-text/LaTeX description of the distribution of `rv`
p_value_items <- function(lhs, rhs, stat, rv, rv_note) {
  item <- function(relation, probability) {
    tags$li(paste0(
      "If the alternative hypothesis is \\(H_A: ", lhs, " ", relation, " ",
      rhs, "\\), our p-value is \\(p_{\\text{val}} = ", probability,
      "\\) (where \\(", rv, "\\) is ", rv_note, ")."
    ))
  }
  tags$ul(
    item("<", paste0("P(", rv, " < ", stat, ")")),
    item("\\neq", paste0("2P(", rv, " > |", stat, "|)")),
    item(">", paste0("P(", rv, " > ", stat, ")"))
  )
}

# Descriptions of the reference distributions used by the t procedures.
t_note <- "a \\(t\\)-distributed random variable with
  \\(\\nu = n - 1\\) degrees of freedom"

welch_note <- "a \\(t\\)-distributed random variable with
  \\(\\nu \\approx \\frac{\\left(\\frac{s_x^2}{n_x} +
  \\frac{s_y^2}{n_y}\\right)^2}{\\frac{1}{n_x - 1}
  \\left(\\frac{s_x^2}{n_x}\\right)^2 + \\frac{1}{n_y - 1}
  \\left(\\frac{s_y^2}{n_y}\\right)^2} \\) degrees of freedom, or
  \\(\\nu = \\min(n_x, n_y) - 1\\) for an easier yet conservative test"

# Server helpers ----

# Draw a null-hypothesis mean (heavy-tailed around 10, one decimal).
random_null_mean <- function() {
  round(rcauchy(1, location = 10, scale = 2), digits = 1)
}

# Degrees of freedom used to draw a standard deviation for a given null mean.
# log(|nullmean|) is <= 0 (or -Inf) when |nullmean| <= 1, which rchisq()
# cannot use, so the value is floored at 1.
sd_df <- function(nullmean) {
  max(1, ceiling(log(abs(nullmean))))
}

# Draw a population standard deviation; floored at 0.1 because rounding can
# otherwise produce a degenerate sd of 0.
random_sd <- function(nullmean) {
  max(0.1, round(rchisq(1, df = sd_df(nullmean)), digits = 1))
}

# Decide at random whether the null or the alternative is true and return the
# shift of the true parameter away from its null value. The sign follows the
# chosen alternative: negative for "less", positive for "greater".
random_shift <- function(alternative) {
  shift <- if (sample(c(TRUE, FALSE), size = 1)) rnorm(1, sd = 2) else 0
  switch(alternative,
    less = -abs(shift),
    greater = abs(shift),
    shift
  )
}

# LaTeX relation symbol matching an `alternative` string.
relation_symbol <- function(alternative) {
  switch(alternative,
    less = "<",
    greater = ">",
    "\\neq"
  )
}

# Render the pasted arguments as one inline MathJax expression.
math_text <- function(...) {
  withMathJax(helpText(paste0("\\(", ..., "\\)")))
}

# Convert the confidence level from percent to a proportion, stopping the
# output quietly while the field is empty and explaining invalid values.
conf_level <- function(percent) {
  req(percent)
  validate(need(
    percent > 0 && percent < 100,
    "Confidence level must be between 0 and 100."
  ))
  percent / 100
}

# User interface ----
# Long strings are wrapped inside the literal; HTML and TeX both collapse the
# resulting whitespace.
ui <- shinyUI(fluidPage(
  withMathJax(),
  titlePanel("StatTrainer"),
  p(em("by Curtis Miller")),
  navlistPanel(
    tabPanel(
      "Introduction",
      h3("Introduction"),
      p("This application is intended to give students in MATH 1070 at the
        University of Utah additional practice in statistical inference. Each
        item in the navigation bar to the left generates datasets suited to
        that type of test that students can then use to compute a confidence
        interval or perform a statistical test. In one tab the data is shown,
        and in the other the student can see an R analysis of the data to
        check the student's conclusions. The student can generate as many
        datasets as desired."),
      p("This app works for hypothesis testing and confidence intervals. Note,
        though, that the type of confidence interval computed depends on the
        alternative hypothesis specified. Choosing a one-sided hypothesis
        (such as 'less than' or 'greater than') will result in a one-sided
        confidence interval being computed. To have a two-sided confidence
        interval, the option 'not equal to' must be chosen.")
    ),

    # Test for population mean with known standard deviation
    # Suffix: pm
    tabPanel(
      "Test for Population Mean (Known Standard Deviation)",
      h3("Test for Population Mean (Known Standard Deviation)"),
      p("Suppose we wish to test for the population mean \\(\\mu\\) using a
        dataset of size \\(n\\), and the population standard deviation
        \\(\\sigma\\) is known."),
      tags$ul(
        tags$li("The \\(100 \\times C\\)% confidence interval for the mean is:
          $$\\bar{x} \\pm z^{*} \\frac{\\sigma}{\\sqrt{n}} \\equiv
          \\left(\\bar{x} - z^{*} \\frac{\\sigma}{\\sqrt{n}} ;
          \\bar{x} + z^{*} \\frac{\\sigma}{\\sqrt{n}} \\right)$$
          (Remember that \\(z^{*}\\) is the critical value associated with the
          confidence level \\(C\\).)"),
        tags$li("Suppose we test the null hypothesis \\(H_0:\\mu = \\mu_0 \\)
          against some alternative hypothesis, with \\(\\alpha\\) level of
          significance. The test statistic will be:
          $$z = \\frac{\\bar{x} - \\mu_0}{\\frac{\\sigma}{\\sqrt{n}}}$$"),
        p_value_items(
          "\\mu", "\\mu_0", "z", "Z", "a standard normal random variable"
        )
      ),
      practice_tabs("pm", tableOutput("data_pm"),
        extra_info = htmlOutput("sd_pm")
      )
    ),

    # Test for population mean with unknown standard deviation
    # Suffix: pmt
    tabPanel(
      "Test for Population Mean (Unknown Standard Deviation)",
      h3("Test for Population Mean (Unknown Standard Deviation)"),
      p("Suppose we wish to test for the population mean \\(\\mu\\) using a
        dataset of size \\(n\\), and the population standard deviation
        \\(\\sigma\\) is not known."),
      tags$ul(
        tags$li("The \\(100 \\times C\\)% confidence interval for the mean is:
          $$\\bar{x} \\pm t^{*} \\frac{s}{\\sqrt{n}} \\equiv
          \\left(\\bar{x} - t^{*} \\frac{s}{\\sqrt{n}} ;
          \\bar{x} + t^{*} \\frac{s}{\\sqrt{n}} \\right)$$
          (Remember that \\(t^{*}\\) is the critical value associated with the
          confidence level \\(C\\).)"),
        tags$li("Suppose we test the null hypothesis \\(H_0:\\mu = \\mu_0 \\)
          against some alternative hypothesis, with \\(\\alpha\\) level of
          significance. The test statistic will be:
          $$t = \\frac{\\bar{x} - \\mu_0}{\\frac{s}{\\sqrt{n}}}$$"),
        p_value_items("\\mu", "\\mu_0", "t", "T", t_note)
      ),
      practice_tabs("pmt", tableOutput("data_pmt"))
    ),

    # Test for difference in means of paired data
    # Suffix: dp
    tabPanel(
      "Test for Difference in Means of Paired Data",
      h3("Test for Difference in Means of Paired Data"),
      p("Suppose we have paired data, \\(X_i\\) and \\(Y_i\\) with
        \\(D_i=X_i-Y_i \\sim N(\\mu_D, \\sigma)\\). We wish to determine
        whether the two population means, \\(\\mu_X\\) and \\(\\mu_Y\\), are
        the same (i.e. \\(\\mu_D = 0\\)) or different
        (i.e. \\(\\mu_D \\neq 0\\))."),
      tags$ul(
        tags$li("The \\(100 \\times C\\)% confidence interval for the mean
          difference is:
          $$\\bar{d} \\pm t^{*} \\frac{s_d}{\\sqrt{n}} \\equiv
          \\left(\\bar{d} - t^{*} \\frac{s_d}{\\sqrt{n}} ;
          \\bar{d} + t^{*} \\frac{s_d}{\\sqrt{n}} \\right)$$
          (Remember that \\(d_i = x_i - y_i\\), \\(s_d\\) the standard
          deviation of \\(d_{\\cdot}\\), and \\(t^{*}\\) is the critical value
          associated with the confidence level \\(C\\).)"),
        tags$li("Suppose we test the null hypothesis
          \\(H_0: \\mu_X = \\mu_Y\\) against some alternative hypothesis,
          with \\(\\alpha\\) level of significance. The test statistic will
          be: $$t = \\frac{\\bar{d}}{\\frac{s_d}{\\sqrt{n}}}$$"),
        p_value_items("\\mu_X", "\\mu_Y", "t", "T", t_note)
      ),
      practice_tabs("dp", tableOutput("data_dp"))
    ),

    # Test for difference in means of unpaired data
    # Suffix: du
    tabPanel(
      "Test for Difference in Means of Unpaired Data",
      h3("Test for Difference in Means of Unpaired Data"),
      p("Suppose we have unpaired data,
        \\(X_i \\sim N(\\mu_x, \\sigma_x)\\) and
        \\(Y_i \\sim N(\\mu_y, \\sigma_y)\\). We wish to determine whether the
        two population means, \\(\\mu_x\\) and \\(\\mu_y\\), are the same or
        different."),
      tags$ul(
        tags$li("The \\(100 \\times C\\)% confidence interval for the mean
          difference is:
          $$\\bar{x} - \\bar{y} \\pm t^{*}
          \\sqrt{\\frac{s_x^2}{n_x} + \\frac{s_y^2}{n_y}} \\equiv
          \\left(\\bar{x} - \\bar{y} - t^{*}
          \\sqrt{\\frac{s_x^2}{n_x} + \\frac{s_y^2}{n_y}} ;
          \\bar{x} - \\bar{y} + t^{*}
          \\sqrt{\\frac{s_x^2}{n_x} + \\frac{s_y^2}{n_y}} \\right)$$
          (Remember that \\(s_x\\) and \\(s_y\\) are the sample standard
          deviations of the two samples, and \\(t^{*}\\) is the critical value
          associated with the confidence level \\(C\\).)"),
        tags$li("Suppose we test the null hypothesis
          \\(H_0: \\mu_x = \\mu_y\\) against some alternative hypothesis,
          with \\(\\alpha\\) level of significance. The test statistic will
          be: $$t = \\frac{\\bar{x} - \\bar{y}}
          {\\sqrt{\\frac{s_x^2}{n_x} + \\frac{s_y^2}{n_y}}}$$"),
        p_value_items("\\mu_x", "\\mu_y", "t", "T", welch_note)
      ),
      practice_tabs(
        "du",
        splitLayout(tableOutput("datax_du"), tableOutput("datay_du"))
      )
    ),

    # Test of population proportion
    # Suffix: pp
    tabPanel(
      "Test of Population Proportion",
      h3("Test of Population Proportion"),
      p("Suppose we have data \\(X_i\\) that equals 1 in the event of a
        success and 0 for a failure. \\(p\\) denotes the population proportion
        of drawing a success from the population."),
      tags$ul(
        tags$li("The \\(100 \\times C\\)% confidence interval for the
          population proportion is given by:
          \\(\\hat{p} \\pm z^{*}
          \\sqrt{\\frac{\\hat{p}(1-\\hat{p})}{n}}\\),
          where \\(\\hat{p}\\) is the sample proportion of successes and
          \\(z^{*}\\) the associated critical value (from the standard Normal
          distribution)."),
        tags$li("Suppose we test the null hypothesis \\(H_0: p = p_0\\)
          against some alternative hypothesis, with \\(\\alpha\\) level of
          significance. The test statistic will be:
          $$z = \\frac{\\hat{p} - p_0}{\\sqrt{\\frac{p_0(1-p_0)}{n}}}$$"),
        p_value_items(
          "p", "p_0", "z", "Z", "a standard Normal random variable"
        )
      ),
      practice_tabs("pp", verbatimTextOutput("data_pp"))
    ),

    # Test of goodness of fit
    # Suffix: gf
    tabPanel(
      "Test of Goodness of Fit",
      h3("Test of Goodness of Fit"),
      p("Suppose we have a categorical random variable \\(X_i\\) that takes
        value 1 with probability \\(p_1\\), value 2 with probability
        \\(p_2\\), ..., value \\(K\\) with probability \\(p_K\\). Under the
        null hypothesis we have
        \\(H_0: p_1 = p_{10}, p_2 = p_{20}, ..., p_K = p_{K0}\\), and the
        alternative hypothesis is \\(H_A: H_0 \\text{ is false}\\). The data
        set consists of observed counts \\(O_1, O_2, ..., O_K\\),
        representing how much data was in each group. The sample size is the
        grand total, \\(N = \\sum_{i = 1}^{K} O_i\\)."),
      tags$ol(
        tags$li("The \\(\\chi^2\\) statistic is
          \\(\\chi^2 = \\sum_{i = 1}^{K} \\frac{(O_i - E_i)^2}{E_i}\\),
          with \\(E_i\\) being the expected count for group \\(i\\) under
          \\(H_0\\), that is, \\(E_i = N p_{i0}\\)."),
        tags$li("Let \\(\\chi^2_\\nu \\sim \\chi^2(\\nu)\\) be a random
          variable following the \\(\\chi^2\\)-distribution with
          \\(\\nu = K - 1\\) degrees of freedom. The \\(p\\)-value is
          \\(p_{val} = P\\left(\\chi^2_\\nu > \\chi^2 \\right)\\).")
      ),
      practice_tabs("gf", verbatimTextOutput("data_gf"),
        inference_controls = FALSE
      )
    ),

    # Test of independence
    # Suffix: id
    tabPanel(
      "Test of Independence",
      h3("Test of Independence"),
      p("Suppose we have two categorical random variables \\(X_i\\) and
        \\(Y_j\\), where \\(X_i\\) takes value 1 with probability \\(p_1\\),
        value 2 with probability \\(p_2\\), ..., value \\(J\\) with
        probability \\(p_J\\). Likewise, \\(Y_j\\) takes value 1 with
        probability \\(q_1\\), value 2 with probability \\(q_2\\), ..., value
        \\(K\\) with probability \\(q_K\\). Under the null hypothesis we have
        \\(H_0: X \\text{ and } Y \\text{ are independent}\\) (that is, we
        have \\(H_0: \\text{For every } i \\text{ and } j \\text{, }
        P(X_i = i \\text{ and } Y_j = j) = p_i q_j\\)), and the alternative
        hypothesis is \\(H_A: H_0 \\text{ is false}\\). The data set consists
        of observed counts \\(O_{ij}\\) for each combination of possible
        outcomes for both \\(X\\) and \\(Y\\), representing how much data was
        in each combination of groups. The sample size is the grand total,
        \\(N = \\sum_{i, j} O_{ij}\\)."),
      tags$ol(
        tags$li("The \\(\\chi^2\\) statistic is
          \\(\\chi^2 = \\sum_{i, j}
          \\frac{(O_{ij} - E_{ij})^2}{E_{ij}}\\),
          with \\(E_{ij}\\) being the expected count for combination of
          groups \\(i, j\\) under \\(H_0\\), that is,
          \\(E_{ij} = \\frac{R_i C_j}{N}\\), where \\(R_i\\) is the total
          count for row \\(i\\) (that is,
          \\(R_i = \\sum_{j = 1}^{K} O_{ij}\\)), and \\(C_j\\) is the total
          count for column \\(j\\) (that is,
          \\(C_j = \\sum_{i = 1}^{J} O_{ij}\\))."),
        tags$li("Let \\(\\chi^2_\\nu \\sim \\chi^2(\\nu)\\) be a random
          variable following the \\(\\chi^2\\)-distribution with
          \\(\\nu = (J - 1)(K - 1)\\) degrees of freedom (that is, the degrees
          of freedom is one less than the number of rows times one less than
          the number of columns). The \\(p\\)-value is
          \\(p_{val} = P\\left(\\chi^2_\\nu > \\chi^2 \\right)\\).")
      ),
      practice_tabs("id", verbatimTextOutput("data_id"),
        inference_controls = FALSE
      )
    ),

    # ANOVA
    # Suffix: aov
    tabPanel(
      "ANOVA",
      h3("ANOVA"),
      p("Suppose we have \\(K\\) populations. Collect a random sample of size
        \\(n_1\\) from population 1, \\(n_2\\) from population 2, ...,
        \\(n_K\\) from population \\(K\\). We assume all populations have the
        same standard deviation (and each are Normally distributed). We wish
        to test \\(H_0: \\mu_1 = \\mu_2 = ... = \\mu_K\\) against
        \\(H_A: H_0 \\text{ is false}\\)."),
      tags$ol(
        tags$li("Let \\(N = \\sum_{k = 1}^{K} n_k\\) be the grand total,
          \\(\\overline{x}_k = \\frac{1}{n_k}
          \\sum_{i = 1}^{n_k} x_{ki}\\)
          be the sample mean for sample \\(k\\), and
          \\(\\overline{x}_{\\cdot} = \\frac{1}{N} \\sum_{k = 1}^{K}
          \\sum_{i = 1}^{n_k} x_{ki} \\)
          be the grand mean. The \\(F\\)-statistic is
          \\(f = \\frac{\\sum_{k = 1}^{K} n_k \\left(\\overline{x}_k -
          \\overline{x}_{\\cdot}\\right)^2/(K - 1)}{\\sum_{k = 1}^{K}
          \\sum_{i = 1}^{n_k} \\left(x_{ki} -
          \\overline{x}_k\\right)^2/(N - K)} \\)."),
        tags$li("Let \\(F_{\\nu_1, \\nu_2} \\sim F(\\nu_1, \\nu_2)\\) denote
          an \\(F\\)-distributed random variable, where the numerator degrees
          of freedom \\(\\nu_1 = K - 1\\) and denominator degrees of freedom
          \\(\\nu_2 = N - K\\). The \\(p\\)-value for the test is
          \\(p_{val} = P(F_{\\nu_1, \\nu_2} > f)\\).")
      ),
      practice_tabs("aov", verbatimTextOutput("data_aov"),
        inference_controls = FALSE
      )
    )
  )
))

# Server logic ----
# Each test has one reactive that generates a dataset (re-run when its
# "Regenerate" button is clicked or its alternative hypothesis changes) and a
# set of outputs that display the hypotheses, the data and the R analysis.
server <- shinyServer(function(input, output) {

  # Population mean, known standard deviation (pm) ----
  pm_out <- reactive({
    input$gen_pm # dependency only: regenerate when the button is clicked
    n <- sample(8:20, size = 1)
    nullmean <- random_null_mean()
    sigma <- random_sd(nullmean)
    shift <- random_shift(input$alt_pm)
    dataset <- round(rnorm(n = n, mean = nullmean + shift, sd = sigma),
      digits = 2
    )
    list(n = n, nullmean = nullmean, sd = sigma, dataset = dataset)
  })

  output$sd_pm <- renderUI({
    math_text("\\sigma = ", pm_out()$sd)
  })
  output$h0_pm <- renderUI({
    math_text("H_0: \\mu = ", pm_out()$nullmean)
  })
  output$hA_pm <- renderUI({
    math_text(
      "H_A: \\mu ", relation_symbol(input$alt_pm), " ", pm_out()$nullmean
    )
  })
  output$data_pm <- renderTable({
    data.frame(x = pm_out()$dataset)
  })
  output$test_pm <- renderPrint({
    res <- pm_out()
    x <- res$dataset # named `x` so the printed test reads "data: x"
    result <- z.test(x,
      alternative = input$alt_pm, mu = res$nullmean,
      sigma.x = res$sd, conf.level = conf_level(input$c_pm)
    )
    print(result)
  })

  # Population mean, unknown standard deviation (pmt) ----
  pmt_out <- reactive({
    input$gen_pmt
    n <- sample(8:20, size = 1)
    nullmean <- random_null_mean()
    sigma <- random_sd(nullmean)
    shift <- random_shift(input$alt_pmt)
    dataset <- round(rnorm(n = n, mean = nullmean + shift, sd = sigma),
      digits = 2
    )
    list(n = n, nullmean = nullmean, dataset = dataset)
  })

  output$h0_pmt <- renderUI({
    math_text("H_0: \\mu = ", pmt_out()$nullmean)
  })
  output$hA_pmt <- renderUI({
    math_text(
      "H_A: \\mu ", relation_symbol(input$alt_pmt), " ", pmt_out()$nullmean
    )
  })
  output$data_pmt <- renderTable({
    data.frame(x = pmt_out()$dataset)
  })
  output$test_pmt <- renderPrint({
    res <- pmt_out()
    x <- res$dataset
    result <- t.test(x,
      alternative = input$alt_pmt, mu = res$nullmean,
      conf.level = conf_level(input$c_pmt)
    )
    print(result)
  })

  # Difference in means, paired data (dp) ----
  dp_out <- reactive({
    input$gen_dp
    n <- sample(8:20, size = 1)
    nullmean <- random_null_mean()
    sigma <- random_sd(nullmean)
    # Standard deviation of the within-pair change; the + 0.1 keeps it
    # strictly positive after rounding.
    sd_d <- round(rchisq(1, df = runif(1) * sd_df(nullmean)), digits = 1) + 0.1
    shift <- random_shift(input$alt_dp)

    dataset_x <- round(rnorm(n = n, mean = nullmean, sd = sigma), digits = 2)
    # `shift` is the mean of D = X - Y, so it is subtracted from x to obtain
    # y. That makes a true alternative point the same way as the displayed
    # H_A (e.g. "less" gives mu_X < mu_Y).
    dataset_y <- dataset_x -
      round(rnorm(n = n, mean = shift, sd = sd_d), digits = 2)
    list(n = n, dataset_x = dataset_x, dataset_y = dataset_y)
  })

  output$h0_dp <- renderUI({
    dp_out() # re-render together with the data, as the other outputs do
    math_text("H_0: \\mu_X = \\mu_Y")
  })
  output$hA_dp <- renderUI({
    dp_out()
    math_text("H_A: \\mu_X ", relation_symbol(input$alt_dp), " \\mu_Y")
  })
  output$data_dp <- renderTable({
    res <- dp_out()
    data.frame(x = res$dataset_x, y = res$dataset_y)
  })
  output$test_dp <- renderPrint({
    res <- dp_out()
    d <- res$dataset_x - res$dataset_y
    result <- t.test(d,
      alternative = input$alt_dp, mu = 0,
      conf.level = conf_level(input$c_dp)
    )
    print(result)
  })

  # Difference in means, unpaired data (du) ----
  du_out <- reactive({
    input$gen_du
    nx <- sample(8:20, size = 1)
    ny <- sample(8:20, size = 1)
    nullmean <- random_null_mean()
    sd_x <- random_sd(nullmean)
    sd_y <- random_sd(nullmean)
    shift <- random_shift(input$alt_du)

    dataset_x <- round(rnorm(n = nx, mean = nullmean, sd = sd_x), digits = 2)
    # `shift` is mu_X - mu_Y, hence the subtraction (see the paired case).
    dataset_y <- round(rnorm(n = ny, mean = nullmean - shift, sd = sd_y),
      digits = 2
    )
    list(nx = nx, ny = ny, dataset_x = dataset_x, dataset_y = dataset_y)
  })

  output$h0_du <- renderUI({
    du_out()
    math_text("H_0: \\mu_X = \\mu_Y")
  })
  output$hA_du <- renderUI({
    du_out()
    math_text("H_A: \\mu_X ", relation_symbol(input$alt_du), " \\mu_Y")
  })
  output$datax_du <- renderTable({
    data.frame(x = du_out()$dataset_x)
  })
  output$datay_du <- renderTable({
    data.frame(y = du_out()$dataset_y)
  })
  output$test_du <- renderPrint({
    res <- du_out()
    x <- res$dataset_x
    y <- res$dataset_y
    result <- t.test(x, y,
      alternative = input$alt_du, mu = 0,
      conf.level = conf_level(input$c_du)
    )
    print(result)
  })

  # Population proportion (pp) ----
  pp_out <- reactive({
    input$gen_pp
    n <- sample(10:100, size = 1)
    # Keep p_0 away from 0 and 1: the test's standard error would be 0 there
    # and the rejection sampling below could never succeed.
    nullp <- min(max(round(rbeta(1, 2, 2), digits = 2), 0.05), 0.95)

    if (sample(c(TRUE, FALSE), size = 1)) {
      # Beta shapes centred near p_0; both must be >= 1 so that the draw is
      # not a point mass at 0 or 1.
      alpha <- min(max(round(10 * nullp), 1), 9)
      beta <- 10 - alpha
      altp <- rbeta(1, alpha, beta)
      # Redraw until the true proportion lies on the side of p_0 named by the
      # alternative hypothesis of THIS tab.
      if (input$alt_pp == "less") {
        while (altp > nullp) {
          altp <- rbeta(1, alpha, beta)
        }
      } else if (input$alt_pp == "greater") {
        while (altp < nullp) {
          altp <- rbeta(1, alpha, beta)
        }
      }
    } else {
      altp <- nullp
    }

    altp <- round(altp, digits = 2)
    dataset <- rbinom(n, 1, altp)
    list(n = n, nullp = nullp, dataset = dataset)
  })

  output$h0_pp <- renderUI({
    math_text("H_0: p = ", pp_out()$nullp)
  })
  output$hA_pp <- renderUI({
    math_text("H_A: p ", relation_symbol(input$alt_pp), " ", pp_out()$nullp)
  })
  output$data_pp <- renderPrint({
    res <- pp_out()
    cat("Number of trials: ", res$n, "\n",
      "Number of successes: ", sum(res$dataset),
      sep = ""
    )
  })
  output$test_pp <- renderPrint({
    res <- pp_out()
    x <- res$dataset
    level <- conf_level(input$c_pp)
    # The test statistic uses the standard error under H_0 ...
    result <- z.test(x,
      alternative = input$alt_pp, mu = res$nullp,
      sigma.x = sqrt(res$nullp * (1 - res$nullp)), conf.level = level
    )
    # ... while the confidence interval uses the sample proportion.
    c_int_res <- z.test(x,
      alternative = input$alt_pp, mu = res$nullp,
      sigma.x = sqrt(mean(x) * (1 - mean(x))), conf.level = level
    )
    result$conf.int <- c_int_res$conf.int
    print(result)
  })

  # Goodness of fit (gf) ----
  gf_out <- reactive({
    input$gen_gf
    K <- sample(3:6, size = 1)
    n <- sample((10 * K):(30 * K), size = 1)
    # rdirichlet() returns a 1 x K matrix.
    nullp <- round(rdirichlet(1, rep(3, times = K)), digits = 2)

    if (sample(c(TRUE, FALSE), size = 1)) {
      altp <- round(rdirichlet(1, nullp * 100), digits = 2)
      altp <- round(rdirichlet(1, altp * 100), digits = 2)
    } else {
      altp <- nullp
    }

    # Rounding can leave the probabilities summing to slightly more or less
    # than 1; put the remainder into the first category.
    nullp[1] <- nullp[1] + (1 - sum(nullp))
    altp[1] <- altp[1] + (1 - sum(altp))

    # Tabulate over all K levels so that a category that was never drawn
    # still appears (with count 0) before the 5 baseline counts are added.
    draws <- factor(sample(1:K, prob = altp, size = n, replace = TRUE),
      levels = 1:K
    )
    dataset <- table(draws, dnn = NULL) + 5
    list(K = K, nullp = nullp[1, ], dataset = dataset)
  })

  output$h0_gf <- renderUI({
    res <- gf_out()
    cases <- paste("p_", paste(1:res$K, res$nullp, sep = " = "),
      collapse = " & \\\\ "
    )
    math_text("H_0: \\begin{cases} ", cases, " \\end{cases}")
  })
  output$hA_gf <- renderUI({
    math_text("H_A: H_0 \\text{ is false}")
  })
  output$data_gf <- renderPrint({
    print(gf_out()$dataset)
  })
  output$test_gf <- renderPrint({
    res <- gf_out()
    dataset <- res$dataset
    print(chisq.test(dataset, p = res$nullp))
  })

  # Independence (id) ----
  id_out <- reactive({
    input$gen_id
    J <- sample(2:4, size = 1)
    K <- sample(2:4, size = 1)
    n <- sample((10 * J * K):(30 * J * K), size = 1)
    p1 <- as.vector(rdirichlet(1, rep(3, times = J)))
    p2 <- as.vector(rdirichlet(1, rep(3, times = K)))
    # Cell probabilities under independence, stored row by row: every K
    # consecutive entries form one of the J rows.
    nulldist <- rep(p1, each = K) * p2

    if (sample(c(TRUE, FALSE), size = 1)) {
      altdist <- rdirichlet(1, 100 * nulldist)
      altdist <- rdirichlet(1, 100 * altdist)
    } else {
      altdist <- nulldist
    }

    # As for goodness of fit: keep cells that were never drawn.
    cells <- factor(
      sample(1:(J * K), prob = altdist, size = n, replace = TRUE),
      levels = 1:(J * K)
    )
    counts <- as.vector(table(cells)) + 5
    dataset <- matrix(counts, nrow = J, byrow = TRUE)
    colnames(dataset) <- 1:K
    rownames(dataset) <- 1:J
    list(dataset = dataset)
  })

  output$h0_id <- renderUI({
    id_out()
    math_text("H_0: \\text{Independence}")
  })
  output$hA_id <- renderUI({
    math_text("H_A: H_0 \\text{ is false}")
  })
  output$data_id <- renderPrint({
    print(id_out()$dataset)
  })
  output$test_id <- renderPrint({
    dataset <- id_out()$dataset
    print(chisq.test(dataset, correct = FALSE))
  })

  # ANOVA (aov) ----
  aov_out <- reactive({
    input$gen_aov
    K <- sample(3:6, size = 1)
    nk <- sample(5:10, replace = TRUE, size = K)
    nullmean <- random_null_mean()
    sigma <- random_sd(nullmean)

    # Under the alternative a random subset of the groups gets its own mean.
    altmeans <- rep(nullmean, times = K)
    if (sample(c(TRUE, FALSE), size = 1)) {
      ndiff <- sample(1:K, size = 1)
      altmeans[sample(1:K, size = ndiff)] <-
        rnorm(ndiff, mean = nullmean, sd = sigma)
    }

    dataset <- lapply(seq_len(K), function(k) {
      round(rnorm(nk[k], mean = altmeans[k], sd = sigma), digits = 1)
    })
    list(dataset = dataset)
  })

  # Samples in long format (one row per observation), shared by both tabs.
  aov_long <- reactive({
    samples <- aov_out()$dataset
    names(samples) <- seq_along(samples)
    long <- stack(samples)
    names(long) <- c("Value", "Population")
    long
  })

  # One-way ANOVA table for the current samples.
  aov_table <- reactive({
    summary(aov(Value ~ Population, data = aov_long()))[[1]]
  })

  output$h0_aov <- renderUI({
    aov_out()
    math_text("H_0: \\text{Common mean}")
  })
  output$hA_aov <- renderUI({
    math_text("H_A: H_0 \\text{ is false}")
  })
  output$data_aov <- renderPrint({
    samples <- aov_out()$dataset
    cat(
      "f =", aov_table()[["F value"]][1],
      "\nN =", nrow(aov_long()), "\n\n\n"
    )
    for (k in seq_along(samples)) {
      cat("Sample", k, "\n")
      cat(samples[[k]], "\n\n")
    }
  })
  output$test_aov <- renderPrint({
    print(aov_table())
  })
})

# Run the application
shinyApp(ui = ui, server = server)