/* ecn1_2004_1.txt program illustrating sampling theory */ closeall; output file = ecn1_2004_1.out reset; /* look at distribution of sample mean from samples of size 20, 100, and 1000, and in each case take 1000 samples */ /* first, let parent distribution be normal with mean 5 and standard deviation 2 */ n_mean = 5; n_sd = 2; s20 = n_mean + n_sd*rndn(20,1000); s100 = n_mean + n_sd*rndn(100,1000); s1000 = n_mean + n_sd*rndn(1000,1000); /* compute sample mean from each */ m20 = meanc(s20); m100 = meanc(s100); m1000 = meanc(s1000); /* now compute the mean and standard deviation for each of these vectors */ sa_20 = meanc(m20); sa_100 = meanc(m100); sa_1000 = meanc(m1000); ssd_20 = stdc(m20); ssd_100 = stdc(m100); ssd_1000 = stdc(m1000); print " mean and sd of sampling dist n = 20 "; print (sa_20 ~ ssd_20); print "theoretical s.e." (n_sd/sqrt(20)); print " for samples of size 100"; print (sa_100 ~ ssd_100); print "theoretical s.e." (n_sd/sqrt(100)); print "for samples of size 1000"; print (sa_1000 ~ ssd_1000 ); print "theoretical s.e." (n_sd/sqrt(1000)); /* plot historgram of sample mean */ library pgraph; graphset; histp(m20,70); histp(m100,70); /* now look at dist of sample mean from a negative exponential distribution */ /* in this case, support of dist is nonnegative real line, c.d.f.(x) = 1 - exp(-alpha*x) p.d.f.(x) = alpha*exp(-alpha*x); */ /* draw random samples of size 20 and 1000 */ alpha = .2; @ implies mean is 5 @ ss_20 = - ln(1-rndu(20,1000)) / alpha; ss_1000 = -ln(1-rndu(1000,1000)) / alpha; mm20 = meanc(ss_20); mm1000 = meanc(ss_1000); print "mean and s.d. of sampling mean for n. exponential case"; print "n = 20"; print (meanc(mm20) ~ stdc(mm20)); print "theoretical s.e." ((1/alpha)/sqrt(20)); print "n = 1000"; print (meanc(mm1000) ~ stdc(mm1000)); print "theoretical s.e." ((1/alpha)/sqrt(1000)); /* look at distribution of the sample means in this case */ histp(mm20,70); @histp(mm1000,70);@ /* bootstrap standard errors */ /* don't know the parent distribution of the random variable, and have access to only one sample idea is to randomly resample from the sample available */ /* take the first sample from the negative exponential for n = 20*/ orig_sample = ss_20[.,1]; @ resample (with replacement) from this one 1000 times @; resamp_mat = zeros(20,1000); i = 1; do until i gt 1000; resamp_mat[.,i] = orig_sample[(floor(rndu(20,1)*20)+1)]; i = i+1; endo; m_bs_20 = meanc(resamp_mat); /* histogram of bootstrap samples */ @histp(m_bs_20,70);@ /* consider other functions of the sample the coefficient of variation s.d. divided by the mean estimate is sample st. dev. divided by sample mean */ /* try with some actual data */ open f1 = ecn1_wag for read; nr = rowsf(f1); x = readr(f1,nr); /* wage is first column in this data set */ wage = x[.,1]; cv = stdc(wage) / meanc(wage); print "cv from sample" cv; /* we want to find an estimate of the standard error for this statistic */ @ resample (with replacement) from this one 1000 times @; resamp_mat = zeros(nr,1000); i = 1; do until i gt 1000; resamp_mat[.,i] = wage[(floor(rndu(nr,1)*nr)+1)]; i = i+1; endo; bs_cv = stdc(resamp_mat) ./ meanc(resamp_mat); bs_est_se = stdc(bs_cv); print "bootstrap estimate of standard error" bs_est_se; end;