/* ------------------------------------------- # AGREE_COEFF2.SAS # (July 24, 2021) #Description: This script file contains a series of SAS/IML functions for computing various agreement coefficients # for 2 raters when the input data file is in the form of a contingency table showing the distribution # of subjects by rater, and by category. #Author: Kilem L. Gwet, Ph.D. ------------------------------------------- */ /*- unique_vec: take a vector 'vect' as input and will output a vector of unique non-missing elements of vect. -*/ start unique_vec(vect); uvec = t(unique(vect));/*find unique elements of vectors*/ vec_miss = cmiss(uvec);/*binary vector =1 if missing =0 otherwise*/ loc_nmiss = loc(vec_miss=0);/*location of non-missing elements of the vectors*/ unvec = uvec[loc_nmiss];/* extract non-missing values from vector of unique values*/ return(unvec); finish; /* #==================================================================================== #bp2_table: Brennan-Prediger coefficient (Brennan & Prediger (1981)), its standard error and associated confidence interval for 2 raters when input dataset is a contingency table with no missing rating_ (Author: Kilem L_ Gwet, PhD - gwet@agreestat_com) #------------- #The input data "x2Tab" is a qxq contingency table showing the distribution of subjects by rater, when q is the number of categories_ (1) Output parameters: bp_coeff,stderr,conf_int,p_value, (2) Input parameters: x2Tab (mandatory),weights=I(ncol(x2Tab)),conflev=0_95,Npop=10**7 (3) Examples: run bp2_table(bp_coeff,stderr,conf_int,p_value,xtab,,,); print bp_coeff,stderr,conf_int,p_value; #====================================================================================== */ start bp2_table(x2Tab,weights=I(ncol(x2Tab)),conflev=0.95,Npop=10**7); n = sum(x2Tab); /* number of subjects */ f = n/Npop; /* finite population correction */ q = ncol(x2Tab); /* number of categories */ pa = sum(weights # x2Tab/n); /* percent agreement */ tw = sum(weights); pe = tw/(q##2); bp_coeff = (pa - pe)/(1 - pe); /* Brennan-Prediger coefficint */ /* calculation of variance - standard error - confidence interval - p-value */ pkl = x2Tab/n; /* p_{kl} */ sum1 = 0; do k=1 to q; do l=1 to q; sum1 = sum1 + pkl[k,l] * weights[k,l]##2; end; end; var_bp = ((1-f)/(n*(1-pe)##2)) * (sum1 - pa##2); stderr = sqrt(var_bp); /* bp's standard error */ p_value = 2*(1-cdf('T',bp_coeff/stderr,n-1)); lcb = bp_coeff - stderr*quantile('T',1-(1-conflev)/2,n-1); /* lower confidence bound */ ucb = min(1,bp_coeff + stderr*quantile('T',1-(1-conflev)/2,n-1)); /* upper confidence bound */ conf_int = "("+strip(char(round(lcb,1/10**5)))+","+strip(char(round(ucb,1/10**5)))+")"; return([bp_coeff,stderr,conf_int,p_value]); finish; /* #==================================================================================== #gwet_ac1_table: Gwet's AC1/Ac2 coefficient (Gwet(2008)), its standard error and associated confidence interval for 2 raters when input dataset is a contingency table with no missing rating_ (Author: Kilem L_ Gwet, PhD - gwet@agreestat_com) #------------- #The input data "x2Tab" is a qxq contingency table showing the distribution of subjects by rater, when q is the number of categories_ (1) Output parameters: gwet_ac1,stderr,conf_int,p_value, (2) Input parameters: x2Tab (mandatory),weights=I(ncol(x2Tab)),conflev=0_95,Npop=10**7 (3) Examples: run gwet_ac1_table(gwet_ac1,stderr,conf_int,p_value,xtab,,,); print gwet_ac1,stderr,conf_int,p_value; #====================================================================================== */ start gwet_ac1_table(x2Tab,weights=I(ncol(x2Tab)),conflev=0.95,Npop=10**7); n = sum(x2Tab); /* number of subjects */ f = n/Npop; /* finite population correction */ q = ncol(x2Tab); /* number of categories */ pa = sum(weights # x2Tab/n); /* percent agreement */ pk_ = (x2Tab*repeat(1,q))/n; p_l = t((t(repeat(1,q))*x2Tab)/n); pi_k = (pk_+p_l)/2; tw = sum(weights); pe = tw # sum(pi_k #(1-pi_k))/(q*(q-1)); gwet_ac1 = (pa - pe)/(1 - pe); /* gwet's ac1/ac2 coefficint */ /* calculation of variance - standard error - confidence interval - p-value */ pkl = x2Tab/n; /* p_{kl} */ sum1 = 0; do k=1 to q; do l=1 to q; sum1 = sum1 + pkl[k,l] * (weights[k,l]-2*(1-gwet_ac1)*tw*(1-(pi_k[k] + pi_k[l])/2)/(q*(q-1)))##2; end; end; var_gwet = ((1-f)/(n*(1-pe)**2)) * (sum1 - (pa-2*(1-gwet_ac1)*pe)##2); stderr = sqrt(var_gwet); /* ac1's standard error */ p_value = 2*(1-cdf('T',gwet_ac1/stderr,n-1)); lcb = gwet_ac1 - stderr*quantile('T',1-(1-conflev)/2,n-1); /* lower confidence bound */ ucb = min(1,gwet_ac1 + stderr*quantile('T',1-(1-conflev)/2,n-1)); /* upper confidence bound */ conf_int = "("+strip(char(round(lcb,1/10**5)))+","+strip(char(round(ucb,1/10**5)))+")"; return([gwet_ac1,stderr,conf_int,p_value]); finish; /* #==================================================================================== #kappa2_table: Cohen's kappa (Cohen(1960)) coefficient, its standard error and associated confidence interval for 2 raters when input dataset is a contingency table with no missing rating. (Author: Kilem L. Gwet, PhD - gwet@agreestat.com) #------------- #The input data "x2Tab" is a qxq contingency table showing the distribution of subjects by rater, when q is the number of categories. (1) Output parameters: kappa,stderr,conf_int,p_value, (2) Input parameters: x2Tab (mandatory),weights=I(ncol(x2Tab)),conflev=0.95,Npop=10**7 (3) Examples: run kappa2_table(kappa,stderr,conf_int,p_value,xtab,,,); print kappa,stderr,conf_int,p_value; #====================================================================================== */ start kappa2_table(x2Tab,weights=I(ncol(x2Tab)),conflev=0.95,Npop=10**7); n = sum(x2Tab); /* number of subjects*/ f = n/Npop; /* finite population correction */ q = nrow(x2Tab); /* number of categories */ pa = sum(weights # x2Tab/n); /* percent agreement */ pk_ = (x2Tab*repeat(1,q))/n; p_l = t((t(repeat(1,q))*x2Tab)/n); pe = sum(weights#(pk_*t(p_l))); kappa = (pa - pe)/(1 - pe); /* weighted kappa */ /* 2 raters special case variance */ pkl = x2Tab/n; pb_k = weights * p_l; pbl_ = t(weights) * pk_; sum1 = 0; do k=1 to q; do l=1 to q; sum1 = sum1 + pkl[k,l]#(weights[k,l]-(1-kappa)#(pb_k[k] + pbl_[l]))##2; end; end; var_kappa = ((1-f)/(n*(1-pe)**2)) # (sum1 - (pa-2*(1-kappa)*pe)**2); stderr = sqrt(var_kappa); /* kappa standard error */ p_value = 2*(1-cdf('T',kappa/stderr,n-1)); lcb = kappa - stderr*quantile('T',1-(1-conflev)/2,n-1); /* lower confidence bound */ ucb = min(1,kappa + stderr*quantile('T',1-(1-conflev)/2,n-1)); /* upper confidence bound */ conf_int = "("+strip(char(round(lcb,1/10**5)))+","+strip(char(round(ucb,1/10**5)))+")"; return([kappa,stderr,conf_int,p_value]); finish; /* #==================================================================================== #krippen2_table: Krippendorff's alpha, its standard error and associated confidence interval for 2 raters when input dataset is a contingency table with no missing rating_ (Author: Kilem L_ Gwet, PhD - gwet@agreestat_com) #------------- #The input data "x2Tab" is a qxq contingency table showing the distribution of subjects by rater, when q is the number of categories_ (1) Output parameters: kripp_coeff,stderr,conf_int,p_value, (2) Input parameters: x2Tab (mandatory),weights=I(ncol(x2Tab)),conflev=0_95,Npop=10**7 (3) Examples: run krippen2_table(kripp_coeff,stderr,conf_int,p_value,xtab,,,); print kripp_coeff,stderr,conf_int,p_value; #====================================================================================== */ start krippen2_table(x2Tab,weights=I(ncol(x2Tab)),conflev=0.95,Npop=10**7); n = sum(x2Tab); /* number of subjects */ f = n/Npop; /* finite population correction */ q = ncol(x2Tab); /* number of categories */ epsi = 1/(2*n); pa0 = sum(weights # x2Tab/n); pa = (1-epsi)*pa0 + epsi; /* percent agreement */ pk_ = (x2Tab*repeat(1,q))/n; p_l = t((t(repeat(1,q))*x2Tab)/n); pi_k = (pk_+p_l)/2; pe = sum(weights#(pi_k*t(pi_k))); kripp_coeff = (pa - pe)/(1 - pe); /*weighted Krippen's alpha coefficint*/ /* calculating variance */ pkl = x2Tab/n; /* p_{kl} */ pb_k = weights * p_l; /*\ov{p}_{+k}*/ pbl_ = t(weights) * pk_; /* \ov{p}_{l+} */ pbk = (pb_k + pbl_)/2; /* \ov{p}_{k} */ kcoeff = (pa0 - pe)/(1 - pe); sum1 = 0; do k=1 to q; do l=1 to q; sum1 = sum1 + pkl[k,l] * (weights[k,l]-(1-kcoeff)*(pbk[k] + pbk[l]))##2; end; end; var_kripp = ((1-f)/(n*(1-pe)##2)) * (sum1 - (pa0-2*(1-kcoeff)*pe)##2); stderr = sqrt(var_kripp); /* Kripp_ alpha's standard error */ p_value = 2*(1-cdf('T',kripp_coeff/stderr,n-1)); lcb = kripp_coeff - stderr*quantile('T',1-(1-conflev)/2,n-1); /* lower confidence bound */ ucb = min(1,kripp_coeff + stderr*quantile('T',1-(1-conflev)/2,n-1)); /* upper confidence bound */ conf_int = "("+strip(char(round(lcb,1/10**5)))+","+strip(char(round(ucb,1/10**5)))+")"; return([kripp_coeff,stderr,conf_int,p_value]); finish; /* #==================================================================================== #pa2_table: Percent agreement coefficient, its standard error and associated confidence interval for 2 raters when input dataset is a contingency table with no missing rating_ (Author: Kilem L_ Gwet, PhD - gwet@agreestat_com) #------------- #The input data "x2Tab" is a qxq contingency table showing the distribution of subjects by rater, when q is the number of categories_ (1) Output parameters: kripp_coeff,stderr,conf_int,p_value, (2) Input parameters: x2Tab (mandatory),weights=I(ncol(x2Tab)),conflev=0_95,Npop=10**7 (3) Examples: run pa2_table(pa,stderr,conf_int,p_value,xtab,,,); print pa,stderr,conf_int,p_value; #====================================================================================== */ start pa2_table(x2Tab,weights=I(ncol(x2Tab)),conflev=0.95,Npop=10**7); n = sum(x2Tab); /* number of subjects */ f = n/Npop; /* finite population correction */ q = ncol(x2Tab); /* number of categories */ pa = sum(weights # x2Tab/n); /* percent agreement */ /* calculation of variance - standard error - confidence interval - p-value */ pkl = x2Tab/n; /*p_{kl}*/ sum1 = 0; do k=1 to q; do l=1 to q; sum1 = sum1 + pkl[k,l] * weights[k,l]##2; end; end; var_pa = ((1-f)/n) * (sum1 - pa##2); stderr = sqrt(var_pa); /* pa standard error */ p_value = 2*(1-cdf('T',pa/stderr,n-1)); lcb = pa - stderr*quantile('T',1-(1-conflev)/2,n-1); /* lower confidence bound */ ucb = min(1,pa + stderr*quantile('T',1-(1-conflev)/2,n-1)); /* upper confidence bound */ conf_int = "("+strip(char(round(lcb,1/10**5)))+","+strip(char(round(ucb,1/10**5)))+")"; return([pa,stderr,conf_int,p_value]); finish; /* #==================================================================================== #scott2_table: Scott's pi coefficient (Scott(1955)), its standard error and associated confidence interval for 2 raters when input dataset is a contingency table with no missing rating_ (Author: Kilem L_ Gwet, PhD - gwet@agreestat_com) #------------- #The input data "x2Tab" is a qxq contingency table showing the distribution of subjects by rater, when q is the number of categories_ (1) Output parameters: kappa,stderr,conf_int,p_value, (2) Input parameters: x2Tab (mandatory),weights=I(ncol(x2Tab)),conflev=0_95,Npop=10**7 (3) Examples: run scott2_table(scott,stderr,conf_int,p_value,xtab,,,); print scott,stderr,conf_int,p_value; #====================================================================================== */ start scott2_table(x2Tab,weights=I(ncol(x2Tab)),conflev=0.95,Npop=10**7); n = sum(x2Tab); /* number of subjects */ f = n/Npop; /* finite population correction */ q = ncol(x2Tab); /* number of categories */ pa = sum(weights # x2Tab/n); /* percent agreement */ pk_ = (x2Tab*repeat(1,q))/n; p_l = t((t(repeat(1,q))*x2Tab)/n); pi_k = (pk_+p_l)/2; pe = sum(weights#(pi_k*t(pi_k))); scott = (pa - pe)/(1 - pe); /* weighted scott's pi coefficint */ /* 2 raters special case variance */ pkl = x2Tab/n; /* p_{kl} */ pb_k = weights * p_l; /* \ov{p}_{+k} */ pbl_ = t(weights) * pk_; /* \ov{p}_{l+} */ pbk = (pb_k + pbl_)/2; /* \ov{p}_{k} */ sum1 = 0; do k=1 to q; do l=1 to q; sum1 = sum1 + pkl[k,l] * (weights[k,l]-(1-scott)*(pbk[k] + pbk[l]))##2; end; end; var_scott = ((1-f)/(n*(1-pe)**2)) * (sum1 - (pa-2*(1-scott)*pe)##2); stderr = sqrt(var_scott); /* Scott's standard error */ p_value = 2*(1-cdf('T',scott/stderr,n-1)); lcb = scott - stderr*quantile('T',1-(1-conflev)/2,n-1); /* lower confidence bound */ ucb = min(1,scott + stderr*quantile('T',1-(1-conflev)/2,n-1)); /* upper confidence bound */ conf_int = "("+strip(char(round(lcb,1/10**5)))+","+strip(char(round(ucb,1/10**5)))+")"; return([scott,stderr,conf_int,p_value]); finish;