/*
-------------------------------------------
#						  AGREE_COEFF2.SAS
#					     (July 24, 2021)
#Description: This script file contains a series of SAS/IML functions for computing various agreement coefficients
#		  for 2 raters when the input data file is in the form of a contingency table showing the distribution
#         of subjects by rater, and by category.
#Author: Kilem L. Gwet, Ph.D.
-------------------------------------------
*/
/*- unique_vec: take a vector 'vect' as input and will output a vector 
			    of unique non-missing elements of vect. -*/
start unique_vec(vect);
	uvec = t(unique(vect));/*find unique elements of vectors*/
	vec_miss = cmiss(uvec);/*binary vector =1 if missing =0 otherwise*/
	loc_nmiss = loc(vec_miss=0);/*location of non-missing elements of the vectors*/
	unvec = uvec[loc_nmiss];/* extract non-missing values from vector of unique values*/
	return(unvec);
finish;

/*
#====================================================================================
#bp2_table: Brennan-Prediger coefficient (Brennan & Prediger (1981)), its standard error and associated
	confidence interval for 2 raters when input dataset is a contingency table with no
	missing rating_
		(Author: Kilem L_ Gwet, PhD - gwet@agreestat_com) 
#-------------
#The input data "x2Tab" is a qxq contingency table showing the distribution of subjects 
	by rater, when q is the number of categories_
	(1) Output parameters: bp_coeff,stderr,conf_int,p_value,
	(2) Input parameters: x2Tab (mandatory),weights=I(ncol(x2Tab)),conflev=0_95,Npop=10**7
	(3) Examples:	run bp2_table(bp_coeff,stderr,conf_int,p_value,xtab,,,);
					print bp_coeff,stderr,conf_int,p_value;
#======================================================================================
*/
start bp2_table(x2Tab,weights=I(ncol(x2Tab)),conflev=0.95,Npop=10**7); 
  n = sum(x2Tab); /* number of subjects */
  f = n/Npop; /* finite population correction  */
  q = ncol(x2Tab); /* number of categories */
  pa = sum(weights # x2Tab/n); /* percent agreement */
  tw = sum(weights);
  pe = tw/(q##2);
  bp_coeff = (pa - pe)/(1 - pe); /* Brennan-Prediger coefficint */
  
	/* calculation of variance - standard error - confidence interval - p-value */
  pkl = x2Tab/n;	/* p_{kl} */
  sum1 = 0;
  do k=1 to q;
	do l=1 to q;
		sum1 = sum1 + pkl[k,l] * weights[k,l]##2;
	end;
  end;
  var_bp = ((1-f)/(n*(1-pe)##2)) * (sum1 - pa##2);
  stderr = sqrt(var_bp); /* bp's standard error */
  p_value = 2*(1-cdf('T',bp_coeff/stderr,n-1));
  lcb = bp_coeff - stderr*quantile('T',1-(1-conflev)/2,n-1); /* lower confidence bound */
  ucb = min(1,bp_coeff + stderr*quantile('T',1-(1-conflev)/2,n-1)); /* upper confidence bound */
  conf_int = "("+strip(char(round(lcb,1/10**5)))+","+strip(char(round(ucb,1/10**5)))+")";
  return([bp_coeff,stderr,conf_int,p_value]);
finish;

/*
#====================================================================================
#gwet_ac1_table: Gwet's AC1/Ac2 coefficient (Gwet(2008)), its standard error and associated
	confidence interval for 2 raters when input dataset is a contingency table with no
	missing rating_
		(Author: Kilem L_ Gwet, PhD - gwet@agreestat_com) 
#-------------
#The input data "x2Tab" is a qxq contingency table showing the distribution of subjects 
	by rater, when q is the number of categories_
	(1) Output parameters: gwet_ac1,stderr,conf_int,p_value,
	(2) Input parameters: x2Tab (mandatory),weights=I(ncol(x2Tab)),conflev=0_95,Npop=10**7
	(3) Examples:	run gwet_ac1_table(gwet_ac1,stderr,conf_int,p_value,xtab,,,);
					print gwet_ac1,stderr,conf_int,p_value;
#======================================================================================
*/

start gwet_ac1_table(x2Tab,weights=I(ncol(x2Tab)),conflev=0.95,Npop=10**7); 
  n = sum(x2Tab); /* number of subjects */
  f = n/Npop; /* finite population correction  */
  q = ncol(x2Tab); /* number of categories */
  pa = sum(weights # x2Tab/n); /* percent agreement */
 
  pk_ = (x2Tab*repeat(1,q))/n;
  p_l = t((t(repeat(1,q))*x2Tab)/n);
  pi_k = (pk_+p_l)/2;
  tw = sum(weights);
  pe = tw # sum(pi_k #(1-pi_k))/(q*(q-1));
  gwet_ac1 = (pa - pe)/(1 - pe); /* gwet's ac1/ac2 coefficint */
  
	/* calculation of variance - standard error - confidence interval - p-value */
  pkl = x2Tab/n;	/* p_{kl} */	
  sum1 = 0;
  do k=1 to q;
	do l=1 to q;
		sum1 = sum1 + pkl[k,l] * (weights[k,l]-2*(1-gwet_ac1)*tw*(1-(pi_k[k] + pi_k[l])/2)/(q*(q-1)))##2;
	end;
  end;
  var_gwet = ((1-f)/(n*(1-pe)**2)) * (sum1 - (pa-2*(1-gwet_ac1)*pe)##2);
  stderr = sqrt(var_gwet); /* ac1's standard error */
  p_value = 2*(1-cdf('T',gwet_ac1/stderr,n-1));  
  lcb = gwet_ac1 - stderr*quantile('T',1-(1-conflev)/2,n-1); /* lower confidence bound */
  ucb = min(1,gwet_ac1 + stderr*quantile('T',1-(1-conflev)/2,n-1)); /* upper confidence bound */
  conf_int = "("+strip(char(round(lcb,1/10**5)))+","+strip(char(round(ucb,1/10**5)))+")";
  return([gwet_ac1,stderr,conf_int,p_value]);
finish;

/*
#====================================================================================
#kappa2_table: Cohen's kappa (Cohen(1960)) coefficient, its standard error and associated
	confidence interval for 2 raters when input dataset is a contingency table with no
	missing rating.
		(Author: Kilem L. Gwet, PhD - gwet@agreestat.com) 
#-------------
#The input data "x2Tab" is a qxq contingency table showing the distribution of subjects 
	by rater, when q is the number of categories.
	(1) Output parameters: kappa,stderr,conf_int,p_value,
	(2) Input parameters: x2Tab (mandatory),weights=I(ncol(x2Tab)),conflev=0.95,Npop=10**7
	(3) Examples:	run kappa2_table(kappa,stderr,conf_int,p_value,xtab,,,);
					print kappa,stderr,conf_int,p_value;
#======================================================================================
*/
start kappa2_table(x2Tab,weights=I(ncol(x2Tab)),conflev=0.95,Npop=10**7);
	n = sum(x2Tab); /* number of subjects*/
	f = n/Npop; /* finite population correction */
	q = nrow(x2Tab); /* number of categories */ 
	pa = sum(weights # x2Tab/n); /* percent agreement */
 
	pk_ = (x2Tab*repeat(1,q))/n;
	p_l = t((t(repeat(1,q))*x2Tab)/n);
	pe = sum(weights#(pk_*t(p_l)));
	kappa = (pa - pe)/(1 - pe); /* weighted kappa */
  
	/* 2 raters special case variance */

	pkl = x2Tab/n;
	pb_k = weights * p_l;
	pbl_ = t(weights) * pk_;
	sum1 = 0;
	do k=1 to q;
		do l=1 to q;
			sum1 = sum1 + pkl[k,l]#(weights[k,l]-(1-kappa)#(pb_k[k] + pbl_[l]))##2;
		end;
	end;
	var_kappa = ((1-f)/(n*(1-pe)**2)) # (sum1 - (pa-2*(1-kappa)*pe)**2);
	stderr = sqrt(var_kappa); /* kappa standard error */
	p_value = 2*(1-cdf('T',kappa/stderr,n-1));  
	lcb = kappa - stderr*quantile('T',1-(1-conflev)/2,n-1); /* lower confidence bound */
	ucb = min(1,kappa + stderr*quantile('T',1-(1-conflev)/2,n-1)); /* upper confidence bound */
	conf_int = "("+strip(char(round(lcb,1/10**5)))+","+strip(char(round(ucb,1/10**5)))+")";
	return([kappa,stderr,conf_int,p_value]);
finish;

/*
#====================================================================================
#krippen2_table: Krippendorff's alpha, its standard error and associated
	confidence interval for 2 raters when input dataset is a contingency table with no
	missing rating_
		(Author: Kilem L_ Gwet, PhD - gwet@agreestat_com) 
#-------------
#The input data "x2Tab" is a qxq contingency table showing the distribution of subjects 
	by rater, when q is the number of categories_
	(1) Output parameters: kripp_coeff,stderr,conf_int,p_value,
	(2) Input parameters: x2Tab (mandatory),weights=I(ncol(x2Tab)),conflev=0_95,Npop=10**7
	(3) Examples:	run krippen2_table(kripp_coeff,stderr,conf_int,p_value,xtab,,,);
					print kripp_coeff,stderr,conf_int,p_value;
#======================================================================================
*/

start krippen2_table(x2Tab,weights=I(ncol(x2Tab)),conflev=0.95,Npop=10**7); 
  n = sum(x2Tab); /* number of subjects */
  f = n/Npop; /* finite population correction  */
  q = ncol(x2Tab); /* number of categories */
  epsi = 1/(2*n);
  pa0 = sum(weights # x2Tab/n);
  pa = (1-epsi)*pa0 + epsi; /* percent agreement */ 
  pk_ = (x2Tab*repeat(1,q))/n;
  p_l = t((t(repeat(1,q))*x2Tab)/n);
  pi_k = (pk_+p_l)/2;
  pe = sum(weights#(pi_k*t(pi_k)));
  kripp_coeff = (pa - pe)/(1 - pe); /*weighted Krippen's alpha coefficint*/
  
	/* calculating variance */

  pkl = x2Tab/n; /* p_{kl} */	
  pb_k = weights * p_l; /*\ov{p}_{+k}*/
  pbl_ = t(weights) * pk_; /* \ov{p}_{l+} */
  pbk  = (pb_k + pbl_)/2; /* \ov{p}_{k} */
  kcoeff = (pa0 - pe)/(1 - pe);
  sum1 = 0;
  do k=1 to q;
	do l=1 to q;
		sum1 = sum1 + pkl[k,l] * (weights[k,l]-(1-kcoeff)*(pbk[k] + pbk[l]))##2;
	end;
  end;
  var_kripp = ((1-f)/(n*(1-pe)##2)) * (sum1 - (pa0-2*(1-kcoeff)*pe)##2);
  stderr = sqrt(var_kripp); /* Kripp_ alpha's standard error */
  p_value = 2*(1-cdf('T',kripp_coeff/stderr,n-1));
  lcb = kripp_coeff - stderr*quantile('T',1-(1-conflev)/2,n-1); /* lower confidence bound */
  ucb = min(1,kripp_coeff + stderr*quantile('T',1-(1-conflev)/2,n-1)); /* upper confidence bound */
  conf_int = "("+strip(char(round(lcb,1/10**5)))+","+strip(char(round(ucb,1/10**5)))+")";
  return([kripp_coeff,stderr,conf_int,p_value]);
finish;

/*
#====================================================================================
#pa2_table: Percent agreement coefficient, its standard error and associated
	confidence interval for 2 raters when input dataset is a contingency table with no
	missing rating_
		(Author: Kilem L_ Gwet, PhD - gwet@agreestat_com) 
#-------------
#The input data "x2Tab" is a qxq contingency table showing the distribution of subjects 
	by rater, when q is the number of categories_
	(1) Output parameters: kripp_coeff,stderr,conf_int,p_value,
	(2) Input parameters: x2Tab (mandatory),weights=I(ncol(x2Tab)),conflev=0_95,Npop=10**7
	(3) Examples:	run pa2_table(pa,stderr,conf_int,p_value,xtab,,,);
					print pa,stderr,conf_int,p_value;
#======================================================================================
*/
start pa2_table(x2Tab,weights=I(ncol(x2Tab)),conflev=0.95,Npop=10**7); 
  n = sum(x2Tab); /* number of subjects */
  f = n/Npop; /* finite population correction */
  q = ncol(x2Tab); /* number of categories */
  pa = sum(weights # x2Tab/n); /* percent agreement */
  
  /* calculation of variance - standard error - confidence interval - p-value */
  
  pkl = x2Tab/n; /*p_{kl}*/
  sum1 = 0;
  do k=1 to q;
    do l=1 to q;
      sum1 = sum1 + pkl[k,l] * weights[k,l]##2;
    end;
  end;
  var_pa = ((1-f)/n) * (sum1 - pa##2);
  stderr = sqrt(var_pa); /* pa standard error */
  p_value = 2*(1-cdf('T',pa/stderr,n-1));
  lcb = pa - stderr*quantile('T',1-(1-conflev)/2,n-1); /* lower confidence bound */
  ucb = min(1,pa + stderr*quantile('T',1-(1-conflev)/2,n-1)); /* upper confidence bound */
  conf_int = "("+strip(char(round(lcb,1/10**5)))+","+strip(char(round(ucb,1/10**5)))+")";
  return([pa,stderr,conf_int,p_value]);
finish;

/*
#====================================================================================
#scott2_table: Scott's pi coefficient (Scott(1955)), its standard error and associated
	confidence interval for 2 raters when input dataset is a contingency table with no
	missing rating_
		(Author: Kilem L_ Gwet, PhD - gwet@agreestat_com) 
#-------------
#The input data "x2Tab" is a qxq contingency table showing the distribution of subjects 
	by rater, when q is the number of categories_
	(1) Output parameters: kappa,stderr,conf_int,p_value,
	(2) Input parameters: x2Tab (mandatory),weights=I(ncol(x2Tab)),conflev=0_95,Npop=10**7
	(3) Examples:	run scott2_table(scott,stderr,conf_int,p_value,xtab,,,);
					print scott,stderr,conf_int,p_value;
#======================================================================================
*/
start scott2_table(x2Tab,weights=I(ncol(x2Tab)),conflev=0.95,Npop=10**7); 
	n = sum(x2Tab); /* number of subjects */
	f = n/Npop; /* finite population correction */
	q = ncol(x2Tab); /* number of categories */
	pa = sum(weights # x2Tab/n); /* percent agreement */
	pk_ = (x2Tab*repeat(1,q))/n;
	p_l = t((t(repeat(1,q))*x2Tab)/n);
	pi_k = (pk_+p_l)/2;
	pe = sum(weights#(pi_k*t(pi_k)));
	scott = (pa - pe)/(1 - pe); /* weighted scott's pi coefficint */
  
	/* 2 raters special case variance */
	pkl = x2Tab/n;	/* p_{kl} */	
	pb_k = weights * p_l; /* \ov{p}_{+k} */
	pbl_ = t(weights) * pk_; /* \ov{p}_{l+} */
	pbk  = (pb_k + pbl_)/2; /* \ov{p}_{k} */
	sum1 = 0;
	do k=1 to q;
		do l=1 to q;
			sum1 = sum1 + pkl[k,l] * (weights[k,l]-(1-scott)*(pbk[k] + pbk[l]))##2;
		end;
	end;
	var_scott = ((1-f)/(n*(1-pe)**2)) * (sum1 - (pa-2*(1-scott)*pe)##2);
	stderr = sqrt(var_scott); /* Scott's standard error */
	p_value = 2*(1-cdf('T',scott/stderr,n-1));
	lcb = scott - stderr*quantile('T',1-(1-conflev)/2,n-1); /* lower confidence bound */
	ucb = min(1,scott + stderr*quantile('T',1-(1-conflev)/2,n-1)); /* upper confidence bound */
	conf_int = "("+strip(char(round(lcb,1/10**5)))+","+strip(char(round(ucb,1/10**5)))+")";
	return([scott,stderr,conf_int,p_value]);
finish;