#							paired t-test for agreement coefficients.r
#						 		  (February 20, 2021)
#Description: This script file contains a series of R functions for testing the difference between 2 agreement
#		 coefficients for statistical significance. The approach used here is based on the linearization method 
#    proposed Gwet (2016).A function is proposed for each agreement coefficient (Gwet's AC2, Fleiss' Kappa, 
#    Conger's Kappa, Krippendorff's Alpha, and Brennan-Prediger Coefficient.
#
#    Each function has only 2 required parameters, which are the 2 data frames containing the 2 sets of ratings for
#    the 2 groups of raters under comparison. Each input data file is in the form of nxr matrix or data frame showing 
#    the actual ratings each rater (column) assigned to each subject (in row). A typical table entry (i,g) represents 
#    the rating associated with subject i and rater g. 
#
#Author: Kilem L. Gwet, Ph.D.  (gwet@agreestat.com)
#
#
#     EXAMPLES OF SIMPLE CALLS OF THE 5 FUNCTIONS
#     ===========================================
# > ttest.fleiss(g1.ratings,g2.ratings)
# > ttest.ac2(g1.ratings,g2.ratings)
# > ttest.alpha(g1.ratings,g2.ratings)
# > ttest.conger(g1.ratings,g2.ratings)
# > ttest.bp(g1.ratings,g2.ratings)


#=====================================================================================
#ttest.fleiss: This function performs the paired t-test for testing the difference  
#              between two correlated Fleiss kappa coefficients for statistical significance. It
#              implements the linearization method of Gwet (2016).
#-------------
# This function takes 2 required parameters, which are the 2 groups of raters being compared:
# "g1.ratings" and "g2.ratings." Both datasets must have the exact same number of rows, and each
# column represents one rater and contains its ratings (numeric or alphabetic). 
# The user must exclude all subjects that are not rated by any rater.
# Bibliography:
# ------------
# Fleiss, J. L. (1971). Measuring nominal scale agreement among many raters.
#                        Psychological Bulletin, 76, 378-382. 
# Gwet, K. L. (2016). "Testing the Difference of Correlated Agreement Coefficients for Statistical Significance,
#                      Educational and Psychological Measurement, Vol 76(4) 609-637.
#======================================================================================
ttest.fleiss <- function(g1.ratings,g2.ratings,weights.p="unweighted",conflev=0.95,N=Inf,print=TRUE){ 
  n2 <- nrow(g2.ratings)
  n1 <- nrow(g1.ratings)
  if (n2==n1){
    coeff2.i<-fleiss.linear.i(g2.ratings,weights=weights.p,conflev,N)
    coeff1.i<-fleiss.linear.i(g1.ratings,weights=weights.p,conflev,N)
    di = coeff2.i-coeff1.i
    fleiss.coeff1 = mean(coeff1.i)
    fleiss.coeff2 = mean(coeff2.i)
    coeff.diff <- fleiss.coeff2-fleiss.coeff1
    std.err <- sqrt(var(di)/n1)
    t.stat <- (fleiss.coeff2-fleiss.coeff1)/std.err
    p.value <- 2*(1-pt(abs(t.stat),n1-1))
    if (print==TRUE){
      cat("PAIRED T-TEST FOR TESTING THE DIFFERENCE BETWEEN 2 FLEISS AGREEMENT COEFFICIENTS\n")
      cat("--------------------------------------------------------------------------------\n")
      cat("Fleiss Kappa Coefficients: (Group 1: ", fleiss.coeff1, ") -- (Group 2: ",fleiss.coeff2,")\n")
      cat("Standard Error of the Differences: ", std.err,"\n")
      cat("Test Statistic: T= ",t.stat,"\n")
      cat("P-value: ",p.value) 
    }
    invisible(c(fleiss.coeff1, fleiss.coeff2,coeff.diff,std.err,t.stat,p.value))
  }else{
    cat("Both datasets must have the same number of subjects. One has ",n1," subjects, while the other has ", n2," subjects.")
    invisible(-1)
  }
}
#=====================================================================================
#ttest.ac2: This function performs the paired t-test for testing the difference  
#              between two correlated Gwet's AC2 coefficients for statistical significance. It
#              implements the linearization method of Gwet (2016).
#-------------
# This function takes 2 required parameters, which are the 2 groups of raters being compared:
# "g1.ratings" and "g2.ratings." Both datasets must have the exact same number of rows, and each
# column represents one rater and contains its ratings (numeric or alphabetic). 
# The user must exclude all subjects that are not rated by any rater.
# Bibliography:
# ------------
# Gwet, K. L. (2008). Computing inter-rater reliability and its variance in the presence of high agreement.
#                     British Journal of Mathematical and Statistical Psychology, 61, 29-48.
# Gwet, K. L. (2016). "Testing the Difference of Correlated Agreement Coefficients for Statistical Significance,
#                      Educational and Psychological Measurement, Vol 76(4) 609-637.
#======================================================================================
ttest.ac2 <- function(g1.ratings,g2.ratings,weights="unweighted",conflev=0.95,N=Inf,print=TRUE){ 
  n2 <- nrow(g2.ratings)
  n1 <- nrow(g1.ratings)
  if (n2==n1){
    coeff2.i<-ac2.linear.i(g2.ratings,weights,conflev,N)
    coeff1.i<-ac2.linear.i(g1.ratings,weights,conflev,N)
    di = coeff2.i-coeff1.i
    ac2.coeff1 = mean(coeff1.i)
    ac2.coeff2 = mean(coeff2.i)
    coeff.diff <- ac2.coeff2-ac2.coeff1
    std.err <- sqrt(var(di)/n1)
    t.stat <- (ac2.coeff2-ac2.coeff1)/std.err
    p.value <- 2*(1-pt(abs(t.stat),n1-1))
    if (print==TRUE){
      cat("PAIRED T-TEST FOR TESTING THE DIFFERENCE BETWEEN 2 GWET's AC2 AGREEMENT COEFFICIENTS\n")
      cat("------------------------------------------------------------------------------------\n")
      cat("AC2 Coefficients: (Group 1: ", ac2.coeff1, ") -- (Group 2: ",ac2.coeff2,")\n")
      cat("Standard Error of the Differences: ", std.err,"\n")
      cat("Test Statistic: T= ",t.stat,"\n")
      cat("P-value: ",p.value)
    }
    invisible(c(ac2.coeff1, ac2.coeff2,coeff.diff,std.err,t.stat,p.value))
  }else{
    cat("Both datasets must have the same number of subjects. One has ",n1," subjects, while the other has ", n2," subjects.")
    invisible(-1)
  }
}

#=====================================================================================
#ttest.alpha: This function performs the paired t-test for testing the difference  
#              between two correlated Krippendorff's alpha coefficients for statistical significance. It
#              implements the linearization method of Gwet (2016).
#-------------
# This function takes 2 required parameters, which are the 2 groups of raters being compared:
# "g1.ratings" and "g2.ratings." Both datasets must have the exact same number of rows, and each
# column represents one rater and contains its ratings (numeric or alphabetic). 
# The user must exclude all subjects that are not rated by any rater.
# Bibliography:
# ------------
# Krippendorff, K. (1970). Estimating the reliability, systematic error, and random error of interval data.
#                          Educational and Psychological Measurement, 30, 61-70.
# Gwet, K. L. (2016). "Testing the Difference of Correlated Agreement Coefficients for Statistical Significance,
#                      Educational and Psychological Measurement, Vol 76(4) 609-637.
#======================================================================================
ttest.alpha <- function(g1.ratings,g2.ratings,weights="unweighted",conflev=0.95,N=Inf,print=TRUE){ 
  n2 <- nrow(g2.ratings)
  n1 <- nrow(g1.ratings)
  if (n2==n1){
    coeff2.i<-alpha.linear.i(g2.ratings,weights,conflev,N)
    coeff1.i<-alpha.linear.i(g1.ratings,weights,conflev,N)
    alpha.coeff1 = mean(coeff1.i)
    alpha.coeff2 = mean(coeff2.i)
    coeff.diff <- alpha.coeff2-alpha.coeff1
    n1i <- length(coeff1.i)
    n2i <- length(coeff2.i)
    if (n1i==n2i){
      di = coeff2.i-coeff1.i
      std.err <- sqrt(var(di)/n1)
      t.stat <- (alpha.coeff2-alpha.coeff1)/std.err
      p.value <- 2*(1-pt(abs(t.stat),n1-1))
    }else{
      std.err <- NA
      t.stat <- NA
      p.value <- NA
    }
    if (print==TRUE){
      cat("PAIRED T-TEST FOR TESTING THE DIFFERENCE BETWEEN 2 KRIPPENDORFF's Alpha COEFFICIENTS\n")
      cat("------------------------------------------------------------------------------------\n")
      cat("Alpha Coefficients: (Group 1: ", alpha.coeff1, ") -- (Group 2: ",alpha.coeff2,")\n")
      cat("Standard Error of the Differences: ", std.err,"\n")
      cat("Test Statistic: T= ",t.stat,"\n")
      cat("P-value: ",p.value)
    }
    invisible(c(alpha.coeff1, alpha.coeff2,coeff.diff,std.err,t.stat,p.value))
  }else{
    cat("Both datasets must have the same number of subjects. One has ",n1," subjects, while the other has ", n2," subjects.")
    invisible(-1)
  }
}

#=====================================================================================
#ttest.conger: This function performs the paired t-test for testing the difference  
#              between two correlated Conger's Kappa coefficients for statistical significance. It
#              implements the linearization method of Gwet (2016).
#-------------
# This function takes 2 required parameters, which are the 2 groups of raters being compared:
# "g1.ratings" and "g2.ratings." Both datasets must have the exact same number of rows, and each
# column represents one rater and contains its ratings (numeric or alphabetic). 
# The user must exclude all subjects that are not rated by any rater.
# Bibliography:
# ------------
# Conger, A. J. (1980). Integration and generalization of kappas for multiple raters. 
#                       Psychological Bulletin, 88, 322-328. 
# Gwet, K. L. (2016). "Testing the Difference of Correlated Agreement Coefficients for Statistical Significance,
#                      Educational and Psychological Measurement, Vol 76(4) 609-637.
#======================================================================================
ttest.conger <- function(g1.ratings,g2.ratings,weights="unweighted",conflev=0.95,N=Inf,print=TRUE){ 
  n2 <- nrow(g2.ratings)
  n1 <- nrow(g1.ratings)
  if (n2==n1){
    coeff1.i<-conger.linear.i(g1.ratings,weights,conflev,N)
    coeff2.i<-conger.linear.i(g2.ratings,weights,conflev,N)
    di = coeff2.i-coeff1.i
    conger.coeff1 = mean(coeff1.i)
    conger.coeff2 = mean(coeff2.i)

    coeff.diff <- conger.coeff2-conger.coeff1
    std.err <- sqrt(var(di)/n1)
    t.stat <- (conger.coeff2-conger.coeff1)/std.err
    p.value <- 2*(1-pt(abs(t.stat),n1-1))
    if (print==TRUE){
      cat("PAIRED T-TEST FOR TESTING THE DIFFERENCE BETWEEN 2 CONGER's KAPPA COEFFICIENTS\n")
      cat("------------------------------------------------------------------------------\n")
      cat("COnger's Coefficients: (Group 1: ", conger.coeff1, ") -- (Group 2: ",conger.coeff2,")\n")
      cat("Standard Error of the Differences: ", std.err,"\n")
      cat("Test Statistic: T= ",t.stat,"\n")
      cat("P-value: ",p.value)
    }
    invisible(c(conger.coeff1, conger.coeff2,coeff.diff,std.err,t.stat,p.value))
  }else{
    cat("Both datasets must have the same number of subjects. One has ",n1," subjects, while the other has ", n2," subjects.")
    invisible(-1)
  }
}

#=====================================================================================
#ttest.bp: This function performs the paired t-test for testing the difference  
#              between two correlated Brennan-Prediger coefficients for statistical significance. It
#              implements the linearization method of Gwet (2016).
#-------------
# This function takes 2 required parameters, which are the 2 groups of raters being compared:
# "g1.ratings" and "g2.ratings." Both datasets must have the exact same number of rows, and each
# column represents one rater and contains its ratings (numeric or alphabetic). 
# The user must exclude all subjects that are not rated by any rater.
# Bibliography:
# ------------
# Brennan, R.L. & Prediger, D. J. (1981). Coefficient kappa: some uses, misuses, and alternatives.
#                       Educational and Psychological Measurement, 41, 687-699. 
# Gwet, K. L. (2016). "Testing the Difference of Correlated Agreement Coefficients for Statistical Significance,
#                      Educational and Psychological Measurement, Vol 76(4) 609-637.
#======================================================================================
ttest.bp <- function(g1.ratings,g2.ratings,weights="unweighted",conflev=0.95,N=Inf,print=TRUE){ 
  n2 <- nrow(g2.ratings)
  n1 <- nrow(g1.ratings)
  if (n2==n1){
    coeff2.i<-bp.linear.i(g2.ratings,weights,conflev,N)
    coeff1.i<-bp.linear.i(g1.ratings,weights,conflev,N)
    di = coeff2.i-coeff1.i
    bp.coeff1 = mean(coeff1.i)
    bp.coeff2 = mean(coeff2.i)
    coeff.diff <- bp.coeff2-bp.coeff1
    std.err <- sqrt(var(di)/n1)
    t.stat <- (bp.coeff2-bp.coeff1)/std.err
    p.value <- 2*(1-pt(abs(t.stat),n1-1))
    if (print==TRUE){
      cat("PAIRED T-TEST FOR TESTING THE DIFFERENCE BETWEEN 2 BRENNAN-PREDIGER COEFFICIENTS\n")
      cat("--------------------------------------------------------------------------------\n")
      cat("Brennan-Prediger Coefficients: (Group 1: ", bp.coeff1, ") -- (Group 2: ",bp.coeff2,")\n")
      cat("Standard Error of the Differences: ", std.err,"\n")
      cat("Test Statistic: T= ",t.stat,"\n")
      cat("P-value: ",p.value)
    }
    invisible(c(bp.coeff1, bp.coeff2,coeff.diff,std.err,t.stat,p.value))
  }else{
    cat("Both datasets must have the same number of subjects. One has ",n1," subjects, while the other has ", n2," subjects.")
    invisible(-1)
  }
}

# ==============================================================
# This is an r function for trimming leading and trealing blanks
# ==============================================================
trim <- function( x ) {
  gsub("(^[[:space:]]+|[[:space:]]+$)", "", x) }

#=====================================================================================
#fleiss.linear.i: This function produces all n linear components associated with  
#		           the n subjects, and the linearized version of the Fleiss' kappa coefficient.
#-------------
#The input data "ratings" is a nxr data frame of raw alphanumeric ratings
#from n subjects and r raters. Exclude all subjects that are not rated by any rater.
#Bibliography:
#Fleiss, J. L. (1981). Statistical Methods for Rates and Proportions. John Wiley & Sons.
#======================================================================================
fleiss.linear.i <- function(ratings,weights="unweighted",conflev=0.95,N=Inf){ 
  ratings.mat <- as.matrix(ratings) 
  if (is.character(ratings.mat)){ratings.mat <- toupper(trim(ratings.mat))}
  n <- nrow(ratings.mat) # number of subjects
  r <- ncol(ratings.mat) # number of raters
  f <- n/N # final population correction 
  
  # creating a vector containing all categories used by the raters
  
  categ.init <- unique(as.vector(ratings.mat))
  if (is.numeric(categ.init)){
    categ <- sort(as.vector(na.omit(categ.init)))
  }else{
    categ.init <- trim(categ.init) #trim vector elements to remove leading and trailing blanks
    categ <- categ.init[nchar(categ.init)>0]
    categ <- sort(categ)
  }
  q <- length(categ)
  
  # creating the weights matrix
  
  if (is.character(weights)){
    if (weights=="quadratic")
      weights.mat<-quadratic.weights(categ)
    else if (weights=="ordinal")
      weights.mat<-ordinal.weights(categ)
    else if (weights=="linear")
      weights.mat<-linear.weights(categ)
    else if (weights=="radical")
      weights.mat<-radical.weights(categ)
    else if (weights=="ratio")
      weights.mat<-ratio.weights(categ)
    else if (weights=="circular")
      weights.mat<-circular.weights(categ)
    else if (weights=="bipolar")
      weights.mat<-bipolar.weights(categ)
    else weights.mat<-identity.weights(categ)
  }else weights.mat= as.matrix(weights)
  
  # creating the nxq agreement matrix representing the distribution of raters by subjects and category
  
  agree.mat <- matrix(0,nrow=n,ncol=q)
  for(k in 1:q){
    if (is.numeric(ratings.mat)){
      k.mis <-(ratings.mat==categ[k])
      in.categ.k <- replace(k.mis,is.na(k.mis),FALSE)
      agree.mat[,k] <- in.categ.k%*%rep(1,r) 
    }else{
      in.k <- (trim(ratings.mat)==categ[k])
      in.k[is.na(in.k)] <- FALSE 
      agree.mat[,k] <- in.k%*%rep(1,r)
    }
  }
  agree.mat.w <- t(weights.mat%*%t(agree.mat))
  
  # calculating fleiss's generalized kappa coefficient
  
  ri.vec <- agree.mat%*%rep(1,q)
  sum.q <- (agree.mat*(agree.mat.w-1))%*%rep(1,q)
  n2more <- sum(ri.vec>=2)
  pa <- sum(sum.q[ri.vec>=2]/((ri.vec*(ri.vec-1))[ri.vec>=2]))/n2more
  
  pi.vec <- t(t(rep(1/n,n))%*%(agree.mat/(ri.vec%*%t(rep(1,q)))))
  pe <- sum(weights.mat * (pi.vec%*%t(pi.vec)))
  fleiss.kappa <- (pa-pe)/(1-pe)
  
  # calculating variance, stderr & p-value of gwet's ac1 coefficient
  
  den.ivec <- ri.vec*(ri.vec-1)
  den.ivec <- den.ivec - (den.ivec==0) # this operation replaces each 0 value with -1 to make the next ratio calculation always possible.
  pa.ivec <- sum.q/den.ivec
  
  pe.r2 <- pe*(ri.vec>=2)
  kappa.ivec <- (n/n2more)*(pa.ivec-pe.r2)/(1-pe)
  pi.vec.wk. <- weights.mat%*%pi.vec
  pi.vec.w.k <- t(weights.mat)%*%pi.vec
  pi.vec.w <- (pi.vec.wk. + pi.vec.w.k)/2
  
  pe.ivec <- (agree.mat%*%pi.vec.w)/ri.vec
  kappa.ivec.x <- kappa.ivec - 2*(1-fleiss.kappa) * (pe.ivec-pe)/(1-pe)
  invisible(kappa.ivec.x)
}
#===========================================================================================
#ac2.linear.i: This function produces all n linear components associated with  
#		           the n subjects, and the linearized version of the ac2 coefficient.
#-------------
#The input data "ratings" is a nxr data frame of raw alphanumeric ratings
#from n subjects and r raters. Exclude all subjects that are not rated by any rater.
#Bibliography:
#Gwet, K. L. (2008). ``Computing inter-rater reliability and its variance in the presence of high
#		agreement." British Journal of Mathematical and Statistical Psychology, 61, 29-48.
#============================================================================================
ac2.linear.i <- function(ratings,weights="unweighted",conflev=0.95,N=Inf){ 
  ratings.mat <- as.matrix(ratings)
  if (is.character(ratings.mat)){ratings.mat <- toupper(trim(ratings.mat))}
  
  n <- nrow(ratings.mat) # number of subjects
  r <- ncol(ratings.mat) # number of raters
  f <- n/N # final population correction 
  
  # creating a vector containing all categories used by the raters
  
  categ.init <- unique(as.vector(ratings.mat))
  if (is.numeric(categ.init))
    categ <- sort(as.vector(na.omit(categ.init)))
  else{
    categ.init <- trim(categ.init) #trim vector elements to remove leading and trailing blanks
    categ <- categ.init[nchar(categ.init)>0]
    categ <- sort(categ)
  }
  q <- length(categ)
  
  # creating the weights matrix
  
  if (is.character(weights)){
    if (weights=="quadratic")
      weights.mat<-quadratic.weights(categ)
    else if (weights=="ordinal")
      weights.mat<-ordinal.weights(categ)
    else if (weights=="linear")
      weights.mat<-linear.weights(categ)
    else if (weights=="radical")
      weights.mat<-radical.weights(categ)
    else if (weights=="ratio")
      weights.mat<-ratio.weights(categ)
    else if (weights=="circular")
      weights.mat<-circular.weights(categ)
    else if (weights=="bipolar")
      weights.mat<-bipolar.weights(categ)
    else weights.mat<-identity.weights(categ)
  }else weights.mat= as.matrix(weights)
  
  # creating the nxq agreement matrix representing the distribution of raters by subjects and category
  
  agree.mat <- matrix(0,nrow=n,ncol=q)
  for(k in 1:q){
    if (is.numeric(ratings.mat)){
      k.mis <-(ratings.mat==categ[k])
      in.categ.k <- replace(k.mis,is.na(k.mis),FALSE)
      agree.mat[,k] <- in.categ.k%*%rep(1,r) 
    }else{
      in.k <- (trim(ratings.mat)==categ[k])
      in.k[is.na(in.k)] <- FALSE 
      agree.mat[,k] <- in.k%*%rep(1,r)
    }
  }
  agree.mat.w <- t(weights.mat%*%t(agree.mat))
  
  # calculating gwet's ac1 coefficient
  
  ri.vec <- agree.mat%*%rep(1,q)
  sum.q <- (agree.mat*(agree.mat.w-1))%*%rep(1,q)
  n2more <- sum(ri.vec>=2)
  pa <- sum(sum.q[ri.vec>=2]/((ri.vec*(ri.vec-1))[ri.vec>=2]))/n2more
  
  pi.vec <- t(t(rep(1/n,n))%*%(agree.mat/(ri.vec%*%t(rep(1,q)))))
  if (q>1){
    pe <- sum(weights.mat) * sum(pi.vec*(1-pi.vec)) / (q*(q-1))
  }else{pe <- 0}
  gwet.ac1 <- (pa-pe)/(1-pe)
  
  # calculating variance, stderr & p-value of gwet's ac1 coefficient
  
  den.ivec <- ri.vec*(ri.vec-1)
  den.ivec <- den.ivec - (den.ivec==0) # this operation replaces each 0 value with -1 to make the next ratio calculation always possible.
  pa.ivec <- sum.q/den.ivec
  
  pe.r2 <- pe*(ri.vec>=2)
  ac1.ivec <- (n/n2more)*(pa.ivec-pe.r2)/(1-pe)
  if (q>1){
    pe.ivec <- (sum(weights.mat)/(q*(q-1))) * (agree.mat%*%(1-pi.vec))/ri.vec
  }else{
    pe.ivec <- matrix(0,n,1)
  }
  ac1.ivec.x <- ac1.ivec - 2*(1-gwet.ac1) * (pe.ivec-pe)/(1-pe)
  invisible(ac1.ivec.x)
}

#=====================================================================================
#alpha.linear.i: This function produces all n linear components associated with  
#		           the n subjects, and the linearized version of Krippendorff's alpha coefficient.
#-------------
#The algorithm used to compute krippendorff's alpha is very different from anything that was published on this topic. Instead,
#it follows the equations presented by K. Gwet (2014, p. 88)
#The input data "ratings" is a nxr data frame of raw alphanumeric ratings
#from n subjects and r raters. Exclude all subjects that are not rated by any rater.
#Bibliography:
#Gwet, K. (2014). Handbook of Inter-Rater Reliability: The Definitive Guide to Measuring the Extent of Agreement Among 
#	Multiple Raters, 4th Edition.  Advanced Analytics, LLC; 4th edition (September 10, 2014)
#Krippendorff (1970). "Bivariate agreement coefficients for reliability of data." Sociological Methodology,2,139-150
#Krippendorff (1980). Content analysis: An introduction to its methodology (2nd ed.), New-bury Park, CA: Sage.
#======================================================================================
alpha.linear.i <- function(ratings,weights="unweighted",conflev=0.95,N=Inf){ 
  ratings.mat <- as.matrix(ratings) 
  if (is.character(ratings.mat)){ratings.mat <- toupper(trim(ratings.mat))}
  n <- nrow(ratings.mat) # number of subjects
  r <- ncol(ratings.mat) # number of raters
  f <- n/N # final population correction 
  
  # creating a vector containing all categories used by the raters
  
  categ.init <- unique(as.vector(ratings.mat))
  if (is.numeric(categ.init)){
    categ <- sort(as.vector(na.omit(categ.init)))
  }else{
    categ.init <- trim(categ.init) #trim vector elements to remove leading and trailing blanks
    categ <- categ.init[nchar(categ.init)>0]
    categ <- sort(categ)
  }
  q <- length(categ)
  
  # creating the weights matrix
  
  if (is.character(weights)){
    if (weights=="quadratic")
      weights.mat<-quadratic.weights(categ)
    else if (weights=="ordinal")
      weights.mat<-ordinal.weights(categ)
    else if (weights=="linear")
      weights.mat<-linear.weights(categ)
    else if (weights=="radical")
      weights.mat<-radical.weights(categ)
    else if (weights=="ratio")
      weights.mat<-ratio.weights(categ)
    else if (weights=="circular")
      weights.mat<-circular.weights(categ)
    else if (weights=="bipolar")
      weights.mat<-bipolar.weights(categ)
    else weights.mat<-identity.weights(categ)
  }else weights.mat= as.matrix(weights)
  
  # creating the nxq agreement matrix representing the distribution of raters by subjects and category
  
  agree.mat <- matrix(0,nrow=n,ncol=q)
  for(k in 1:q){
    if (is.numeric(ratings.mat)){
      k.mis <-(ratings.mat==categ[k])
      in.categ.k <- replace(k.mis,is.na(k.mis),FALSE)
      agree.mat[,k] <- in.categ.k%*%rep(1,r) 
    }else{
      in.k <- (trim(ratings.mat)==categ[k])
      in.k[is.na(in.k)] <- FALSE 
      agree.mat[,k] <- in.k%*%rep(1,r)
    }
  }
  agree.mat.w <- t(weights.mat%*%t(agree.mat))
  
  # calculating krippendorff's alpha coefficient
  
  ri.vec <- agree.mat%*%rep(1,q)
  agree.mat<-as.matrix(agree.mat[(ri.vec>=2),])
  agree.mat.w <- agree.mat.w[(ri.vec>=2),]
  ri.vec <- ri.vec[(ri.vec>=2)]
  ri.mean <- mean(ri.vec)
  n <- nrow(agree.mat)
  epsi <- 1/sum(ri.vec)
  sum.q <- (agree.mat*(agree.mat.w-1))%*%as.matrix(rep(1,q))
  pa <- (1-epsi)* sum(sum.q/(ri.mean*(ri.vec-1)))/n + epsi
  pi.vec <- t(t(rep(1/n,n))%*%(agree.mat/ri.mean))
  pe <- sum(weights.mat * (pi.vec%*%t(pi.vec)))
  krippen.alpha <- (pa-pe)/(1-pe)
  
  # calculating variance, stderr & p-value of gwet's ac1 coefficient
  
  den.ivec <- ri.mean*(ri.vec-1)
  pa.ivec <- sum.q/den.ivec
  pa.v <- mean(pa.ivec)
  pa.ivec <- (1-epsi)*(pa.ivec-pa.v*(ri.vec-ri.mean)/ri.mean) + epsi
  krippen.ivec <- (pa.ivec-pe)/(1-pe)
  pi.vec.wk. <- weights.mat%*%pi.vec
  pi.vec.w.k <- t(weights.mat)%*%pi.vec
  
  pi.vec.w <- (pi.vec.wk. + pi.vec.w.k)/2
  
  pe.ivec <- (agree.mat%*%pi.vec.w)/ri.mean - sum(pi.vec) * (ri.vec-ri.mean)/ri.mean
  krippen.ivec.x <- krippen.ivec - 2*(1-krippen.alpha) * (pe.ivec-pe)/(1-pe)
  invisible(krippen.ivec.x)
}

#===========================================================================================
#conger.linear.i: This function produces all n linear components associated with  
#		           the n subjects, and the linearized version of Conger's kappa coefficient.
#-------------
#The input data "ratings" is a nxr data frame of raw alphanumeric ratings
#from n subjects and r raters. Exclude all subjects that are not rated by any rater.
#Bibliography:
#Conger, A. J. (1980), ``Integration and Generalization of Kappas for Multiple Raters,"
#		Psychological Bulletin, 88, 322-328.
#======================================================================================
conger.linear.i <- function(ratings,weights="unweighted",conflev=0.95,N=Inf){ 
  ratings.mat <- as.matrix(ratings) 
  if (is.character(ratings.mat)){ratings.mat <- toupper(trim(ratings.mat))}
  n <- nrow(ratings.mat) # number of subjects
  r <- ncol(ratings.mat) # number of raters
  f <- n/N # final population correction 
  
  # creating a vector containing all categories used by the raters
  
  categ.init <- unique(as.vector(ratings.mat))
  if (is.numeric(categ.init))
    categ <- sort(as.vector(na.omit(categ.init)))
  else {
    categ.init <- trim(categ.init) #trim vector elements to remove leading and trailing blanks
    categ <- categ.init[nchar(categ.init)>0]
    categ <- sort(categ)
  }
  q <- length(categ)
  
  # creating the weights matrix
  
  if (is.character(weights)){
    if (weights=="quadratic")
      weights.mat<-quadratic.weights(categ)
    else if (weights=="ordinal")
      weights.mat<-ordinal.weights(categ)
    else if (weights=="linear")
      weights.mat<-linear.weights(categ)
    else if (weights=="radical")
      weights.mat<-radical.weights(categ)
    else if (weights=="ratio")
      weights.mat<-ratio.weights(categ)
    else if (weights=="circular")
      weights.mat<-circular.weights(categ)
    else if (weights=="bipolar")
      weights.mat<-bipolar.weights(categ)
    else weights.mat<-identity.weights(categ)
  }else weights.mat= as.matrix(weights)

  # creating the nxq agreement matrix representing the distribution of raters by subjects and category
  
  agree.mat <- matrix(0,nrow=n,ncol=q)
  for(k in 1:q){
    if (is.numeric(ratings.mat)){
      k.mis <-(ratings.mat==categ[k])
      in.categ.k <- replace(k.mis,is.na(k.mis),FALSE)
      agree.mat[,k] <- in.categ.k%*%rep(1,r) 
    }else{
      in.k <- (trim(ratings.mat)==categ[k])
      in.k[is.na(in.k)] <- FALSE 
      agree.mat[,k] <- in.k%*%rep(1,r)
    }
  }
  agree.mat.w <- t(weights.mat%*%t(agree.mat))
  
  # creating the rxq rater-category matrix representing the distribution of subjects by rater and category
  
  classif.mat <- matrix(0,nrow=r,ncol=q)
  for(k in 1:q){
    if (is.numeric(ratings.mat)){
      with.mis <-(t(ratings.mat)==categ[k])
      without.mis <- replace(with.mis,is.na(with.mis),FALSE)
      classif.mat[,k] <- without.mis%*%rep(1,n)
    }else{
      incat.k <- (t(ratings.mat)==categ[k])
      incat.k[is.na(incat.k)] <- FALSE
      classif.mat[,k] <- incat.k%*%rep(1,n)
    }
  }
  
  # calculating conger's kappa coefficient
  
  ri.vec <- agree.mat%*%rep(1,q)
  sum.q <- (agree.mat*(agree.mat.w-1))%*%rep(1,q)
  n2more <- sum(ri.vec>=2)
  pa <- sum(sum.q[ri.vec>=2]/((ri.vec*(ri.vec-1))[ri.vec>=2]))/n2more
  
  ng.vec <- classif.mat%*%rep(1,q)
  pgk.mat <- classif.mat/(ng.vec%*%rep(1,q))
  p.mean.k <- (t(pgk.mat)%*%rep(1,r))/r 
  s2kl.mat <- (t(pgk.mat)%*%pgk.mat - r * p.mean.k%*%t(p.mean.k))/(r-1)
  pe <- sum(weights.mat * (p.mean.k%*%t(p.mean.k) -  s2kl.mat/r))
  conger.kappa <- (pa-pe)/(1-pe)
  
  # calculating variance, stderr & p-value of conger's kappa coefficient
  
  bkl.mat <- (weights.mat+t(weights.mat))/2
  pe.ivec1 <- r*(agree.mat%*%t(t(p.mean.k)%*%bkl.mat))
  pe.ivec2 = rep(0,n)
  
  lamda.ig.mat=matrix(0,n,r)
  epsi.ig.mat <-1-is.na(ratings.mat)
  epsi.ig.mat <- replace(epsi.ig.mat,is.na(epsi.ig.mat),FALSE)
  for(k in 1:q){
    lamda.ig.kmat=matrix(0,n,r)
    for(l in 1:q){
      delta.ig.mat <- (ratings.mat==categ[l])
      delta.ig.mat <- replace(delta.ig.mat,is.na(delta.ig.mat),FALSE)
      lamda.ig.kmat <- lamda.ig.kmat + weights.mat[k,l] * (delta.ig.mat - (epsi.ig.mat - rep(1,n)%*%t(ng.vec/n)) * (rep(1,n)%*%t(pgk.mat[,l])))
    }
    lamda.ig.kmat = lamda.ig.kmat*(rep(1,n)%*%t(n/ng.vec))
    lamda.ig.mat = lamda.ig.mat+ lamda.ig.kmat*(r*mean(pgk.mat[,k]) - rep(1,n)%*%t(pgk.mat[,k]))
  }
  pe.ivec <- (lamda.ig.mat%*%rep(1,r)) / (r*(r-1))
  den.ivec <- ri.vec*(ri.vec-1)
  den.ivec <- den.ivec - (den.ivec==0) # this operation replaces each 0 value with -1 to make the next ratio calculation always possible.
  pa.ivec <- sum.q/den.ivec
  pe.r2 <- pe*(ri.vec>=2)
  conger.ivec <- (n/n2more)*(pa.ivec-pe.r2)/(1-pe) 
  conger.ivec.x <- conger.ivec - 2*(1-conger.kappa) * (pe.ivec-pe)/(1-pe)
  invisible(conger.ivec.x)
}
#===========================================================================================
#bp.linear.i: This function produces all n linear components associated with  
#		           the n subjects, and the linearized version of B-P's coefficient.
#-------------
#The input data "ratings" is a nxr data frame of raw alphanumeric ratings
#from n subjects and r raters. Exclude all subjects that are not rated by any rater.
#Bibliography:
#Brennan, R.L., and Prediger, D. J. (1981). ``Coefficient Kappa: some uses, misuses, and alternatives."
#           Educational and Psychological Measurement, 41, 687-699.
#======================================================================================
bp.linear.i <- function(ratings,weights="unweighted",conflev=0.95,N=Inf){ 
  ratings.mat <- as.matrix(ratings) 
  if (is.character(ratings.mat)){ratings.mat <- toupper(trim(ratings.mat))}
  n <- nrow(ratings.mat) # number of subjects
  r <- ncol(ratings.mat) # number of raters
  f <- n/N # final population correction 
  
  # creating a vector containing all categories used by the raters
  
  categ.init <- unique(as.vector(ratings.mat))
  if (is.numeric(categ.init))
    categ <- sort(as.vector(na.omit(categ.init)))
  else{
    categ.init <- trim(categ.init) #trim vector elements to remove leading and trailing blanks
    categ <- categ.init[nchar(categ.init)>0]
    categ <- sort(categ)
  }
  q <- length(categ)
  
  # creating the weights matrix
  
  if (is.character(weights)){
    if (weights=="quadratic")
      weights.mat<-quadratic.weights(categ)
    else if (weights=="ordinal")
      weights.mat<-ordinal.weights(categ)
    else if (weights=="linear")
      weights.mat<-linear.weights(categ)
    else if (weights=="radical")
      weights.mat<-radical.weights(categ)
    else if (weights=="ratio")
      weights.mat<-ratio.weights(categ)
    else if (weights=="circular")
      weights.mat<-circular.weights(categ)
    else if (weights=="bipolar")
      weights.mat<-bipolar.weights(categ)
    else weights.mat<-identity.weights(categ)
  }else weights.mat= as.matrix(weights)
  
  # creating the nxq agreement matrix representing the distribution of raters by subjects and category
  
  agree.mat <- matrix(0,nrow=n,ncol=q)
  for(k in 1:q){
    if (is.numeric(ratings.mat)){
      k.mis <-(ratings.mat==categ[k])
      in.categ.k <- replace(k.mis,is.na(k.mis),FALSE)
      agree.mat[,k] <- in.categ.k%*%rep(1,r) 
    }else{
      in.k <- (trim(ratings.mat)==categ[k])
      in.k[is.na(in.k)] <- FALSE 
      agree.mat[,k] <- in.k%*%rep(1,r)
    }
  }
  agree.mat.w <- t(weights.mat%*%t(agree.mat))
  
  # calculating gwet's ac1 coefficient
  
  ri.vec <- agree.mat%*%rep(1,q)
  sum.q <- (agree.mat*(agree.mat.w-1))%*%rep(1,q)
  n2more <- sum(ri.vec>=2)
  pa <- sum(sum.q[ri.vec>=2]/((ri.vec*(ri.vec-1))[ri.vec>=2]))/n2more
  
  pi.vec <- t(t(rep(1/n,n))%*%(agree.mat/(ri.vec%*%t(rep(1,q)))))
  if (q>1){
    pe <- sum(weights.mat) / (q^2)
  }else{pe <- 0}
  bp.coeff <- (pa-pe)/(1-pe)
  
  # calculating variance, stderr & p-value of gwet's ac1 coefficient
  
  den.ivec <- ri.vec*(ri.vec-1)
  den.ivec <- den.ivec - (den.ivec==0) # this operation replaces each 0 value with -1 to make the next ratio calculation always possible.
  pa.ivec <- sum.q/den.ivec
  
  pe.r2 <- pe*(ri.vec>=2)
  bp.ivec <- (n/n2more)*(pa.ivec-pe.r2)/(1-pe)
  invisible(bp.ivec)
}