makepwm<-function(x,bg){ # x = matrix of N aligned sites coded numerically # bg = vector (1x4) of background base frequencies L<-length(x[1,]) # Number of positions in each site N<-length(x[,1]) # Number of sites pwm<-matrix(rep(1,4*L),nrow=4) # pwm initialized to 1 for each matrix element (pseudocounts) for (j in 1:L){ for (i in 1:N){ k <- x[i,j] pwm[k,j] <- pwm[k,j]+1 } } N <- N+4 # Denominator for small sample correction pwm<-pwm/N # PWM in terms of probabilities log2pwm<-matrix(rep(0,4*L),nrow=4,ncol=L) # Initialize PWM in terms of log(base 2) of p/q for(i in 1:4){ log2pwm[i,]<-log2(pwm[i,]/bg[i]) # Scores for each [nucleotide, position], base 2 } return(pwm, log2pwm) }