I will like to avoid a loop in the following code:
delta_S <- function(Ro, Rr, Ir, S1, S2, S3, S4, chromaty) { .... etc .....}
for (i in 1:nrow(Rrecon)) {
gri[i, 6] <- delta_S(Ro=as.vector(Rrecon[i, ]), Rr=data_base$bck, Ir=data_base$Ir, S1=data_base$s1, S2=data_base$s2, S3=data_base$s3, S4=data_base$s4, chromaty="tetra")
}
My problem is that in my dataset, i
varies from 1 to more than a million. I guess by using something like apply
I could save time compared to the 19h currently required.
Thank you for any reply!
Update:
I have read some info about vectorization but it seems largely out of my skills (and I do not even speak about using C ou FORTRAN: I have learned R for three months only, to resolve this study). So by chance, if somebody has some time to look at in details my code and to propose a vectorization if it is possible, it would be hugely appreciated! Thank you Julien PS I copy the entire code because I do not know where it can be vectorize!
data<-read.table("databird.txt",sep="\t",dec=".",header=T,row.names=1)
#perform a PCA
pca<-dudi.pca(data, center = FALSE, scale = FALSE, scannf = FALSE, nf = ncol(data))
#definition of some functions
Q <- function(R,Ir,Si){
temp <- R/100*Ir*Si
return(sum((temp[2:length(temp)]+temp[1:(length(temp)-1)])/2))
}
delta_f <- function(Ro,Rr,Ir,Si) {
Qo <- Q(Ro,Ir,Si)
Qr <- Q(Rr,Ir,Si)
#if(Qo/Qr <0) {print("bug: Qo/Qr < 0, bug log neg"); return(NaN)}
return(log(Qo/Qr))
}
delta_S <- function(Ro,Rr,Ir,S1,S2,S3,S4,chromaty){
if(chromaty=="tetra"){
e1 <- 0.1
e2 <- 0.07
e3 <- 0.07
e4 <- 0.05
delta_f1 <- delta_f(Ro,Rr,Ir,S1)
delta_f2 <- delta_f(Ro,Rr,Ir,S2)
delta_f3 <- delta_f(Ro,Rr,Ir,S3)
delta_f4 <- delta_f(Ro,Rr,Ir,S4)
numerator <- (e1*e2)^2*(delta_f4-delta_f3)^2+(e1*e3)^2*(delta_f4-delta_f2)^2+(e1*e4)^2*(delta_f2-delta_f3)^2+(e2*e3)^2*(delta_f4-delta_f1)^2+(e2*e4)^2*(delta_f3-delta_f1)^2+(e3*e4)^2*(delta_f2-delta_f1)^2
denominator <- (e1*e2*e3)^2+(e1*e2*e4)^2+(e1*e3*e4)^2+(e2*e3*e4)^2
return(sqrt(numerator/denominator))
}
if(chromaty=="tri"){
e1 <- 0.0425
e2 <- 0.02
e3 <- 0.02
delta_f1 <- delta_f(Ro,Rr,Ir,S1)
delta_f2 <- delta_f(Ro,Rr,Ir,S2)
delta_f3 <- delta_f(Ro,Rr,Ir,S3)
numerator <- (e1^2*(delta_f3-delta_f2)^2+e2^2*(delta_f3-delta_f1)^2+e3^2*(delta_f1-delta_f2)^2)
denominator <- ((e1*e2)^2+(e1*e3)^2+(e2*e3)^2)
return(sqrt(numerator/denominator))
}
if(chromaty=="di"){
e1 <- 0.06
e2 <- 0.02
delta_f1 <- delta_f(Ro,Rr,Ir,S1)
delta_f2 <- delta_f(Ro,Rr,Ir,S2)
numerator <- (delta_f1-delta_f2)^2
denominator <- (e1^2+e2^2)
return(sqrt(numerator/denominator))
}
print("specification du niveau de chromaty incorrect")
return(NaN)
}
reconstBIS<-function (dudi, nf = 1,scoregrid, ...)
{
if (!inherits(dudi, "dudi"))
stop("Object of class 'dudi' expected")
if (nf > dudi$nf)
stop(paste(nf, "factors need >", dudi$nf, "factors available\n"))
if (!inherits(dudi, "pca"))
stop("Object of class 'dudi' expected")
cent <- dudi$cent
norm <- dudi$norm
n <- nrow(scoregrid)
p <- ncol(dudi$tab)
res <- matrix(0, n, p)
for (i in 1:nf) {
xli <- scoregrid[, i]
yc1 <- dudi$c1[, i]
res <- res + matrix(xli, n, 1) %*% matrix(yc1, 1, p)
}
res <- t(apply(res, 1, function(x) x * norm))
res <- t(apply(res, 1, function(x) x + cent))
res <- data.frame(res)
names(res) <- names(dudi$tab)
row.names(res) <- paste(scoregrid[,1],scoregrid[,2],scoregrid[,3],scoregrid[,4],scoregri [,5],sep="/")
return(res)
}
#creation of a grid with values to enter in the function delta_S
ran1<-seq(-410,-400,2)
ran2<-seq(110,120,2)
ran3<-seq(10,20,2)
ran4<-seq(-40,-30,2)
ran5<-seq(10,20,2)
Length<-length(ran1)*length(ran2)*length(ran3)*length(ran4)*length(ran5)
print(Length)
gri <- matrix(NA,ncol=6,nrow=Length)
position <- 1
for(i in 1:length(ran1)) {
for(j in 1:length(ran2)){
for(k in 1:length(ran3)){
for(l in 1:length(ran4)){
for(m in 1:length(ran5)){
gri[position,1] <- ran1[i]
gri[position,2] <- ran2[j]
gri[position,3] <- ran3[k]
gri[position,4] <- ran4[l]
gri[position,5] <- ran5[m]
position <- position + 1
if(position %% 100 == 0) print(paste(round(100*position/Length,2),"%"))
}
}
}
}
};gc()
Rrecon<-reconstBIS(pca,5,scoregrid=gri)
data_base <- read.table("data_base.txt",header=T,dec=".",sep="\t")
And here's the final loop to avoid:
for (i in 1:nrow(Rrecon)) {
gri[i,6]<- delta_S(Ro=as.vector(Rrecon[i,]),Rr=data_base$bck,Ir=data_base$Ir,S1=data_base$s1,S2=data_base$s2,S3=data_base$s3,S4=data_base$s4,chromaty="tetra")
if(i %% 100 == 0) print(paste(round(100*i/nrow(Rrecon),2),"%"))
}
Thank you very much !