logo

Create bulk file


bind="/network/lustre/iss02/ukbiobank/software" 
wd="/network/lustre/iss02/ukbiobank/unorganized/unorganized_pheno"
cd $wd
${bind}/ukbconv ukb50815.enc_ukb bulk -s20253

Extract first visits (remove imaging second wave)

blk = read.table("ukb50815_20252.bulk", stringsAsFactors = F)

table(blk$V2)
write.table(blk[-which(blk$V2 == "20252_3_0"), ], "ukb50815.20252_2.bulk",
    col.names = F, row.names = F, quote = F)

#Remove unusable subjects (that I gotten through the FSL process (_20252))

blk_v2 = read.table("ukb50815.20252_2.bulk", stringsAsFactors = F)
unusable = read.table("unusable.csv", header = F)
usables = setdiff(ukb$V1, unusable$V1)
ukb_usable = ukb[ukb$V1 %in% usables, ]
write.table(ukb_usable, "ukb50815.20252.bulk", col.names = F,
    row.names = F, quote = F)

#before


bind="/network/lustre/iss02/ukbiobank/software" 
ukb="/network/lustre/iss02/ukbiobank/unorganized/unorganized_pheno" # UKB files
wd="/network/lustre/iss02/ukbiobank/ukbiobank-remaining/elise.delzant"

mkdir -p ${wd}/Downloadfsl
cp ${ukb}/k53185r50815.key ${wd}/Downloadfsl/k53185r50815.key
cp ${wd}/ukb50815.20252.bulk  ${wd}/Downloadfsl/ukb50815.20252.bulk

bind="/network/lustre/iss02/ukbiobank/software" 
ukb="/network/lustre/iss02/ukbiobank/unorganized/unorganized_pheno" # UKB files
wd="/network/lustre/iss02/ukbiobank/ukbiobank-remaining/elise.delzant"



cd /network/lustre/iss02/home/elise.delzant/Downloadfsl
bind="/network/lustre/iss02/ukbiobank/software" 

for step in $(seq 1 1 1)   # ....
do 
${bind}/ukbfetch -bdiff.bulk -ak53185r673035.key -s${step} -m1 #new key 
done 

#step 0 create directory

#convert array to table

#step 1-2ab : brain extraction and tissue seg for vbm, and Affine-registration to the GM ICBM-152 template, concatenation and average for both

#step 2c : Averaged image is flipped along x-axis and the two mirror images then re-averaged  first-pass template

#step 2d: Non-linear Registration to first pass template ,concatenation and average

#step 2e:Averaged image is flipped along x-axis and the two mirror images then re-averaged –> GM template

#step 3a: Register all images to template

#create a mask on all output files, that represents voxels that are equal to zero for all output images

#add script that keep only gm mod registered nifti, and convert it to tsv

#corresponding Rscript

#add ID to anat and vbm tsv

wd="/network/lustre/iss02/ukbiobank/ukbiobank-remaining/elise.delzant"
bind="/network/lustre/iss02/ukbiobank/software" 
cd ${wd}
${bind}/qsubshcom "dat=\$(sed -n \"\${TASK_ID}{p;q}\" ${wd}/Downloadfsl/participants.tsv)|;
cd ${wd}/FSLVBM/\${dat}/output/anat|;
echo \${dat} > anat_mod_masked_id.tsv|;
cat anat_mod_masked.tsv>>anat_mod_masked_id.tsv|;
rm anat_mod_masked.tsv|;
cd ${wd}/FSLVBM/\${dat}/output/vbm|;
echo \${dat} > vbm_mod_masked_id.tsv|;
cat vbm_mod_masked.tsv>>vbm_mod_masked_id.tsv|;
rm vbm_mod_masked.tsv|;
" 1 4G id_to_tsv 00:30:00 "-array=2-42789" 

#create file with all output .tsv from each pipeline #a batch = 500 participants

wd="/network/lustre/iss02/ukbiobank/ukbiobank-remaining/elise.delzant"
dl="/network/lustre/iss02/ukbiobank/ukbiobank-remaining/elise.delzant/Downloadfsl"
bind="/network/lustre/iss02/ukbiobank/software" 
cd ${wd}/FSLVBM
for batch in {01..86}
do 
echo>tsv_anat_batch${batch}
for elem in `cat ${dl}/participants.batch${batch}`
do 
paste tsv_anat_batch${batch} $elem/output/anat/anat_mod_masked_id.tsv>anat_tmp_batch${batch}
cp anat_tmp_batch${batch} tsv_anat_batch${batch}
done 
rm anat_tmp_batch${batch}
done 

wd="/network/lustre/iss02/ukbiobank/ukbiobank-remaining/elise.delzant"
dl="/network/lustre/iss02/ukbiobank/ukbiobank-remaining/elise.delzant/Downloadfsl"
bind="/network/lustre/iss02/ukbiobank/software" 
cd ${wd}/FSLVBM
for batch in {01..86}
do 
echo>tsv_vbm_batch${batch}
for elem in `cat ${dl}/participants.batch${batch}`
do 
paste tsv_vbm_batch${batch} $elem/output/vbm/vbm_mod_masked_id.tsv>vbm_tmp_batch${batch}
cp vbm_tmp_batch${batch} tsv_vbm_batch${batch}
done 
rm vbm_tmp_batch${batch}
done 

#Merging all batches #create BodFiles

Import text files in OSCA


wd="/network/lustre/iss02/ukbiobank/ukbiobank-remaining/elise.delzant"
bind="/network/lustre/iss02/ukbiobank/software" 


mkdir -p ${wd}/FSLVBM/BodFilesFSL
od="/network/lustre/iss02/ukbiobank/ukbiobank-remaining/elise.delzant/FSLVBM"
cd ${od}
proc=vbm
for batch in {01..86}
do
# Cortical
${bind}/qsubshcom " paste voxel_${proc} tsv_${proc}_batch${batch}>tsv_${proc}_batch${batch}vox|;
rm tsv_${proc}_batch${batch}|;
${bind}/osca --tefile ${od}/tsv_${proc}_batch${batch}vox --make-bod --no-fid --out ${od}/BodFilesFSL/${proc}.batch${batch}" 1 10G ${proc}_MakeBod${batch} 10:00:00 "" 
done

#Merge all bod into one #mybod.flist is a list with paths to bod files to be merged


wd="/network/lustre/iss02/ukbiobank/ukbiobank-remaining/elise.delzant"
bind="/network/lustre/iss02/ukbiobank/software" 

cd ${wd}/BodFilesFSL
${bind}/qsubshcom " "${bind}"/osca --befile-flist "${wd}"/FSLVBM/BodFilesFSL/mybod.flistvbm --make-bod --no-fid --out "${wd}"/BodFilesFSL/UKB_VbmFinalBod " 1 8G Merging_Bod 02:00:00 "" 

${bind}/qsubshcom " "${bind}"/osca --befile-flist "${wd}"/FSLVBM/BodFilesFSL/mybod.flistanat --make-bod --no-fid --out "${wd}"/BodFilesFSL/UKB_AnatFinalBod " 1 8G Merging_Bod 02:00:00 "" 

#extracting mean and var from BodFiles

wd="/network/lustre/iss02/ukbiobank/ukbiobank-remaining/elise.delzant"
bind="/network/lustre/iss02/ukbiobank/software" 

cd ${wd}/BodFilesFSL


#mean and var for total bod 
${bind}/qsubshcom " "${bind}"/osca --befile "${wd}"/BodFilesFSL/UKB_AnatFinalBod --get-mean --get-variance --out "${wd}"/BodFilesFSL/fsl_anat " 4 100G fsl_var_anat2 48:00:00 "" 

${bind}/qsubshcom " "${bind}"/osca --befile "${wd}"/BodFilesFSL/UKB_VbmFinalBod  --get-mean --get-variance --out "${wd}"/BodFilesFSL/fsl_vbm " 4 100G fsl_var_vbm 48:00:00 "" 

#we want to keep only best voxels (ie the one that represent GM), we find a threshold according to mean and var of each vox #plotting mean and var for each processing

mean_anat <- read.table("../fsl_anat.mean.txt")
var_anat <- read.table("../fsl_anat.var.txt")

df <- data.frame(Mean = mean_vbm$V2, Var = var_vbm$V2, vox = mean_vbm$V1)
ggplot(df, aes(Mean, Var)) + geom_point(size = 0.01) + ggtitle("FSLVBM") +
    theme(axis.text.x = element_text(size = 16, face = "bold"),
        axis.text.y = element_text(size = 16, face = "bold"),
        axis.title = element_text(size = 24, face = "bold"),
        plot.title = element_text(size = 24, face = "bold"))

df_cut <- df[df$Mean > 0.1 | df$Var > 0.01, ]  #vbm = 181544, anat = 184 637 #threshold on mean and var 

write.table(df_cut$vox, "../vox_to_keep_vbm", row.names = F,
    col.names = F)  #les 181 544 qu'on garde parmi les 248053 pour vbm et 184 637 parmi 248053 pour anat 

# plot these vox on image to get the mask we do the
# following lines for anat and vbm

library(oro.nifti)
pre_mask <- readNIfTI("../mask_zero_anat_mod.nii.gz", reorient = F)
val <- c(pre_mask)
vox <- seq(1, 902629)
pre_mask <- data.frame(val, vox)
ones <- pre_mask[pre_mask$val == 1, ]  #ceux qui valent 1 dans le pré mask, pre mask = on exclut les vox qui sont 0 en mean et var pour tout le monde
corresp_vox <- cbind(ones, data.frame(mask2 = seq(1, 254019)))
corresp_vox[!(corresp_vox$mask2 %in% df_cut$vox), "val"] <- 0
mask <- corresp_vox[corresp_vox$val == 1, ]

dim(mask)

voxels <- seq(1, 902629)
img_df <- data.frame(voxels)
img_df$value <- 0

img_df[which(img_df$voxels %in% mask$vox), "value"] <- 1
img_df <- array(img_df$value, dim = c(91, 109, 91))

writeNIfTI(img_df, file = "../test_anat")


# plot for one subject : rm val = 0 , on regarde que val
# <0.1 et on plot hist pour les 3 process
im_cat12 <- read.table("../../Filezilla_dl/mwp1T1_brain_id_3217683.tsv")
im_vbm <- c(readNIfTI("../../Filezilla_dl/T1_brain_struc_GM_to_template_GM_mod_3217683.nii.gz"))
im_anat <- c(readNIfTI("../../Filezilla_dl/T1_brain_pve_1_struc_GM_to_template_GM_mod_3217683.nii.gz"))

im_cat12 <- im_cat12$V1[-1]
im_cat12 <- im_cat12[im_cat12 != 0]  #345 207
im_vbm <- im_vbm[im_vbm != 0]  #160 230
im_anat <- im_anat[im_anat != 0]  #164 565

df_cat12 <- data.frame(Val = im_cat12, Process = "CAT12")
df_vbm <- data.frame(Val = im_vbm, Process = "FSLVBM")
df_anat <- data.frame(Val = im_anat, Process = "FSLANAT")

df_cat12 <- df_cat12[df_cat12$Val < 0.02, ]  #167 358
df_vbm <- df_vbm[df_vbm$Val < 0.02, ]  # 22 634
df_anat <- df_anat[df_anat$Val < 0.02, ]  # 22 266

df <- rbind(df_cat12, df_vbm, df_anat)
ggplot(df, aes(x = Val)) + geom_histogram(data = df[df$Process ==
    "FSLVBM", ], fill = "blue", alpha = 0.2, bins = 300) + geom_histogram(data = df[df$Process ==
    "FSLANAT", ], fill = "green", alpha = 0.2, bins = 300) +
    geom_histogram(data = df[df$Process == "CAT12", ], fill = "red",
        alpha = 0.2, bins = 300)

#Get only voxels under cut off (cut_off.R file)


wd="/network/lustre/iss02/ukbiobank/ukbiobank-remaining/elise.delzant"
bind="/network/lustre/iss02/ukbiobank/software" 

cd ${wd}/BodFilesFSL

for fsl in Anat Vbm 
do
${bind}/qsubshcom " "${bind}"/osca --befile "${wd}"/BodFilesFSL/UKB_${fsl}FinalBod --extract-probe vox_to_keep_${fsl} --make-bod --no-fid --out "${wd}"/BodFilesFSL/UKB_${fsl}_SubVox " 4 128G sub_Bod_${fsl} 48:00:00 "--partition=bigmem" 
done 

#dividing into main and replicate


wd="/network/lustre/iss02/ukbiobank/ukbiobank-remaining/elise.delzant"
bind="/network/lustre/iss02/ukbiobank/software" 

cd ${wd}/BodFilesFSL

for fsl in Anat Vbm
do 
for div in replicate main
do
${bind}/qsubshcom " "${bind}"/osca --befile "${wd}"/BodFilesFSL/UKB_${fsl}_SubVox --keep "${wd}"/BodFilesFSL/${div}_id.csv  --make-bod --no-fid --out "${wd}"/BodFilesFSL/UKB_${fsl}.FinalBod.${div} " 4 128G Bod_${fsl}.${div} 48:00:00 "--partition=bigmem" 
done 
done 

#BRM

wd="/network/lustre/iss02/ukbiobank/ukbiobank-remaining/elise.delzant"
bind="/network/lustre/iss02/ukbiobank/software" 

cd ${wd}/BodFilesFSL

for fsl in Anat Vbm
do 
for div in replicate main
do
${bind}/qsubshcom " "${bind}"/osca --befile "${wd}"/BodFilesFSL/UKB_${fsl}.FinalBod.${div} --make-orm-bin --out "${wd}"/BodFilesFSL/UKB_${fsl}.FinalBod.${div}.BRM --thread-num 5 " 8 120G FSL_BRM_calculation 10:00:00 "--partition=bigmem" 
done 
done 

#compare both BRM

library(png)
library(ggplot2)

source("RR_4.0_BRM_QC_functions.R")

brm1 <- asBRM(readORMBin("UKB_Vbm.FinalBod.main.BRM"))
brm2 <- asBRM(readORMBin("UKB_Anat.FinalBod.main.BRM"))

cor_fin < c()
for (i in 1:dim(brm1)[1]) {
    cor_fin <- c(cor_fin, cor(brm1[i, ], brm2[i, ]))
}

cor <- data.frame(cor = cor_fin)
cor$id <- seq(1, dim(brm1)[1])

png("corr_anat_vbm.png", width = 60, height = 30, units = "cm",
    res = 400)
par(mfrow = c(1, 2))
ggplot(corr, aes(x = id, y = cor)) + geom_point(size = 0.01)
dev.off()

png("corr_anat_vbm2.png", width = 60, height = 30, units = "cm",
    res = 400)
par(mfrow = c(1, 2))
ggplot(corr, aes(x = cor)) + geom_histogram(binwidth = 0.01,
    fill = "#69b3a2", color = "#e9ecef", alpha = 0.9)
dev.off()

#final

knitr::opts_chunk$set(tidy.opts = list(width.cutoff = 60), tidy = TRUE)
 




A work by by Elise Delzant - 31 July 2025