对BED文件进行注释

   
很多时候,我们需要对取出的SNV进行注释,这个时候可能会在R上进行注释。通常注释文件都包含Chr(染色体)、Start(起始位点)、End(终止位点)、Description(描述),而我们的SNV文件一般只有Position(位置),因此我们可以先定位Chr,再用Position去定位到Start和End之间,找到相对应的Description。为了加快速度,可以使用二分查找法。

   
很多时候,我们需要对取出的SNV进行注释,这个时候可能会在R上进行注释。通常注释文件都包含Chr(染色体)、Start(起始位点)、End(终止位点)、Description(描述),而我们的SNV文件一般只有Position(位置),因此我们可以先定位Chr,再用Position去定位到Start和End之间,找到相对应的Description。为了加快速度,可以使用二分查找法。

这里也提供另外一篇文章:

 1 for (value in dt$value){
 2 #df:data.frame, V1 and V2 should be Start and End   value: Postition  used to find region  return:df row number where position locates  ,if no region return -1
 3     low=1
 4     high=nrow(df)
 5     mid=high %/% 2
 6     if (df[low,1] <= value & value <= df[low,2]) low
 7     else if (df[high,1] <= value & value <= df[high,2]) high
 8     else{
 9     while (value > df[mid,2] || value < df[mid,1]){
10       if (value > df[mid,2]){
11         low = mid+1
12       } else if (value < df[mid,1]) {
13         high = mid - 1
14       } 
15       if(high<low){
16          mid=-1;break
17       }
18       mid=(low+high)%/%2
19     }
20       mid
21 }
22 }
 1 for (value in dt$value){
 2 #df:data.frame, V1 and V2 should be Start and End   value: Postition  used to find region  return:df row number where position locates  ,if no region return -1
 3     low=1
 4     high=nrow(df)
 5     mid=high %/% 2
 6     if (df[low,1] <= value & value <= df[low,2]) low
 7     else if (df[high,1] <= value & value <= df[high,2]) high
 8     else{
 9     while (value > df[mid,2] || value < df[mid,1]){
10       if (value > df[mid,2]){
11         low = mid+1
12       } else if (value < df[mid,1]) {
13         high = mid - 1
14       } 
15       if(high<low){
16          mid=-1;break
17       }
18       mid=(low+high)%/%2
19     }
20       mid
21 }
22 }

http://www.BkJia.com/kf/200412/3263.html

在R中使用for循环效率低,因此也可以用data.table包的foverlaps函数,更新后的代码如下,用于对bed文件进行注释。如果要对snv进行注释,只需要将snv改成相应的start和end相等的bed文件即可。

在R中使用for循环效率低,因此也可以用data.table包的foverlaps函数,更新后的代码如下,用于对bed文件进行注释。如果要对snv进行注释,只需要将snv改成相应的start和end相等的bed文件即可。

压缩:
' Adds all of the items in a source folder to a ZIP file using the Windows
' shell's native compressed-folder support.
'
' Parameters:
'   sSourceFolder  - folder whose contents are zipped; a trailing "\" is
'                    appended if it is missing.
'   sTargetZIPFile - ZIP file to create; an existing file is deleted first.
' Returns:
'   Array(errNumber, errSource, errDescription) describing the first failing
'   step, or Array(0, "", "") on success.
Function fZip(sSourceFolder, sTargetZIPFile)
    Dim oShellApp, oFSO, iErr, sErrSource, sErrDescription
    Set oShellApp = CreateObject("Shell.Application")
    Set oFSO = CreateObject("Scripting.FileSystemObject")

    ' The source folder needs to have a \ on the end.
    If Right(sSourceFolder, 1) <> "\" Then sSourceFolder = sSourceFolder & "\"

    ' If a target ZIP exists already, delete it.
    On Error Resume Next
    If oFSO.FileExists(sTargetZIPFile) Then oFSO.DeleteFile sTargetZIPFile, True
    ' Capture the error details before "On Error GoTo 0" resets Err.
    iErr = Err.Number
    sErrSource = Err.Source
    sErrDescription = Err.Description
    On Error GoTo 0
    If iErr <> 0 Then
        fZip = Array(iErr, sErrSource, sErrDescription)
        Exit Function
    End If

    ' Write the file header for a blank zip file (end-of-central-directory
    ' signature followed by 18 zero bytes).
    On Error Resume Next
    oFSO.OpenTextFile(sTargetZIPFile, 2, True).Write "PK" & Chr(5) & Chr(6) _
        & String(18, Chr(0))
    iErr = Err.Number
    sErrSource = Err.Source
    sErrDescription = Err.Description
    On Error GoTo 0
    If iErr <> 0 Then
        fZip = Array(iErr, sErrSource, sErrDescription)
        Exit Function
    End If

    ' Start copying files into the zip from the source folder.
    On Error Resume Next
    oShellApp.NameSpace(sTargetZIPFile).CopyHere _
        oShellApp.NameSpace(sSourceFolder).Items
    iErr = Err.Number
    sErrSource = Err.Source
    sErrDescription = Err.Description
    On Error GoTo 0
    If iErr <> 0 Then
        fZip = Array(iErr, sErrSource, sErrDescription)
        Exit Function
    End If

    ' Because the copying occurs in a separate process, the script will just
    ' continue. Run a DO...LOOP to prevent the function from exiting until the
    ' file is finished zipping.
    Do Until oShellApp.NameSpace(sTargetZIPFile).Items.Count = _
            oShellApp.NameSpace(sSourceFolder).Items.Count
        WScript.Sleep 1500 ' increase the delay if zipping does not complete
    Loop

    fZip = Array(0, "", "")
End Function

 1 #!/bin/Rscript
 2 
 3 library(data.table)
 4 
 5 arg <- commandArgs(T)
 6 if (length(arg) != 3) {
 7     message("[usage]: BedAnnoGene.R bedfile gtffile outputfile")
 8     message("    bedfile format: chr start end information(Arbitrary but can not be lacked)")
 9     message("    GTFfile: gtf file downloaded from GENCODE")
10     message("    outputfile: file to be writen out")
11     message("    needed package: data.table 1.10.4")
12     stop("Please check your arguments!")
13 }
14     
15 bedfile <- arg[1]
16 annofile <- arg[2]
17 outfile <- arg[3]
18 
19 #read file 
20 anno <- fread(annofile,sep="\t",header=F)
21 bed <- fread(bedfile,sep="\t",header=F)
22 setnames(anno,c("V1","V2","V3","V4","V5","V9"),c("Chr","Gene","Type","Start","End","Info"))
23 anno <- anno[Type=="gene",.(Chr,Start,End,Gene=sapply(strsplit(tstrsplit(Info,";")[3][[1]],"\""),function(x)x[2]))]
24 setkey(anno,Chr,Start,End)
25 setkey(bed,V1,V2,V3)
26 
27 #find overlaps by Chr
28 lst <- list()
29 for (ChrI in intersect(unique(bed$V1),unique(anno$Chr))){
30   anno_reg <- anno[Chr == ChrI,.(Start,End)]
31   bed_reg <- bed[V1 == ChrI,.(V2,V3)]
32   setkey(anno_reg,Start,End)
33   setkey(bed_reg,V2,V3)
34   overl <- foverlaps(bed_reg,anno_reg,which=TRUE,nomatch = 0)
35   if (nrow(overl) > 0){
36     lst[[ChrI]] <- data.table(Chr=ChrI,bed[V1 == ChrI,][overl[["xid"]],.(V2,V3,V4)],anno[Chr == ChrI][overl[["yid"]],.(Gene)])
37   }
38 }
39 merge_dt <- rbindlist(lst)
40 setnames(merge_dt,c("V2","V3","V4"),c("Start","End","Name"))
41 
42 #if one region has more than one gene
43 torm <- list()
44 for (i in 1:(nrow(merge_dt)-1)){if(merge_dt[i,"Name"]==merge_dt[i+1,"Name"]){set(merge_dt,i+1L,ncol(merge_dt),paste(merge_dt[i,"Gene"],merge_dt[i+1,"Gene"],sep=";"));torm <- c(torm,list(i))}}
45 torm <- unlist(torm)
46 merge_dt <- merge_dt[-torm,]
47 
48 fwrite(merge_dt,file=outfile)
 1 #!/bin/Rscript
 2 
 3 library(data.table)
 4 
 5 arg <- commandArgs(T)
 6 if (length(arg) != 3) {
 7     message("[usage]: BedAnnoGene.R bedfile gtffile outputfile")
 8     message("    bedfile format: chr start end information(Arbitrary but can not be lacked)")
 9     message("    GTFfile: gtf file downloaded from GENCODE")
10     message("    outputfile: file to be writen out")
11     message("    needed package: data.table 1.10.4")
12     stop("Please check your arguments!")
13 }
14     
15 bedfile <- arg[1]
16 annofile <- arg[2]
17 outfile <- arg[3]
18 
19 #read file 
20 anno <- fread(annofile,sep="\t",header=F)
21 bed <- fread(bedfile,sep="\t",header=F)
22 setnames(anno,c("V1","V2","V3","V4","V5","V9"),c("Chr","Gene","Type","Start","End","Info"))
23 anno <- anno[Type=="gene",.(Chr,Start,End,Gene=sapply(strsplit(tstrsplit(Info,";")[3][[1]],"\""),function(x)x[2]))]
24 setkey(anno,Chr,Start,End)
25 setkey(bed,V1,V2,V3)
26 
27 #find overlaps by Chr
28 lst <- list()
29 for (ChrI in intersect(unique(bed$V1),unique(anno$Chr))){
30   anno_reg <- anno[Chr == ChrI,.(Start,End)]
31   bed_reg <- bed[V1 == ChrI,.(V2,V3)]
32   setkey(anno_reg,Start,End)
33   setkey(bed_reg,V2,V3)
34   overl <- foverlaps(bed_reg,anno_reg,which=TRUE,nomatch = 0)
35   if (nrow(overl) > 0){
36     lst[[ChrI]] <- data.table(Chr=ChrI,bed[V1 == ChrI,][overl[["xid"]],.(V2,V3,V4)],anno[Chr == ChrI][overl[["yid"]],.(Gene)])
37   }
38 }
39 merge_dt <- rbindlist(lst)
40 setnames(merge_dt,c("V2","V3","V4"),c("Start","End","Name"))
41 
42 #if one region has more than one gene
43 torm <- list()
44 for (i in 1:(nrow(merge_dt)-1)){if(merge_dt[i,"Name"]==merge_dt[i+1,"Name"]){set(merge_dt,i+1L,ncol(merge_dt),paste(merge_dt[i,"Gene"],merge_dt[i+1,"Gene"],sep=";"));torm <- c(torm,list(i))}}
45 torm <- unlist(torm)
46 merge_dt <- merge_dt[-torm,]
47 
48 fwrite(merge_dt,file=outfile)

' Example: zip the contents of C:\vbs into c:\vbs.zip.
Call fZip("C:\vbs", "c:\vbs.zip")

使用帮助可以在我的github看到   https://github.com/yiliu4234/BedAnnoGene

使用帮助可以在我的github看到   https://github.com/yiliu4234/BedAnnoGene

解压缩:
' Extracts the contents of a ZIP file into a target folder using the Windows
' shell's native compressed-folder support.
'
' Parameters:
'   sZipFile      - ZIP file to extract
'   sTargetFolder - destination folder; created if it does not exist
Function fUnzip(sZipFile, sTargetFolder)
    ' Create the Shell.Application object
    Dim oShellApp: Set oShellApp = CreateObject("Shell.Application")
    ' Create the File System object
    Dim oFSO: Set oFSO = CreateObject("Scripting.FileSystemObject")

    ' Create the target folder if it isn't already there
    If Not oFSO.FolderExists(sTargetFolder) Then oFSO.CreateFolder sTargetFolder

    ' Extract the files from the zip into the folder
    oShellApp.NameSpace(sTargetFolder).CopyHere _
        oShellApp.NameSpace(sZipFile).Items

    ' This is a separate process, so the script would continue even if the
    ' unzipping is not done. To prevent this, we run a DO...LOOP once a second
    ' checking to see if the number of files in the target folder equals the
    ' number of files in the zipfile. If so, we continue.
    ' NOTE(review): the left side counts files only while the right side counts
    ' the archive's top-level items; confirm behavior for archives that contain
    ' folders.
    Do
        WScript.Sleep 1000 ' sometimes needs to be changed
    Loop While oFSO.GetFolder(sTargetFolder).Files.Count < _
        oShellApp.NameSpace(sZipFile).Items.Count
End Function

 

 

http://www.bkjia.com/VBjc/525890.htmlwww.bkjia.comtruehttp://www.bkjia.com/VBjc/525890.htmlTechArticle这里也提供另外一篇文章:
html”>http://www.2cto.com/kf/200412/3263.html 压缩: Function
fZip(sSourceFolder,sTargetZIPFile) This function will add all of the
files in…

相关文章