update
This commit is contained in:
parent
1fc165a840
commit
5e1ddb623c
1 changed files with 78 additions and 19 deletions
|
@ -1,13 +1,16 @@
|
|||
#![allow(unstable)] // allow unstable libraries
|
||||
#![feature(rustc_private)]
|
||||
|
||||
#[macro_use]
|
||||
extern crate chomp;
|
||||
#[macro_use] extern crate lazy_static;
|
||||
|
||||
extern crate regex;
|
||||
|
||||
use std::io;
|
||||
use std::io::prelude::*;
|
||||
//use chomp::primitives::Primitives;
|
||||
use std::collections::HashMap;
|
||||
use regex::Regex;
|
||||
use regex::Error;
|
||||
|
||||
|
||||
/// TODO: document how the program is compiled, how it is run, and which parameters the program call takes.
|
||||
|
@ -18,45 +21,101 @@ fn main () {
|
|||
// fill hash with genetic code
|
||||
let mut code = HashMap::new();
|
||||
code = gencode(); // HashMap<String, String>
|
||||
let starts : [String; 3] = [format!("ATG"), format!("TTG"), format!("CTG")]; // array -> fixed size!
|
||||
let limit = 100000; // length limitation for analysed sequence window
|
||||
|
||||
/* // translate codon into AS
|
||||
let t = "TTT".to_string();
|
||||
// regex for identification of nucleotides
|
||||
// let nc_pattern = r"[ATCGN]";
|
||||
// let res = Regex::new(nc_pattern);
|
||||
|
||||
// translate codon into amino acid (AS)
|
||||
/* let t = "TTT".to_string();
|
||||
let AS = code.get(&t);
|
||||
println!("AS for {} is {}", t, AS.unwrap());
|
||||
*/
|
||||
if let Ok(regex) = res {
|
||||
println!("regex ok, trying to match..");
|
||||
let matches = regex.is_match(&t);
|
||||
if matches {
|
||||
println!("{} is nucleotid {}", t, nc_pattern);
|
||||
} else {
|
||||
println!("{} NOT a nucleotid sequence {}", t, nc_pattern);
|
||||
}
|
||||
} else {
|
||||
println!("{:?}", res);
|
||||
}
|
||||
*/
|
||||
|
||||
/* for key in code.keys() {
|
||||
println!("{}", key);
|
||||
}
|
||||
for val in code.values() {
|
||||
println!("{}", val);
|
||||
}
|
||||
for (key, val) in code.iter() {
|
||||
println!("{} {}", key, val);
|
||||
// playing with regex
|
||||
let nc_pattern = Regex::new(r"[ATCGN]").unwrap(); // assign regex
|
||||
let head = Regex::new(r"^>(.*)").unwrap(); // regex for capturing header
|
||||
let test = ">wppwpweATTCGTGC8023582305";
|
||||
// let after = re.replace_all(test, "--");
|
||||
// println!("{}", after);
|
||||
/* for cap in re.captures_iter(test) {
|
||||
println!("{} -> {} -> {}", &cap[0], &cap[1], &cap[2]); // &cap[0] -> all captured by regex, &cap[1] -> 1st captured group, &cap[2] -> 2nd captured group, ...
|
||||
}
|
||||
*/
|
||||
/* for cap in head.captures_iter(test) {
|
||||
println!("Header: {}", &cap[1]);
|
||||
}
|
||||
*/
|
||||
|
||||
//let matches = re.is_match(&test);
|
||||
//assert!(re.is_match("TCGN"));
|
||||
|
||||
// read sequence from stdin
|
||||
let stdin = io::stdin();
|
||||
let mut seq = String::new();
|
||||
for mut line in stdin.lock().lines() {
|
||||
let mut read = line.unwrap();
|
||||
chomp(&mut read);
|
||||
seq.push_str(&read); // concatenate strings
|
||||
//println!("{}", seq);
|
||||
let mut read = line.unwrap().to_string();
|
||||
let h = head.is_match(&read); // fasta header?
|
||||
let nt = nc_pattern.is_match(&read); // nucleotide sequences?
|
||||
if h {
|
||||
//eprintln!("HEADER found! {}", read);
|
||||
} else if nt {
|
||||
//eprintln!("NUCLEOTIDE sequences {}", read);
|
||||
chomp(&mut read);
|
||||
//println!("{}", read.len()); // 80 nt
|
||||
seq.push_str(&read);
|
||||
if seq.len() > limit {
|
||||
//let codons = triple(&seq);
|
||||
println!("{}", seq.len());
|
||||
seq = seq[81..(seq.len() - 81)].to_string();
|
||||
}
|
||||
|
||||
// search for start codon in seq
|
||||
|
||||
// search for stop codon in seq
|
||||
|
||||
// start & stop codon found? -> get sequence!
|
||||
|
||||
//seq.push_str(&read); // concatenate strings
|
||||
//println!("{}", seq);
|
||||
} else {
|
||||
eprintln!("[ERROR]\tskipping line because of non-nucleotid sequence!");
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
// split sequence into codons
|
||||
let codons = triple(&seq);
|
||||
/* let codons = triple(&seq);
|
||||
for &c in codons.iter() {
|
||||
//println!("{}", c);
|
||||
|
||||
// find start codon
|
||||
//let ini = starts.iter().position(|r| r == c).unwrap();
|
||||
//println!("{}", ini);
|
||||
|
||||
// translate codons
|
||||
let AS = code.get(c);
|
||||
match AS {
|
||||
Some(AS) => println!("{}", AS),
|
||||
None => println!("Something went wrong!!\n{} is no codon!", c), //println!("AS for codon {} is {}!", c, AS);
|
||||
}
|
||||
|
||||
}
|
||||
*/
|
||||
|
||||
// clean sequence -> remove header (![ATCGNU])
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue