This commit is contained in:
Ananke 2018-09-13 22:23:56 +02:00
parent 1fc165a840
commit 5e1ddb623c

View file

@ -1,13 +1,16 @@
#![allow(unstable)] // allow unstable libraries #![allow(unstable)] // allow unstable libraries
#![feature(rustc_private)] #![feature(rustc_private)]
#[macro_use] #[macro_use] extern crate lazy_static;
extern crate chomp;
extern crate regex;
use std::io; use std::io;
use std::io::prelude::*; use std::io::prelude::*;
//use chomp::primitives::Primitives; //use chomp::primitives::Primitives;
use std::collections::HashMap; use std::collections::HashMap;
use regex::Regex;
use regex::Error;
/// write: how compiled? how run program? which parameters for program call? /// write: how compiled? how run program? which parameters for program call?
@ -18,45 +21,101 @@ fn main () {
// fill hash with genetic code // fill hash with genetic code
let mut code = HashMap::new(); let mut code = HashMap::new();
code = gencode(); // HashMap<String, String> code = gencode(); // HashMap<String, String>
let starts : [String; 3] = [format!("ATG"), format!("TTG"), format!("CTG")]; // array -> fixed size!
let limit = 100000; // length limitation for analysed sequence window
/* // translate codon into AS // regex for identication of nucleodide
let t = "TTT".to_string(); // let nc_pattern = r"[ATCGN]";
// let res = Regex::new(nc_pattern);
// translate codon into AS
/* let t = "TTT".to_string();
let AS = code.get(&t); let AS = code.get(&t);
println!("AS for {} is {}", t, AS.unwrap()); println!("AS for {} is {}", t, AS.unwrap());
*/ if let Ok(regex) = res {
println!("regex ok, trying to match..");
let matches = regex.is_match(&t);
if matches {
println!("{} is nucleotid {}", t, nc_pattern);
} else {
println!("{} NOT a nucleotid sequence {}", t, nc_pattern);
}
} else {
println!("{:?}", res);
}
*/
/* for key in code.keys() { // playing with regex
println!("{}", key); let nc_pattern = Regex::new(r"[ATCGN]").unwrap(); // assign regex
} let head = Regex::new(r"^>(.*)").unwrap(); // regex for capturing header
for val in code.values() { let test = ">wppwpweATTCGTGC8023582305";
println!("{}", val); // let after = re.replace_all(test, "--");
} // println!("{}", after);
for (key, val) in code.iter() { /* for cap in re.captures_iter(test) {
println!("{} {}", key, val); println!("{} -> {} -> {}", &cap[0], &cap[1], &cap[2]); // &cap[0] -> all captured by regex, &cap[1] -> 1st captured group, &cap[2] -> 2nd captured group, ...
} }
*/ */
/* for cap in head.captures_iter(test) {
println!("Header: {}", &cap[1]);
}
*/
//let matches = re.is_match(&test);
//assert!(re.is_match("TCGN"));
// read sequence from stdin // read sequence from stdin
let stdin = io::stdin(); let stdin = io::stdin();
let mut seq = String::new(); let mut seq = String::new();
for mut line in stdin.lock().lines() { for mut line in stdin.lock().lines() {
let mut read = line.unwrap(); let mut read = line.unwrap().to_string();
chomp(&mut read); let h = head.is_match(&read); // fasta header?
seq.push_str(&read); // concatenate strings let nt = nc_pattern.is_match(&read); // nucleotide sequences?
//println!("{}", seq); if h {
//eprintln!("HEADER found! {}", read);
} else if nt {
//eprintln!("NUCLEOTIDE sequences {}", read);
chomp(&mut read);
//println!("{}", read.len()); // 80 nt
seq.push_str(&read);
if seq.len() > limit {
//let codons = triple(&seq);
println!("{}", seq.len());
seq = seq[81..(seq.len() - 81)].to_string();
}
// search for startcodon in seq
// search for stopcodon in seq
// start & stopcodon?? -> get sequence!
//seq.push_str(&read); // concatenate strings
//println!("{}", seq);
} else {
eprintln!("[ERROR]\tskipping line because of non-nucleotid sequence!");
continue;
}
} }
// split sequence into codons // split sequence into codons
let codons = triple(&seq); /* let codons = triple(&seq);
for &c in codons.iter() { for &c in codons.iter() {
//println!("{}", c); //println!("{}", c);
// find startcodon
//let ini = starts.iter().position(|r| r == c).unwrap();
//println!("{}", ini);
// translate codons // translate codons
let AS = code.get(c); let AS = code.get(c);
match AS { match AS {
Some(AS) => println!("{}", AS), Some(AS) => println!("{}", AS),
None => println!("Something went wrong!!\n{} is no codon!", c), //println!("AS for codon {} is {}!", c, AS); None => println!("Something went wrong!!\n{} is no codon!", c), //println!("AS for codon {} is {}!", c, AS);
} }
} }
*/
// clean sequence -> remove header (![ATCGNU]) // clean sequence -> remove header (![ATCGNU])