gncttr, a nucleotide translation tool for search of coding sequences
This commit is contained in:
parent
3957c36507
commit
1fc165a840
2 changed files with 212 additions and 7 deletions
182
gncttr/gncttr.rs
Normal file
182
gncttr/gncttr.rs
Normal file
|
@ -0,0 +1,182 @@
|
|||
#![allow(unstable)] // allow unstable libraries
|
||||
#![feature(rustc_private)]
|
||||
|
||||
#[macro_use]
|
||||
extern crate chomp;
|
||||
|
||||
use std::io;
|
||||
use std::io::prelude::*;
|
||||
//use chomp::primitives::Primitives;
|
||||
use std::collections::HashMap;
|
||||
|
||||
|
||||
/// TODO: document how the program is compiled, how it is run, and which parameters it accepts.
|
||||
// cat gncttr/*fna | cargo run --
|
||||
|
||||
// program for extraction of codon sequences from genomes
|
||||
fn main () {
|
||||
// fill hash with genetic code
|
||||
let mut code = HashMap::new();
|
||||
code = gencode(); // HashMap<String, String>
|
||||
|
||||
/* // translate codon into AS
|
||||
let t = "TTT".to_string();
|
||||
let AS = code.get(&t);
|
||||
println!("AS for {} is {}", t, AS.unwrap());
|
||||
*/
|
||||
|
||||
/* for key in code.keys() {
|
||||
println!("{}", key);
|
||||
}
|
||||
for val in code.values() {
|
||||
println!("{}", val);
|
||||
}
|
||||
for (key, val) in code.iter() {
|
||||
println!("{} {}", key, val);
|
||||
}
|
||||
*/
|
||||
|
||||
// read sequence from stdin
|
||||
let stdin = io::stdin();
|
||||
let mut seq = String::new();
|
||||
for mut line in stdin.lock().lines() {
|
||||
let mut read = line.unwrap();
|
||||
chomp(&mut read);
|
||||
seq.push_str(&read); // concatenate strings
|
||||
//println!("{}", seq);
|
||||
}
|
||||
|
||||
// split sequence into codons
|
||||
let codons = triple(&seq);
|
||||
for &c in codons.iter() {
|
||||
//println!("{}", c);
|
||||
// translate codons
|
||||
let AS = code.get(c);
|
||||
match AS {
|
||||
Some(AS) => println!("{}", AS),
|
||||
None => println!("Something went wrong!!\n{} is no codon!", c), //println!("AS for codon {} is {}!", c, AS);
|
||||
}
|
||||
}
|
||||
|
||||
// clean sequence -> remove header (![ATCGNU])
|
||||
|
||||
/* // remove newline
|
||||
let mut test = format!("test\n");
|
||||
let dummy = format!("dummy1");
|
||||
//println!("{}", test);
|
||||
//rm_newline(&test);
|
||||
//test.pop();
|
||||
chomp(&mut test);
|
||||
//println!("{}", test);
|
||||
test.push_str(&dummy);
|
||||
println!("{}", test);
|
||||
*/
|
||||
// split sequence into triplet
|
||||
|
||||
// compare & translate triplet into AS
|
||||
|
||||
// CDS??
|
||||
|
||||
|
||||
// print out CDS
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
/// Splits `string` into consecutive, non-overlapping triplets (codons).
///
/// The returned slices borrow from `string`. Triplets are counted in
/// characters, and the slice boundaries are taken from the matching byte
/// offsets, so input containing multi-byte characters cannot cause a slice
/// panic. If the number of characters is not a multiple of three, the trailing
/// one or two characters are returned as a final, shorter element so that the
/// caller can report them instead of losing them silently. An empty input
/// yields an empty vector.
fn triple(string: &str) -> Vec<&str> {
    let mut codon: Vec<&str> = Vec::with_capacity(string.len() / 3 + 1);
    let mut start = 0; // byte offset at which the current triplet begins

    for (count, (offset, _)) in string.char_indices().enumerate() {
        // Every third character opens a new triplet, which closes the previous one.
        if count > 0 && count % 3 == 0 {
            codon.push(&string[start..offset]);
            start = offset;
        }
    }

    // Whatever is left is the last triplet (complete or not).
    if start < string.len() {
        codon.push(&string[start..]);
    }
    codon
}
|
||||
|
||||
|
||||
/// Removes one trailing newline character (`'\n'`) from `raw`, if there is one.
///
/// Only a single newline is removed per call. Strings that are empty or do not
/// end in a newline are left untouched. Each removal is reported on stderr.
fn chomp(raw: &mut String) {
    // `ends_with` is simply false for an empty string, so no special case is needed.
    if raw.ends_with('\n') {
        eprintln!("[INFO]\tNewline removed!");
        raw.pop();
    }
}
|
||||
|
||||
|
||||
/// Builds the codon translation table.
///
/// Keys are the 64 three-letter DNA codons (alphabet `T`, `C`, `A`, `G`);
/// values are one-letter amino acid codes, with `*` used for the three
/// codons `TAA`, `TAG` and `TGA`.
fn gencode() -> HashMap<String, String> {
    // (codon, amino acid) pairs, one row per combination of the first two bases.
    const CODON_TABLE: [(&str, &str); 64] = [
        ("TTT", "F"), ("TTC", "F"), ("TTA", "L"), ("TTG", "L"),
        ("TCT", "S"), ("TCC", "S"), ("TCA", "S"), ("TCG", "S"),
        ("TAT", "Y"), ("TAC", "Y"), ("TAA", "*"), ("TAG", "*"),
        ("TGT", "C"), ("TGC", "C"), ("TGA", "*"), ("TGG", "W"),
        ("CTT", "L"), ("CTC", "L"), ("CTA", "L"), ("CTG", "L"),
        ("CCT", "P"), ("CCC", "P"), ("CCA", "P"), ("CCG", "P"),
        ("CAT", "H"), ("CAC", "H"), ("CAA", "Q"), ("CAG", "Q"),
        ("CGT", "R"), ("CGC", "R"), ("CGA", "R"), ("CGG", "R"),
        ("ATT", "I"), ("ATC", "I"), ("ATA", "I"), ("ATG", "M"),
        ("ACT", "T"), ("ACC", "T"), ("ACA", "T"), ("ACG", "T"),
        ("AAT", "N"), ("AAC", "N"), ("AAA", "K"), ("AAG", "K"),
        ("AGT", "S"), ("AGC", "S"), ("AGA", "R"), ("AGG", "R"),
        ("GTT", "V"), ("GTC", "V"), ("GTA", "V"), ("GTG", "V"),
        ("GCT", "A"), ("GCC", "A"), ("GCA", "A"), ("GCG", "A"),
        ("GAT", "D"), ("GAC", "D"), ("GAA", "E"), ("GAG", "E"),
        ("GGT", "G"), ("GGC", "G"), ("GGA", "G"), ("GGG", "G"),
    ];

    CODON_TABLE
        .iter()
        .map(|&(codon, residue)| (codon.to_string(), residue.to_string()))
        .collect()
}
|
|
@ -32,8 +32,6 @@ use regex::Regex;
|
|||
|
||||
// functions
|
||||
|
||||
|
||||
|
||||
// print help menu
|
||||
fn print_usage(program: &str, opts: Options) {
|
||||
let brief = format!("Usage: {} FILE [options]", program);
|
||||
|
@ -59,6 +57,25 @@ fn filler () -> Vec<char> {
|
|||
return signs
|
||||
}
|
||||
|
||||
/// Returns the printable ASCII special (punctuation) characters in code order.
///
/// The four ASCII code ranges 33-47, 58-64, 91-96 and 123-126 are covered
/// with *inclusive* upper bounds, so the last character of each range
/// (`/`, `@`, `` ` `` and `~`) is part of the result as well.
fn spcl() -> Vec<char> {
    (33u8..=47) // ! " # $ % & ' ( ) * + , - . /
        .chain(58..=64) // : ; < = > ? @
        .chain(91..=96) // [ \ ] ^ _ `
        .chain(123..=126) // { | } ~
        .map(char::from)
        .collect()
}
|
||||
|
||||
// creation of a password with a given length
|
||||
fn pw (pass_length: &usize, signs: &Vec<char>) -> String {
|
||||
let mut password = String::new(); // assign password as empty string
|
||||
|
@ -130,12 +147,16 @@ fn main() {
|
|||
//let test: char = '5';
|
||||
//println!("first {}", control);
|
||||
|
||||
let mut special: Vec<char> = spcl(); // fill vector with special character
|
||||
let mut lowcase = 0; // counter for lowercase
|
||||
let mut upcase = 0; // counter for upptercase
|
||||
let uprex = Regex::new(r"[A-Z]").unwrap(); // regex uppercase
|
||||
let lowex = Regex::new(r"[a-z]").unwrap();// regex lowercase
|
||||
assert!(uprex.is_match("A")); // testing regex
|
||||
assert!(lowex.is_match("b"));
|
||||
//assert!(uprex.is_match("A")); // testing regex
|
||||
//assert!(lowex.is_match("b"));
|
||||
/*for i in &special {
|
||||
println!("{}", i);
|
||||
}*/
|
||||
|
||||
let mut dup: f64 = 0.0; // number of same character in row
|
||||
// verification of password
|
||||
|
@ -156,13 +177,15 @@ fn main() {
|
|||
// different cases
|
||||
let cstr = c.to_string(); // convert <char> into <String>
|
||||
if uprex.is_match(&cstr) {
|
||||
println!("uppercase letter {}", c);
|
||||
//println!("uppercase letter {}", c);
|
||||
upcase = 1;
|
||||
} else if lowex.is_match(&cstr) {
|
||||
println!("lowercase letter {}", c);
|
||||
//println!("lowercase letter {}", c);
|
||||
lowcase = 1;
|
||||
}
|
||||
|
||||
// special character??
|
||||
|
||||
//println!("{} vs {}", control, c);
|
||||
control = c.clone();
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue