• No results found

A telescope for the RNA universe

N/A
N/A
Protected

Academic year: 2021

Share "A telescope for the RNA universe"

Copied!
2
0
0

Bezig met laden.... (Bekijk nu de volledige tekst)

Hele tekst

(1)

Cover Page

The handle http://hdl.handle.net/1887/38825 holds various files of this Leiden University dissertation

Author: Pulyakhina, Irina

Title: A telescope for the RNA universe : novel bioinformatic approaches to analyze RNA sequencing data

Issue Date: 2016-04-21

(2)

# Usage guard: an annotation file and a SAM file are both required.  The third
# argument (cutoff) is optional; the script falls back to 650 when it is not
# given, so the usage text states that value.
if (@ARGV < 2) {
    print "USAGE: $0 <annotation file> <SAM file> <cutoff (optional, default=650)>\n\n";
    exit;
}

my @splice_sites = ();    # boundary coordinates collected from the annotation
my $last_line    = "";    # text of the final annotation line (set at EOF only)

# Three-argument open with a lexical handle; fail loudly instead of silently
# reading nothing when the annotation file is missing or unreadable.
open(my $annotation_fh, '<', $ARGV[0])
    or die "Cannot open annotation file '$ARGV[0]': $!\n";

while (my $line = <$annotation_fh>) {
    chomp $line;
    $last_line = $line if eof($annotation_fh);

    # Tab-separated columns used here: [0] feature type, [2] and [3] the two
    # coordinates of the feature.
    # NOTE(review): presumably [2]/[3] are start/end in either order (strand
    # dependent) - confirm against the annotation format.
    my @temp = split /\t/, $line;

    # For every exon or intron, record the smaller of the two coordinates.
    # Features whose two coordinates are equal contribute nothing.
    if (defined $temp[0] && $temp[0] =~ /^(?:exon|intron)/) {
        if    ($temp[3] > $temp[2]) { push @splice_sites, $temp[2]; }
        elsif ($temp[3] < $temp[2]) { push @splice_sites, $temp[3]; }
    }

    # $last_line is only non-empty while processing the final line of the
    # file: for that line, additionally record the larger coordinate so the
    # list is closed off by the end of the last feature.  This is done
    # regardless of the feature type in column 0.
    if ($last_line) {
        if    ($temp[3] > $temp[2]) { push @splice_sites, $temp[3]; }
        elsif ($temp[3] < $temp[2]) { push @splice_sites, $temp[2]; }
    }
}

close($annotation_fh);

# Per-read-name bookkeeping filled from the SAM file:
#   %pair1 / %pair2  "start__end" coordinate string of the first / other mate
#   %data            the raw SAM line(s) seen for that read name, in file order
my %pair1 = ();
my %pair2 = ();
my %data  = ();

# Splice-site coordinates in ascending numeric order.
my @sorted_splice_starts = sort { $a <=> $b } @splice_sites;

# Three-argument open with a lexical handle; abort if the SAM file cannot be
# read rather than silently classifying zero reads.
open(my $sam_fh, '<', $ARGV[1])
    or die "Cannot open SAM file '$ARGV[1]': $!\n";

while (my $record = <$sam_fh>) {
    chomp $record;
    next if $record =~ /^@/;    # header lines

    # SAM columns used: [0] read name, [1] FLAG, [3] POS, [5] CIGAR.
    my @temp = split /\t/, $record;
    next if @temp < 6;          # blank or truncated line: nothing to record

    my $coord1 = $temp[3];
    # End coordinate = start + number of reference bases the alignment spans.
    my $coord2 = $coord1 + _cigar_reference_span($temp[5]);
    print " $coord2\n";         # progress output kept from the original script

    if (($temp[1] & 0x0040) > 0) {
        # the read is the first read in a pair
        $pair1{$temp[0]} = $temp[3] . "__" . $coord2;
        print "$pair1{$temp[0]}\n";
    }
    else {
        # the read is the second read in a pair (anything without flag 0x40)
        $pair2{$temp[0]} = $temp[3] . "__" . $coord2;
    }

    # Keep the unmodified SAM line so it can be written to an output file.
    push @{ $data{$temp[0]} }, $record;
}

close($sam_fh);

# Return the number of reference bases covered by a CIGAR string.
#
# Only operations that consume the reference (M, D, N, = and X) are counted.
# Soft/hard clips, insertions and padding do not advance the reference
# position, so they are ignored wherever they occur in the string.  An
# undefined or "*" CIGAR yields 0.
sub _cigar_reference_span {
    my ($cigar) = @_;

    my $span = 0;
    return $span unless defined $cigar;

    while ($cigar =~ /(\d+)([MIDNSHP=X])/g) {
        # Copy the captures before doing anything that could reset them.
        my ($length, $op) = ($1, $2);
        $span += $length if index('MDN=X', $op) >= 0;
    }

    return $span;
}

# Distance cutoff used to separate "norm" from "large" read pairs.  Defaults
# to 650 and is overridden by the optional third command-line argument.
my $cutoff = 650;
if ($ARGV[2]) { $cutoff = $ARGV[2]; }

# One output file per read-pair category, named
# "<SAM file>.<category>.<cutoff>".  The package-level handle names are kept
# because the rest of the script prints to them by name.  Each open is
# checked so a failure does not silently discard results.
for my $output (
    [ \*small_overhang,  'small_overhang'  ],
    [ \*norm_pre,        'norm_pre'        ],
    [ \*norm_int,        'norm_int'        ],
    [ \*norm_post,       'norm_post'       ],
    [ \*large_int,       'large_int'       ],
    [ \*large_post,      'large_post'      ],
    [ \*large_same_exon, 'large_same_exon' ],
    [ \*un_known,        'unknown'         ],
    [ \*un_classified,   'unclassified'    ],
) {
    my ($handle, $category) = @{$output};
    my $path = "$ARGV[1].$category.$cutoff";
    open($handle, '>', $path)
        or die "Cannot create output file '$path': $!\n";
}

# Running totals updated while read pairs are classified.
my $paired_mapped_reads = 0;
my $unclassified_reads  = 0;
my $total_reads         = 0;

foreach my $p (keys %pair1) foreach my $p (keys %pair1) {if ($pair2{$p})

{$paired_mapped_reads += 1; $pair1{$p} =~ /(\d+)\_\_(\d+)/; my $read1_start = $1; my $read1_end = $2;

my $s1_tag = "no"; my $s1_index = "no"; my $e1_tag = "no"; my $e1_index = "no";

$pair2{$p} =~ /(\d+)\_\_(\d+)/; my $read2_start = $1; my $read2_end = $2; my $s2_tag = "no"; my $s2_index = "no";

my $e2_tag = "no"; my $e2_index = "no";

my $e2_tag = "no"; my $e2_index = "no";

if ($read1_start < $sorted_splice_starts[0]) { $s1_tag = "intron"; $s1_index = "-1"; } elsif ($read1_start > $sorted_splice_starts[-1]) { $s1_tag = "intron"; $s1_index = "-3"; } else{

#print "$read1_start and $read1_end and $read2_start and $read2_end\n";

foreach my $i (0..$#sorted_splice_starts) {

{if (($s1_tag eq "no") and ($read1_start < $sorted_splice_starts[$i]))

if ($i % 2) { $s1_tag = "exon"; $s1_index = ($i/2 + 1/2); } else { $s1_tag = "intron"; $s1_index = ($i/2); }

my $dist1 = abs($sorted_splice_starts[$i-1] - $read1_start); my $dist2 = abs($sorted_splice_starts[$i] - $read1_start);

if (($dist1 < 5) or ($dist2 < 5)) { print small_overhang "$data{$p}[0]\n$data{$p}[1]\n"; $total_reads += 1; next; } } } } if ($read1_end < $sorted_splice_starts[0]) { $e1_tag = "intron"; $e1_index = "-1"; }

elsif ($read1_end > $sorted_splice_starts[-1]) { $e1_tag = "intron"; $e1_index = "-3"; } my @sorted_temp_ar = sort ($read1_start, $read1_end, $read2_start, $read2_end);

my @sorted_temp_ar = sort ($read1_start, $read1_end, $read2_start, $read2_end);

my $dist = abs($sorted_temp_ar[2] - $sorted_temp_ar[1]);

print "$sorted_temp_ar[2] - $sorted_temp_ar[1]\n";

print "READ1_START $read1_start\n";

my $read1 = read_class($s1_tag, $e1_tag, $s1_index, $e1_index);

#print "READ1 $s1_tag, $e1_tag, $s1_index, $e1_index\nREAD2 $s2_tag, $e2_tag, $s2_index, $e2_index\n\n";

my $read2 = read_class($s2_tag, $e2_tag, $s2_index, $e2_index);

# classification

# classification

if ((($read1 eq "intron") and ($read2 eq "intron")) or (($read1 eq "intron") and ($read2 eq "exon")) or (($read2 eq "intron") and ($read1 eq "exon")) or (($read1 eq "intron") and ($read2 eq "int-ex")) or (($read2 eq "intron") and ($read1 eq "int-ex"))) {if (($dist) < $cutoff) { print norm_pre "$data{$p}[0]\n$data{$p}[1]\n"; $total_reads += 1; }

else { print large_int "$data{$p}[0]\n$data{$p}[1]\n"; $total_reads += 1; }

}elsif ((($read1 eq "intron") and ($read2 eq "ex-ex")) or (($read2 eq "intron") and ($read1 eq "ex-ex")) or (($read1 eq "ex-ex") and ($read2 eq "int-ex")) or (($read2 eq "ex-ex") and ($read1 eq "int-ex"))) {

{if (($dist) < $cutoff) { print "NORM $dist\n"; print norm_int "$data{$p}[0]\n$data{$p}[1]\n"; $total_reads += 1; } else { print "LARGE $dist\n"; print large_int "$data{$p}[0]\n$data{$p}[1]\n"; $total_reads += 1; }

}

my @sorted_temp_ar = sort ($read1_start, $read1_end, $read2_start, $read2_end);

my $dist = abs($sorted_temp_ar[2] - $sorted_temp_ar[1]);

print "$sorted_temp_ar[2] - $sorted_temp_ar[1]\n";

print "READ1_START $read1_start\n";

my $read1 = read_class($s1_tag, $e1_tag, $s1_index, $e1_index);

#print "READ1 $s1_tag, $e1_tag, $s1_index, $e1_index\nREAD2 $s2_tag, $e2_tag, $s2_index, $e2_index\n\n";

my $read2 = read_class($s2_tag, $e2_tag, $s2_index, $e2_index);

# classification

# classification

if ((($read1 eq "intron") and ($read2 eq "intron")) or (($read1 eq "intron") and ($read2 eq "exon")) or (($read2 eq "intron") and ($read1 eq "exon")) or (($read1 eq "intron") and ($read2 eq "int-ex")) or (($read2 eq "intron") and ($read1 eq "int-ex"))) {if (($dist) < $cutoff) { print norm_pre "$data{$p}[0]\n$data{$p}[1]\n"; $total_reads += 1; }

else { print large_int "$data{$p}[0]\n$data{$p}[1]\n"; $total_reads += 1; }

}elsif ((($read1 eq "intron") and ($read2 eq "ex-ex")) or (($read2 eq "intron") and ($read1 eq "ex-ex")) or (($read1 eq "ex-ex") and ($read2 eq "int-ex")) or (($read2 eq "ex-ex") and ($read1 eq "int-ex"))) {

{if (($dist) < $cutoff) { print "NORM $dist\n"; print norm_int "$data{$p}[0]\n$data{$p}[1]\n"; $total_reads += 1; } else { print "LARGE $dist\n"; print large_int "$data{$p}[0]\n$data{$p}[1]\n"; $total_reads += 1; }

}

my @sorted_temp_ar = sort ($read1_start, $read1_end, $read2_start, $read2_end);

my $dist = abs($sorted_temp_ar[2] - $sorted_temp_ar[1]);

print "$sorted_temp_ar[2] - $sorted_temp_ar[1]\n";

print "READ1_START $read1_start\n";

my $read1 = read_class($s1_tag, $e1_tag, $s1_index, $e1_index);

#print "READ1 $s1_tag, $e1_tag, $s1_index, $e1_index\nREAD2 $s2_tag, $e2_tag, $s2_index, $e2_index\n\n";

my $read2 = read_class($s2_tag, $e2_tag, $s2_index, $e2_index);

# classification

my @sorted_temp_ar = sort ($read1_start, $read1_end, $read2_start, $read2_end);

my $dist = abs($sorted_temp_ar[2] - $sorted_temp_ar[1]);

print "$sorted_temp_ar[2] - $sorted_temp_ar[1]\n";

print "READ1_START $read1_start\n";

my $read1 = read_class($s1_tag, $e1_tag, $s1_index, $e1_index);

#print "READ1 $s1_tag, $e1_tag, $s1_index, $e1_index\nREAD2 $s2_tag, $e2_tag, $s2_index, $e2_index\n\n";

my $read2 = read_class($s2_tag, $e2_tag, $s2_index, $e2_index);

# classification

# classification

if ((($read1 eq "intron") and ($read2 eq "intron")) or (($read1 eq "intron") and ($read2 eq "exon")) or (($read2 eq "intron") and ($read1 eq "exon")) or (($read1 eq "intron") and ($read2 eq "int-ex")) or (($read2 eq "intron") and ($read1 eq "int-ex"))) {if (($dist) < $cutoff) { print norm_pre "$data{$p}[0]\n$data{$p}[1]\n"; $total_reads += 1; }

else { print large_int "$data{$p}[0]\n$data{$p}[1]\n"; $total_reads += 1; }

}elsif ((($read1 eq "intron") and ($read2 eq "ex-ex")) or (($read2 eq "intron") and ($read1 eq "ex-ex")) or (($read1 eq "ex-ex") and ($read2 eq "int-ex")) or (($read2 eq "ex-ex") and ($read1 eq "int-ex"))) {

# classification

if ((($read1 eq "intron") and ($read2 eq "intron")) or (($read1 eq "intron") and ($read2 eq "exon")) or (($read2 eq "intron") and ($read1 eq "exon")) or (($read1 eq "intron") and ($read2 eq "int-ex")) or (($read2 eq "intron") and ($read1 eq "int-ex"))) {if (($dist) < $cutoff) { print norm_pre "$data{$p}[0]\n$data{$p}[1]\n"; $total_reads += 1; }

else { print large_int "$data{$p}[0]\n$data{$p}[1]\n"; $total_reads += 1; }

}elsif ((($read1 eq "intron") and ($read2 eq "ex-ex")) or (($read2 eq "intron") and ($read1 eq "ex-ex")) or (($read1 eq "ex-ex") and ($read2 eq "int-ex")) or (($read2 eq "ex-ex") and ($read1 eq "int-ex"))) {

my @sorted_temp_ar = sort ($read1_start, $read1_end, $read2_start, $read2_end);

my $dist = abs($sorted_temp_ar[2] - $sorted_temp_ar[1]);

print "$sorted_temp_ar[2] - $sorted_temp_ar[1]\n";

print "READ1_START $read1_start\n";

my $read1 = read_class($s1_tag, $e1_tag, $s1_index, $e1_index);

#print "READ1 $s1_tag, $e1_tag, $s1_index, $e1_index\nREAD2 $s2_tag, $e2_tag, $s2_index, $e2_index\n\n";

my $read2 = read_class($s2_tag, $e2_tag, $s2_index, $e2_index);

# classification

# classification

if ((($read1 eq "intron") and ($read2 eq "intron")) or (($read1 eq "intron") and ($read2 eq "exon")) or (($read2 eq "intron") and ($read1 eq "exon")) or (($read1 eq "intron") and ($read2 eq "int-ex")) or (($read2 eq "intron") and ($read1 eq "int-ex"))) {if (($dist) < $cutoff) { print norm_pre "$data{$p}[0]\n$data{$p}[1]\n"; $total_reads += 1; }

else { print large_int "$data{$p}[0]\n$data{$p}[1]\n"; $total_reads += 1; }

}elsif ((($read1 eq "intron") and ($read2 eq "ex-ex")) or (($read2 eq "intron") and ($read1 eq "ex-ex")) or (($read1 eq "ex-ex") and ($read2 eq "int-ex")) or (($read2 eq "ex-ex") and ($read1 eq "int-ex"))) {

{if (($dist) < $cutoff) { print "NORM $dist\n"; print norm_int "$data{$p}[0]\n$data{$p}[1]\n"; $total_reads += 1; } else { print "LARGE $dist\n"; print large_int "$data{$p}[0]\n$data{$p}[1]\n"; $total_reads += 1; }

}

{if (($dist) < $cutoff) { print "NORM $dist\n"; print norm_int "$data{$p}[0]\n$data{$p}[1]\n"; $total_reads += 1; } else { print "LARGE $dist\n"; print large_int "$data{$p}[0]\n$data{$p}[1]\n"; $total_reads += 1; }

}

# classification

if ((($read1 eq "intron") and ($read2 eq "intron")) or (($read1 eq "intron") and ($read2 eq "exon")) or (($read2 eq "intron") and ($read1 eq "exon")) or (($read1 eq "intron") and ($read2 eq "int-ex")) or (($read2 eq "intron") and ($read1 eq "int-ex"))) {if (($dist) < $cutoff) { print norm_pre "$data{$p}[0]\n$data{$p}[1]\n"; $total_reads += 1; }

else { print large_int "$data{$p}[0]\n$data{$p}[1]\n"; $total_reads += 1; }

}elsif ((($read1 eq "intron") and ($read2 eq "ex-ex")) or (($read2 eq "intron") and ($read1 eq "ex-ex")) or (($read1 eq "ex-ex") and ($read2 eq "int-ex")) or (($read2 eq "ex-ex") and ($read1 eq "int-ex"))) {

{if (($dist) < $cutoff) { print "NORM $dist\n"; print norm_int "$data{$p}[0]\n$data{$p}[1]\n"; $total_reads += 1; } else { print "LARGE $dist\n"; print large_int "$data{$p}[0]\n$data{$p}[1]\n"; $total_reads += 1; }

}

my @sorted_temp_ar = sort ($read1_start, $read1_end, $read2_start, $read2_end);

my $dist = abs($sorted_temp_ar[2] - $sorted_temp_ar[1]);

print "$sorted_temp_ar[2] - $sorted_temp_ar[1]\n";

print "READ1_START $read1_start\n";

my $read1 = read_class($s1_tag, $e1_tag, $s1_index, $e1_index);

#print "READ1 $s1_tag, $e1_tag, $s1_index, $e1_index\nREAD2 $s2_tag, $e2_tag, $s2_index, $e2_index\n\n";

my $read2 = read_class($s2_tag, $e2_tag, $s2_index, $e2_index);

# classification

my @sorted_temp_ar = sort ($read1_start, $read1_end, $read2_start, $read2_end);

my $dist = abs($sorted_temp_ar[2] - $sorted_temp_ar[1]);

print "$sorted_temp_ar[2] - $sorted_temp_ar[1]\n";

print "READ1_START $read1_start\n";

my $read1 = read_class($s1_tag, $e1_tag, $s1_index, $e1_index);

#print "READ1 $s1_tag, $e1_tag, $s1_index, $e1_index\nREAD2 $s2_tag, $e2_tag, $s2_index, $e2_index\n\n";

my $read2 = read_class($s2_tag, $e2_tag, $s2_index, $e2_index);

# classification

# classification

if ((($read1 eq "intron") and ($read2 eq "intron")) or (($read1 eq "intron") and ($read2 eq "exon")) or (($read2 eq "intron") and ($read1 eq "exon")) or (($read1 eq "intron") and ($read2 eq "int-ex")) or (($read2 eq "intron") and ($read1 eq "int-ex"))) {if (($dist) < $cutoff) { print norm_pre "$data{$p}[0]\n$data{$p}[1]\n"; $total_reads += 1; }

else { print large_int "$data{$p}[0]\n$data{$p}[1]\n"; $total_reads += 1; }

}elsif ((($read1 eq "intron") and ($read2 eq "ex-ex")) or (($read2 eq "intron") and ($read1 eq "ex-ex")) or (($read1 eq "ex-ex") and ($read2 eq "int-ex")) or (($read2 eq "ex-ex") and ($read1 eq "int-ex"))) {

my @sorted_temp_ar = sort ($read1_start, $read1_end, $read2_start, $read2_end);

my $dist = abs($sorted_temp_ar[2] - $sorted_temp_ar[1]);

print "$sorted_temp_ar[2] - $sorted_temp_ar[1]\n";

print "READ1_START $read1_start\n";

my $read1 = read_class($s1_tag, $e1_tag, $s1_index, $e1_index);

#print "READ1 $s1_tag, $e1_tag, $s1_index, $e1_index\nREAD2 $s2_tag, $e2_tag, $s2_index, $e2_index\n\n";

my $read2 = read_class($s2_tag, $e2_tag, $s2_index, $e2_index);

# classification

my @sorted_temp_ar = sort ($read1_start, $read1_end, $read2_start, $read2_end);

my $dist = abs($sorted_temp_ar[2] - $sorted_temp_ar[1]);

print "$sorted_temp_ar[2] - $sorted_temp_ar[1]\n";

print "READ1_START $read1_start\n";

my $read1 = read_class($s1_tag, $e1_tag, $s1_index, $e1_index);

#print "READ1 $s1_tag, $e1_tag, $s1_index, $e1_index\nREAD2 $s2_tag, $e2_tag, $s2_index, $e2_index\n\n";

my $read2 = read_class($s2_tag, $e2_tag, $s2_index, $e2_index);

# classification

# classification

if ((($read1 eq "intron") and ($read2 eq "intron")) or (($read1 eq "intron") and ($read2 eq "exon")) or (($read2 eq "intron") and ($read1 eq "exon")) or (($read1 eq "intron") and ($read2 eq "int-ex")) or (($read2 eq "intron") and ($read1 eq "int-ex"))) {if (($dist) < $cutoff) { print norm_pre "$data{$p}[0]\n$data{$p}[1]\n"; $total_reads += 1; }

else { print large_int "$data{$p}[0]\n$data{$p}[1]\n"; $total_reads += 1; }

}elsif ((($read1 eq "intron") and ($read2 eq "ex-ex")) or (($read2 eq "intron") and ($read1 eq "ex-ex")) or (($read1 eq "ex-ex") and ($read2 eq "int-ex")) or (($read2 eq "ex-ex") and ($read1 eq "int-ex"))) {

# classification

if ((($read1 eq "intron") and ($read2 eq "intron")) or (($read1 eq "intron") and ($read2 eq "exon")) or (($read2 eq "intron") and ($read1 eq "exon")) or (($read1 eq "intron") and ($read2 eq "int-ex")) or (($read2 eq "intron") and ($read1 eq "int-ex"))) {if (($dist) < $cutoff) { print norm_pre "$data{$p}[0]\n$data{$p}[1]\n"; $total_reads += 1; }

else { print large_int "$data{$p}[0]\n$data{$p}[1]\n"; $total_reads += 1; }

}elsif ((($read1 eq "intron") and ($read2 eq "ex-ex")) or (($read2 eq "intron") and ($read1 eq "ex-ex")) or (($read1 eq "ex-ex") and ($read2 eq "int-ex")) or (($read2 eq "ex-ex") and ($read1 eq "int-ex"))) {

my @sorted_temp_ar = sort ($read1_start, $read1_end, $read2_start, $read2_end);

my $dist = abs($sorted_temp_ar[2] - $sorted_temp_ar[1]);

print "$sorted_temp_ar[2] - $sorted_temp_ar[1]\n";

print "READ1_START $read1_start\n";

my $read1 = read_class($s1_tag, $e1_tag, $s1_index, $e1_index);

#print "READ1 $s1_tag, $e1_tag, $s1_index, $e1_index\nREAD2 $s2_tag, $e2_tag, $s2_index, $e2_index\n\n";

my $read2 = read_class($s2_tag, $e2_tag, $s2_index, $e2_index);

# classification

# classification

if ((($read1 eq "intron") and ($read2 eq "intron")) or (($read1 eq "intron") and ($read2 eq "exon")) or (($read2 eq "intron") and ($read1 eq "exon")) or (($read1 eq "intron") and ($read2 eq "int-ex")) or (($read2 eq "intron") and ($read1 eq "int-ex"))) {if (($dist) < $cutoff) { print norm_pre "$data{$p}[0]\n$data{$p}[1]\n"; $total_reads += 1; }

else { print large_int "$data{$p}[0]\n$data{$p}[1]\n"; $total_reads += 1; }

}elsif ((($read1 eq "intron") and ($read2 eq "ex-ex")) or (($read2 eq "intron") and ($read1 eq "ex-ex")) or (($read1 eq "ex-ex") and ($read2 eq "int-ex")) or (($read2 eq "ex-ex") and ($read1 eq "int-ex"))) {

# classification

if ((($read1 eq "intron") and ($read2 eq "intron")) or (($read1 eq "intron") and ($read2 eq "exon")) or (($read2 eq "intron") and ($read1 eq "exon")) or (($read1 eq "intron") and ($read2 eq "int-ex")) or (($read2 eq "intron") and ($read1 eq "int-ex"))) {if (($dist) < $cutoff) { print norm_pre "$data{$p}[0]\n$data{$p}[1]\n"; $total_reads += 1; }

else { print large_int "$data{$p}[0]\n$data{$p}[1]\n"; $total_reads += 1; }

}elsif ((($read1 eq "intron") and ($read2 eq "ex-ex")) or (($read2 eq "intron") and ($read1 eq "ex-ex")) or (($read1 eq "ex-ex") and ($read2 eq "int-ex")) or (($read2 eq "ex-ex") and ($read1 eq "int-ex"))) {

my @sorted_temp_ar = sort ($read1_start, $read1_end, $read2_start, $read2_end);

my $dist = abs($sorted_temp_ar[2] - $sorted_temp_ar[1]);

print "$sorted_temp_ar[2] - $sorted_temp_ar[1]\n";

print "READ1_START $read1_start\n";

my $read1 = read_class($s1_tag, $e1_tag, $s1_index, $e1_index);

#print "READ1 $s1_tag, $e1_tag, $s1_index, $e1_index\nREAD2 $s2_tag, $e2_tag, $s2_index, $e2_index\n\n";

my $read2 = read_class($s2_tag, $e2_tag, $s2_index, $e2_index);

# classification

# classification

if ((($read1 eq "intron") and ($read2 eq "intron")) or (($read1 eq "intron") and ($read2 eq "exon")) or (($read2 eq "intron") and ($read1 eq "exon")) or (($read1 eq "intron") and ($read2 eq "int-ex")) or (($read2 eq "intron") and ($read1 eq "int-ex"))) {if (($dist) < $cutoff) { print norm_pre "$data{$p}[0]\n$data{$p}[1]\n"; $total_reads += 1; }

else { print large_int "$data{$p}[0]\n$data{$p}[1]\n"; $total_reads += 1; }

}elsif ((($read1 eq "intron") and ($read2 eq "ex-ex")) or (($read2 eq "intron") and ($read1 eq "ex-ex")) or (($read1 eq "ex-ex") and ($read2 eq "int-ex")) or (($read2 eq "ex-ex") and ($read1 eq "int-ex"))) {

{if (($dist) < $cutoff) { print "NORM $dist\n"; print norm_int "$data{$p}[0]\n$data{$p}[1]\n"; $total_reads += 1; } else { print "LARGE $dist\n"; print large_int "$data{$p}[0]\n$data{$p}[1]\n"; $total_reads += 1; }

}

my @sorted_temp_ar = sort ($read1_start, $read1_end, $read2_start, $read2_end);

my $dist = abs($sorted_temp_ar[2] - $sorted_temp_ar[1]);

print "$sorted_temp_ar[2] - $sorted_temp_ar[1]\n";

print "READ1_START $read1_start\n";

my $read1 = read_class($s1_tag, $e1_tag, $s1_index, $e1_index);

#print "READ1 $s1_tag, $e1_tag, $s1_index, $e1_index\nREAD2 $s2_tag, $e2_tag, $s2_index, $e2_index\n\n";

my $read2 = read_class($s2_tag, $e2_tag, $s2_index, $e2_index);

# classification

my @sorted_temp_ar = sort ($read1_start, $read1_end, $read2_start, $read2_end);

my $dist = abs($sorted_temp_ar[2] - $sorted_temp_ar[1]);

print "$sorted_temp_ar[2] - $sorted_temp_ar[1]\n";

print "READ1_START $read1_start\n";

my $read1 = read_class($s1_tag, $e1_tag, $s1_index, $e1_index);

#print "READ1 $s1_tag, $e1_tag, $s1_index, $e1_index\nREAD2 $s2_tag, $e2_tag, $s2_index, $e2_index\n\n";

my $read2 = read_class($s2_tag, $e2_tag, $s2_index, $e2_index);

# classification

# classification

if ((($read1 eq "intron") and ($read2 eq "intron")) or (($read1 eq "intron") and ($read2 eq "exon")) or (($read2 eq "intron") and ($read1 eq "exon")) or (($read1 eq "intron") and ($read2 eq "int-ex")) or (($read2 eq "intron") and ($read1 eq "int-ex"))) {if (($dist) < $cutoff) { print norm_pre "$data{$p}[0]\n$data{$p}[1]\n"; $total_reads += 1; }

else { print large_int "$data{$p}[0]\n$data{$p}[1]\n"; $total_reads += 1; }

}elsif ((($read1 eq "intron") and ($read2 eq "ex-ex")) or (($read2 eq "intron") and ($read1 eq "ex-ex")) or (($read1 eq "ex-ex") and ($read2 eq "int-ex")) or (($read2 eq "ex-ex") and ($read1 eq "int-ex"))) {

# classification

if ((($read1 eq "intron") and ($read2 eq "intron")) or (($read1 eq "intron") and ($read2 eq "exon")) or (($read2 eq "intron") and ($read1 eq "exon")) or (($read1 eq "intron") and ($read2 eq "int-ex")) or (($read2 eq "intron") and ($read1 eq "int-ex"))) {if (($dist) < $cutoff) { print norm_pre "$data{$p}[0]\n$data{$p}[1]\n"; $total_reads += 1; }

else { print large_int "$data{$p}[0]\n$data{$p}[1]\n"; $total_reads += 1; }

}elsif ((($read1 eq "intron") and ($read2 eq "ex-ex")) or (($read2 eq "intron") and ($read1 eq "ex-ex")) or (($read1 eq "ex-ex") and ($read2 eq "int-ex")) or (($read2 eq "ex-ex") and ($read1 eq "int-ex"))) {

my @sorted_temp_ar = sort ($read1_start, $read1_end, $read2_start, $read2_end);

my $dist = abs($sorted_temp_ar[2] - $sorted_temp_ar[1]);

print "$sorted_temp_ar[2] - $sorted_temp_ar[1]\n";

print "READ1_START $read1_start\n";

my $read1 = read_class($s1_tag, $e1_tag, $s1_index, $e1_index);

#print "READ1 $s1_tag, $e1_tag, $s1_index, $e1_index\nREAD2 $s2_tag, $e2_tag, $s2_index, $e2_index\n\n";

my $read2 = read_class($s2_tag, $e2_tag, $s2_index, $e2_index);

# classification

# classification

if ((($read1 eq "intron") and ($read2 eq "intron")) or (($read1 eq "intron") and ($read2 eq "exon")) or (($read2 eq "intron") and ($read1 eq "exon")) or (($read1 eq "intron") and ($read2 eq "int-ex")) or (($read2 eq "intron") and ($read1 eq "int-ex"))) {if (($dist) < $cutoff) { print norm_pre "$data{$p}[0]\n$data{$p}[1]\n"; $total_reads += 1; }

else { print large_int "$data{$p}[0]\n$data{$p}[1]\n"; $total_reads += 1; }

}elsif ((($read1 eq "intron") and ($read2 eq "ex-ex")) or (($read2 eq "intron") and ($read1 eq "ex-ex")) or (($read1 eq "ex-ex") and ($read2 eq "int-ex")) or (($read2 eq "ex-ex") and ($read1 eq "int-ex"))) {

{if (($dist) < $cutoff) { print "NORM $dist\n"; print norm_int "$data{$p}[0]\n$data{$p}[1]\n"; $total_reads += 1; } else { print "LARGE $dist\n"; print large_int "$data{$p}[0]\n$data{$p}[1]\n"; $total_reads += 1; }

}

{if (($dist) < $cutoff) { print "NORM $dist\n"; print norm_int "$data{$p}[0]\n$data{$p}[1]\n"; $total_reads += 1; } else { print "LARGE $dist\n"; print large_int "$data{$p}[0]\n$data{$p}[1]\n"; $total_reads += 1; }

}

# classification

if ((($read1 eq "intron") and ($read2 eq "intron")) or (($read1 eq "intron") and ($read2 eq "exon")) or (($read2 eq "intron") and ($read1 eq "exon")) or (($read1 eq "intron") and ($read2 eq "int-ex")) or (($read2 eq "intron") and ($read1 eq "int-ex"))) {if (($dist) < $cutoff) { print norm_pre "$data{$p}[0]\n$data{$p}[1]\n"; $total_reads += 1; }

else { print large_int "$data{$p}[0]\n$data{$p}[1]\n"; $total_reads += 1; }

}elsif ((($read1 eq "intron") and ($read2 eq "ex-ex")) or (($read2 eq "intron") and ($read1 eq "ex-ex")) or (($read1 eq "ex-ex") and ($read2 eq "int-ex")) or (($read2 eq "ex-ex") and ($read1 eq "int-ex"))) {

{if (($dist) < $cutoff) { print "NORM $dist\n"; print norm_int "$data{$p}[0]\n$data{$p}[1]\n"; $total_reads += 1; } else { print "LARGE $dist\n"; print large_int "$data{$p}[0]\n$data{$p}[1]\n"; $total_reads += 1; }

}

{if (($dist) < $cutoff) { print "NORM $dist\n"; print norm_int "$data{$p}[0]\n$data{$p}[1]\n"; $total_reads += 1; } else { print "LARGE $dist\n"; print large_int "$data{$p}[0]\n$data{$p}[1]\n"; $total_reads += 1; }

}

# classification

if ((($read1 eq "intron") and ($read2 eq "intron")) or (($read1 eq "intron") and ($read2 eq "exon")) or (($read2 eq "intron") and ($read1 eq "exon")) or (($read1 eq "intron") and ($read2 eq "int-ex")) or (($read2 eq "intron") and ($read1 eq "int-ex"))) {if (($dist) < $cutoff) { print norm_pre "$data{$p}[0]\n$data{$p}[1]\n"; $total_reads += 1; }

else { print large_int "$data{$p}[0]\n$data{$p}[1]\n"; $total_reads += 1; }

}elsif ((($read1 eq "intron") and ($read2 eq "ex-ex")) or (($read2 eq "intron") and ($read1 eq "ex-ex")) or (($read1 eq "ex-ex") and ($read2 eq "int-ex")) or (($read2 eq "ex-ex") and ($read1 eq "int-ex"))) {

{if (($dist) < $cutoff) { print "NORM $dist\n"; print norm_int "$data{$p}[0]\n$data{$p}[1]\n"; $total_reads += 1; } else { print "LARGE $dist\n"; print large_int "$data{$p}[0]\n$data{$p}[1]\n"; $total_reads += 1; }

}

# classification

if ((($read1 eq "intron") and ($read2 eq "intron")) or (($read1 eq "intron") and ($read2 eq "exon")) or (($read2 eq "intron") and ($read1 eq "exon")) or (($read1 eq "intron") and ($read2 eq "int-ex")) or (($read2 eq "intron") and ($read1 eq "int-ex"))) {if (($dist) < $cutoff) { print norm_pre "$data{$p}[0]\n$data{$p}[1]\n"; $total_reads += 1; }

else { print large_int "$data{$p}[0]\n$data{$p}[1]\n"; $total_reads += 1; }

}elsif ((($read1 eq "intron") and ($read2 eq "ex-ex")) or (($read2 eq "intron") and ($read1 eq "ex-ex")) or (($read1 eq "ex-ex") and ($read2 eq "int-ex")) or (($read2 eq "ex-ex") and ($read1 eq "int-ex"))) {

# classification

if ((($read1 eq "intron") and ($read2 eq "intron")) or (($read1 eq "intron") and ($read2 eq "exon")) or (($read2 eq "intron") and ($read1 eq "exon")) or (($read1 eq "intron") and ($read2 eq "int-ex")) or (($read2 eq "intron") and ($read1 eq "int-ex"))) {if (($dist) < $cutoff) { print norm_pre "$data{$p}[0]\n$data{$p}[1]\n"; $total_reads += 1; }

else { print large_int "$data{$p}[0]\n$data{$p}[1]\n"; $total_reads += 1; }

}elsif ((($read1 eq "intron") and ($read2 eq "ex-ex")) or (($read2 eq "intron") and ($read1 eq "ex-ex")) or (($read1 eq "ex-ex") and ($read2 eq "int-ex")) or (($read2 eq "ex-ex") and ($read1 eq "int-ex"))) {

{if (($dist) < $cutoff) { print "NORM $dist\n"; print norm_int "$data{$p}[0]\n$data{$p}[1]\n"; $total_reads += 1; } else { print "LARGE $dist\n"; print large_int "$data{$p}[0]\n$data{$p}[1]\n"; $total_reads += 1; }

}

{if (($dist) < $cutoff) { print "NORM $dist\n"; print norm_int "$data{$p}[0]\n$data{$p}[1]\n"; $total_reads += 1; } else { print "LARGE $dist\n"; print large_int "$data{$p}[0]\n$data{$p}[1]\n"; $total_reads += 1; }

}

# classification

if ((($read1 eq "intron") and ($read2 eq "intron")) or (($read1 eq "intron") and ($read2 eq "exon")) or (($read2 eq "intron") and ($read1 eq "exon")) or (($read1 eq "intron") and ($read2 eq "int-ex")) or (($read2 eq "intron") and ($read1 eq "int-ex"))) {if (($dist) < $cutoff) { print norm_pre "$data{$p}[0]\n$data{$p}[1]\n"; $total_reads += 1; }

else { print large_int "$data{$p}[0]\n$data{$p}[1]\n"; $total_reads += 1; }

}elsif ((($read1 eq "intron") and ($read2 eq "ex-ex")) or (($read2 eq "intron") and ($read1 eq "ex-ex")) or (($read1 eq "ex-ex") and ($read2 eq "int-ex")) or (($read2 eq "ex-ex") and ($read1 eq "int-ex"))) {

{if (($dist) < $cutoff) { print "NORM $dist\n"; print norm_int "$data{$p}[0]\n$data{$p}[1]\n"; $total_reads += 1; } else { print "LARGE $dist\n"; print large_int "$data{$p}[0]\n$data{$p}[1]\n"; $total_reads += 1; }

}

A telescope for the RNA universe

Novel bioinformatic approaches to

analyze RNA sequencing data

Referenties

GERELATEERDE DOCUMENTEN

Als de kandidaat de vertaalslag naar RNA heeft gemist (twee maal T in plaats van U), maar de 5´ en 3´ uiteinden goed heeft aangegeven, wordt 1 scorepunt toegekend. 32

When selecting for large contigs (>500 nucleotides), a number of novel rainbow trout gene sequences were identified in this study: 1,085 and 1,228 novel gene sequences for red and

In addition to the essential RdRp domain motifs mentioned above, CoV nsp12 has a motif G, which is considered a signature sequence for primer-dependent RNA polymerases (Gorbalenya

-Soort aminozuur - Aantal aminozuren - Volgorde aminozuren Codon: groepje nucleotiden dat codeert voor

The RNA world hypothesis suggests that RNA molecules were generated from prebiotic components enabling the generation of early life forms4. The catalytic and informational

With bulk RNAseq it is possible to determine the averaged expression of antiviral genes in host cell populations as a response to bacterial infection. This approach assumes

Indeed, RNA secondary structure analysis of the SARS-CoV genomic 3' UTR identified a hairpin structure that overlaps with a pseudoknot (Fig. 2-2) and is similar to the structures

SNPs were selected based on different criteria like genotype call rate, minor allele frequency, Hardy–Weinberg equilibrium and linkage disequilibrium. A panel of 50 SNPs was