#!/usr/bin/perl -w # # Gene-Splitting Web Tool: Design the primers for splitting a gene, # and report some other useful information too # Copyright (C) 2007, Jim Dickson, Andrew Martens # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see . # # use CGI; my $page = new CGI; print $page->header( -type => 'text/html'); #print "Content-type: text/html \n\n"; print $page->start_html(-title => 'Gene Splitting', -style => { -src => 'style.css'} ); #the DNA sequence of the gene to be split my $seq = $page->param("textfield"); $seq = clean($seq); #the extra nucleotide, to prevent a frameshift my $x = $page->param("RadioGroup1"); #the point of insertion, by amino acid number my $cut = $page->param("textarea"); $cut--; my $cut2 = $cut + 1; #the width of the output for printing sequences my $len = 39; if (length($seq) < 40) { print "

Error: the length of your sequence is less than 40 base pairs.

"; break(); } if ( ($cut < 21) || (length($seq)-$cut < 21) ) { print "

Error: your cut is too close to the extremities of your protein.

"; break(); } if (length($seq) < 3 * $cut + 21) { print "

Error: your cut is either greater than the length of your sequence, or it is too close to the carboxyl terminus.

"; break(); } #define the elements in the biobricks: (global) #4 bases - sienna $bases = GCAT; #EcoRI - red $ecori = GAATTC; #PstI - green $psti = CTGCAG; #NotI - orange #there are several ones here because it overlaps with other sites $noti1 = GCGGCCGC; $noti2 = CGGCCGC; $noti3 = GCGGCCG; #Spacer $spaceT = T; $spaceA = A; #XbaI - cyan $xbai = TCTAGA; #SpeI - dark blue $spei = ACTAGT; #scar sequence $seqScar = ACTAGA; #hix sequence $seqHix = TTATCAAAAACCATGGTTTTTGATAA; my $seqScarHixScarXTrans = dna2protein($seqScar.$seqHix.$seqScar.$x); #extra nucleotide = indigo, already defined as $x my $primerA = $bases . $ecori . $noti1 . $spaceT . $xbai; #GCATGAATTCGCGGCCGCTTCTAGA my $primerB = $bases . $psti . $noti2 . $spaceT . $spei; #GCATCTGCAGCGGCCGCTACTAGT my $primerC = $primerA; #GCATGAATTCGCGGCCGCTTCTAGA my $primerD = $primerB; #GCATCTGCAGCGGCCGCTACTAGT my $primerA20 = substr($seq, 0, 25); my $primerC20 = substr($seq, 3*($cut+1), 25); #compute reverse strand my $revcomp = reverse $seq; #find its complement $revcomp =~ tr/ACGTacgt/TGCAtgca/; my $primerD20 = substr($revcomp, 0, 25); my $bhelper = substr($seq, 3*($cut+1)-25, 25); my $revcompb = reverse $bhelper; $revcompb =~ tr/ACGTacgt/TGCAtgca/; my $primerB20 = substr($revcompb, 0, 25); # these 4 strings now contain all the primer information, # with the restriction sites on the left and 25 nucleotides following # we will later use melting pt formulas to decide if we want them a little shorter (20-25 range) $primerA = $primerA . $primerA20; $primerB = $primerB . $primerB20; $primerC = $primerC . $x . $primerC20; $primerD = $primerD . $primerD20; #the DNA sequences before and after the split my $seq1st = substr($seq, 0, 3*$cut2); my $seq2nd = substr($seq, 3*$cut2, length($seq) - 3*$cut2); #compute best lengths, melting points for primers my @ansA_B = bestMelt($primerA20, $primerB20); my @ansC_D = bestMelt($primerC20, $primerD20); #modify the lengths based on computed melting temperatures $primerA20 = substr($primerA20, 0, $ansA_B[0]); $primerB20 = substr($primerB20, 0, $ansA_B[1]); $primerC20 = substr($primerC20, 0, $ansC_D[0]); $primerD20 = substr($primerD20, 0, $ansC_D[1]); #1st portion - create this variable to calculate length my $Pcr1 = $ecori . $noti1 . $spaceT . $xbai . $seq1st . $spei . $spaceA . $noti3 . $psti; #2nd portion - create this variable to calculate length my $Pcr2 = $ecori . $noti1 . $spaceT . $xbai . $x . $seq2nd . $spei . $spaceA . $noti3 . $psti; # final sequence of gene + insertion my $finalsequence = $seq1st . $seqScar . $seqHix . $seqScar . $x . $seq2nd; #print: biobrick-gene1-scar-hix-scar-gene2-biobrick my $finlig = $ecori . $noti1 . $spaceT . $xbai . $finalsequence . $spei . $spaceA . $noti1 . $psti; #Output to web page, most calculations should have been performed by now print "

Original DNA Sequence: (length " . length($seq). " base pairs)

"; print '
'; printWrap($seq); print '
'; print "

Gene sequence left of the split: (length " . length($seq1st) . " base pairs)

"; print '
'; printWrap($seq1st); print '
'; print "

Gene sequence right of the split: (length " . length($seq2nd) . " base pairs)

"; print '
'; printWrap($seq2nd); print '
'; print << "primeroutput";

Amino acid number of insertion point: $cut2

Extra Base: $x

Primers:

Primer A (length $ansA_B[0] bp):

$bases$ecori$noti1$spaceT$xbai$primerA20

Primer B (length $ansA_B[1] bp):

$bases$psti$noti2$spaceA$spei$primerB20

Primer C (length $ansC_D[0] bp):

$bases$ecori$noti1$spaceT$xbai$x$primerC20

Primer D (length $ansC_D[1] bp):

$bases$psti$noti2$spaceA$spei$primerD20

Four added bases, EcoRI site, NotI site, PstI site, XbaI site, SpeI site, added nucleotide


primeroutput print "

Melting temperature for primer A (length $ansA_B[0] bp):

"; print "

"; printf("%.2f", $ansA_B[2]); print " degrees Celsius

"; print "

"; printf("%.2f", $ansA_B[4]); print "% GC content

"; print "

Melting temperature for primer B (length $ansA_B[1] bp):

"; print "

"; printf("%.2f", $ansA_B[3]); print " degrees Celsius

"; print "

"; printf("%.2f", $ansA_B[5]); print "% GC content

"; print "

Melting temperature for primer C (length $ansC_D[0] bp):

"; print "

"; printf("%.2f", $ansC_D[2]); print " degrees Celsius

"; print "

"; printf("%.2f",$ansC_D[4]); print "% GC content

"; print "

Melting temperature for primer D (length $ansC_D[1] bp):

"; print "

"; printf("%.2f", $ansC_D[3]); print " degrees Celsius

"; print "

"; printf("%.2f", $ansC_D[5]); print "% GC content

"; print << "moreoutput";

Scar Sequence:

$seqScar

Hix Sequence:

$seqHix

Inserted amino acids (scar + hix + scar + 39th base):

$seqScarHixScarXTrans

moreoutput #print the two intermediary pieces that result from the PCR reaction print "

The 1st PCR product: (length " . length($Pcr1) . " base pairs)

\n"; bioBrick($seq1st); print "

The 2nd PCR product: (length " . length($Pcr2) . " base pairs)

\n"; bioBrick($x . $seq2nd); print "

The final, ligated product: (length " . length($finlig) . " base pairs)

\n"; bioBrick($finalsequence); print "
"; # this final sequence should not contain any of the following: # EcoRI restriction site, PstI restriction site, XbaI restriction site, NotI restriction site, SpeI restriction site, XbaI methylation site. if ($finalsequence =~ m/$ecori/i) { print "Warning: EcoRI sequence detected!
\n"; } if ($finalsequence =~ m/$noti1/i) { print "Warning: NotI sequence detected!
\n"; } if ($finalsequence =~ m/$xbai/i) { print "Warning: XbaI sequence detected!
\n"; } if ($finalsequence =~ m/$spei/i) { print "Warning: SpeI sequence detected!
\n"; } if ($finalsequence =~ m/$psti/i) { print "Warning: PstI sequence detected!
\n"; } if ($finalsequence =~ m/actagatc/i) { print "Warning: XbaI methylation sequence detected!
\n"; } if ($finalsequence =~ m/gatctagt/i) { print "Warning: XbaI methylation sequence detected!
\n"; } #highlight the hix, scar, extra print "

Nucleotides to be translated: (length " . length($finalsequence) . " base pairs)

\n"; print "
"; $remaining = $len; #print the seq1st portion $remaining = printWrap($seq1st, $remaining, $len, "black"); #now, get ready to print colored parts $remaining = printWrap($seqScar, $remaining, $len, "blue"); $remaining = printWrap($seqHix, $remaining, $len, "red"); $remaining = printWrap($seqScar, $remaining, $len, "blue"); #print the extra nucleotide print ""; $remaining = printWrap($x, $remaining, $len, "purple"); print ""; #print the 2nd half of the original sequence $remaining = printWrap($seq2nd, $remaining, $len, "black"); print "

"; print "

Scar sequence, hix sequence, extra nucleotide

\n"; #print: translation of final DNA sequence with inserted hix site #color amino acids derived from the scars, hix, extra base my $finalTranslation = dna2protein($finalsequence); print "

Translated sequence: (length "; print length($finalTranslation); print " amino acids and stops)

"; dna2protein_color($finalsequence, $cut2); print "

Inserted amino acids (from scars and hix)

\n"; print $page->end_html; exit(0); #converts dna sequence into translated amino acid sequence sub dna2protein{ my @protein; my $ProteinString; my $base; my @codon; %aa_table = ( TTT => 'F', TTC => 'F', TTA => 'L', TTG => 'L', TCT => 'S', TCC => 'S', TCA => 'S', TCG => 'S', TAT => 'Y', TAC => 'Y', TAA => '*', TAG => '*', TGT => 'C', TGC => 'C', TGA => '*', TGG => 'W', CTT => 'L', CTC => 'L', CTA => 'L', CTG => 'L', CCT => 'P', CCC => 'P', CCA => 'P', CCG => 'P', CAT => 'H', CAC => 'H', CAA => 'Q', CAG => 'Q', CGT => 'R', CGC => 'R', CGA => 'R', CGG => 'R', ATT => 'I', ATC => 'I', ATA => 'I', ATG => 'M', ACT => 'T', ACC => 'T', ACA => 'T', ACG => 'T', AAT => 'N', AAC => 'N', AAA => 'K', AAG => 'K', AGT => 'S', AGC => 'S', AGA => 'R', AGG => 'R', GTT => 'V', GTC => 'V', GTA => 'V', GTG => 'V', GCT => 'A', GCC => 'A', GCA => 'A', GCG => 'A', GAT => 'D', GAC => 'D', GAA => 'E', GAG => 'E', GGT => 'G', GGC => 'G', GGA => 'G', GGG => 'G', ); #obtain the DNA sequence as a string $dna = shift(@_); #Convert the String into an array before translating chomp $dna; @seq = split ("", $dna); my $count = 0; while ($count <= $#seq) { $base = $seq[$count]; if($base eq 'A' || $base eq 'C' || $base eq 'G' || $base eq 'T') { push(@codon, $base); } if($#codon == 2) { push(@protein, $aa_table{join("",@codon)}); undef(@codon); } $count++; } $ProteinString = join("",@protein); return $ProteinString; } #with a given insertion point, will print the sequence 1st fragment, its following 13 characters as green, and the sequence 2nd fragment #this is used when printing the protein sequence with the insert colored green sub dna2protein_color{ my $dna = $_[0]; my $insPt = $_[1]; my $len = 39; my $remaining = $len; #get the translated protein my $ProteinString = dna2protein($dna); #the 1st part, the insertion, the 2nd part my $prot1 = substr($ProteinString, 0, $insPt); my $inserted = substr($ProteinString, $insPt, 13); my $prot2 = substr($ProteinString, $insPt+13, length($ProteinString)); #begin output print "
"; #amino acids before insertion $remaining = printWrap($prot1, $remaining, $len, "black"); #the amino acids that are colored print ""; $remaining = printWrap($inserted, $remaining, $len, "green"); print ""; #amino acids after insertion $remaining = printWrap($prot2, $remaining, $len, "black"); print "
"; } #remove everything but A,T,C,G sub clean{ my ($input) = @_; $input = "\U$input"; $input =~ s/[^ATCG]//g; return $input; } #remove everything but G, C sub cleangc{ my ($input) = @_; $input = "\U$input"; $input =~ s/[^CG]//g; return $input; } #remove everything but A, T sub cleanat{ my ($input) = @_; $input = "\U$input"; $input =~ s/[^AT]//g; return $input; } #precondition: two inputs are DNA strands, length 25 #postcondition: returns best lengths, corresponding temperatures sub bestMelt{ my ($input1) = $_[0]; my ($input2) = $_[1]; my $besti=20; my $bestj=20; my $bestMelt1=100000; my $bestMelt2=100000; my $bestDiff =100000; #really big, ideally infinity for (my $i = 20; $i < 26; $i++) { my $str1 = substr($input1, 0, $i); my $melt1 = Proligo($str1); for (my $j = 20; $j < 26; $j++) { my $str2 = substr($input2, 0, $j); my $melt2 = Proligo($str2); my $diff = ($melt1 - $melt2)**2; #comparing differences with squares if ($diff < $bestDiff) { $besti = $i; $bestj = $j; $bestDiff = $diff; $bestMelt1 = $melt1; $bestMelt2 = $melt2; } } } #return i, j #melt(a length i), melt(b length j) #gc content 1, gc content 2 $answer[0] = $besti; $answer[1] = $bestj; $answer[2] = $bestMelt1; $answer[3] = $bestMelt2; #compute GC content my $gc1 = cleangc(substr($input1,0,$besti)); my $at1 = cleanat(substr($input1,0,$besti)); my $gcPercent1 = length($gc1)/$besti * 100; my $gc2 = cleangc(substr($input2,0,$bestj)); my $at2 = cleanat(substr($input2,0,$bestj)); my $gcPercent2 = length($gc2)/$bestj * 100; $answer[4] = $gcPercent1; $answer[5] = $gcPercent2; return @answer; } #precondition: input is DNA strand #postcondition: returns its melting temperature, taking into account salt cxn sub saltMelt{ my ($input) = @_; #use saltcxn-based formula my $na = 0.025; # my $k = 0.025; #the two add up to 50 mM my $gc = cleangc($input); my $at = cleanat($input); my $melting_Temperature = 81.5 + ( 16.6 * log($na + $k)/log(10) ) + ( 41 * length($gc) / length($input) ) - ( 675 / length($input) ); return $melting_Temperature; } #the simpler formula, for < 20 bp sub simpleMelt { my ($input) = @_; my $gc = cleangc($input); my $at = cleanat($input); my $answer = (4*length($gc) + 2*length($at)); return $answer; } #compute a melting point by using Proligo's formula sub Proligo { #entropy values for melting point calculations %dS = ( AA => '24', AC => '17.3', AG => '20.8', AT => '23.9', CA => '12.9', CC => '26.6', CG => '27.8', CT => '20.8', GA => '13.5', GC => '26.7', GG => '26.6', GT => '17.3', TA => '16.9', TC => '13.5', TG => '12.9', TT => '24'); #enthalpy values for melting point calculations %dH = ( AA => '9.1', AC => '6.5', AG => '7.8', AT => '8.6', CA => '5.8', CC => '11', CG => '11.9', CT => '7.8', GA => '5.6', GC => '11.1', GG => '11', GT => '6.5', TA => '6', TC => '5.6', TG => '5.8', TT => '9.1'); my ($seq) = @_; if (length($seq) > 0) { #use simple formula for <= 20 if (length($seq) <= 20) { return simpleMelt($seq); } else { my $sigma_dH = 0; my $sigma_dS = 0; for (my $i = 0; $i < length($seq) - 1; $i++) { my $sub = substr($seq, $i, 2); $sigma_dH += $dH{$sub}; $sigma_dS += $dS{$sub}; } return (-1000 * $sigma_dH / (-10.8 - $sigma_dS + 1.987 * -23.5) - 273.15 + 16.6 * -1.3); } } else { return -1; } } #this outputs the given string, flanked by colored biobrick ends #and formatted to be 39 characters wide sub bioBrick { my ($seq) = @_; my $len = 39; my $remaining = $len; print "

"; #first biobricks $remaining = printWrap($ecori, $remaining, $len, "red"); $remaining = printWrap($noti1, $remaining, $len, "orange"); $remaining = printWrap($spaceT, $remaining, $len, "black"); $remaining = printWrap($xbai, $remaining, $len, "cyan"); #the gene's sequence $remaining = printWrap($seq, $remaining, $len, "black"); #the last colored part $remaining = printWrap($spei, $remaining, $len, "blue"); $remaining = printWrap($spaceT, $remaining, $len, "black"); $remaining = printWrap($noti3, $remaining, $len, "orange"); $remaining = printWrap($psti, $remaining, $len, "green"); print "

"; } #given a string and the amount of space left on a line: print as much of the string as fits on the line, then
and the remainder. returns the number of characters remaining on the line #this also supports color information #uses a recursive method sub printWrap { # only 1 arg: assume $remaining == $len, $len = 39, font is black if (@_ == 1) { my $len = 39; my $remaining = $len; $remaining = printWrap(@_, $remaining, $len, "black"); return $remaining; } #we have 3 arguments: the sequence, # chars left on the line, and the length of a line elsif (@_ == 3 ) { my ($seq) = $_[0]; my ($remaining) = $_[1]; my ($len) = $_[2]; if ( length($seq) < $remaining) { print $seq; $remaining -= length($seq); } #use a recursive method else { #print however much fits on this line print substr($seq, 0, $remaining) . "
"; #this is how much of the string we have left for the new line my $nextln = substr($seq, $remaining, length($seq)); #reset remaining on the line to $len $remaining = $len; #recursive call $remaining = printWrap($nextln, $remaining, $len); } #return the amount remaining on the line in case we have more things to print return $remaining; } #4 arguments: 4th is color, other three are sent through the 3 argument subroutine elsif (@_ == 4) { print ''; #recursion my $remaining = printWrap($_[0], $_[1], $_[2]); print ""; return $remaining; } }