<?php
/*******************************************************************************
 *
 * LEIDEN OPEN VARIATION DATABASE (LOVD)
 *
 * Created     : 2004-09-21
 * Modified    : 2009-02-03
 * Version     : 2.3
 * For LOVD    : 2.0-15
 *
 * Access      : Public
 * Purpose     : Creates intronic and coding DNA reference sequences from a
 *               specified input format.
 *
 * Copyright   : 2004-2009 Leiden University Medical Center; http://www.LUMC.nl/
 * Programmers : Ing. Ivo F.A.C. Fokkema <I.F.A.C.Fokkema@LUMC.nl>
 *               Ir. Gerard C.P. Schaafsma <G.C.P.Schaafsma@LUMC.nl>
 * Last edited : Ir. Gerard C.P. Schaafsma <G.C.P.Schaafsma@LUMC.nl>
 *
 *
 * This file is part of LOVD.
 *
 * LOVD is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * LOVD is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with LOVD; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 *
 *************/

define('ROOT_PATH', '../');
require ROOT_PATH . 'inc-init.php';
require ROOT_PATH . 'inc-lib-form.php';


// Number of nucleotides printed on one line of sequence output.
$LENGTH_LINE = 60;

// Ruler strings with one dot per ten positions: $sLinemark has the dot at the
// end of every block of ten, $sLinemarkBack has it at the start.
$sLinemark     = str_repeat('         .', ($LENGTH_LINE / 10));
$sLinemarkBack = str_repeat('.         ', ($LENGTH_LINE / 10));

// The step handlers compare $_GET['step'] directly, so make sure it is defined.
$_GET['step'] = (isset($_GET['step'])? $_GET['step'] : '');

/**
 * Moves an existing file out of the way so the caller can write a new file
 * under the same name, keeping up to three older copies.
 *
 * The copies are named by inserting a number in front of the extension:
 * <name>.0<ext> is the most recent copy, <name>.1<ext> the one before that and
 * <name>.2<ext> the oldest. An existing .2 copy is deleted when it has to make
 * room for the .1 copy.
 *
 * The extension starts at the first dot of the file name part of the path. For
 * compatibility with the names generated by earlier versions, the first three
 * characters of the path are never searched. Dots in directory names are
 * ignored, and a name without any dot gets the number appended at the end.
 *
 * @param string $sFileName Path of the file that is about to be rewritten.
 * @return string The unchanged $sFileName, which is free to be written again.
 */
function lovd_fileCopiesExist($sFileName) {
    clearstatcache();
    if (!file_exists($sFileName)) {
        // Nothing to back up; do not rotate (and lose) older copies for nothing.
        return $sFileName;
    }

    // Find where the extension starts. Only look behind the last directory
    // separator, and never before offset 3 (historic behaviour, ROOT_PATH can
    // be included in the name).
    $nLength = strlen($sFileName);
    $nSlashPos = strrpos($sFileName, '/');
    $nSearchFrom = max(3, ($nSlashPos === false? 0 : $nSlashPos + 1));
    $nDotPos = ($nSearchFrom < $nLength? strpos($sFileName, '.', $nSearchFrom) : false);
    if ($nDotPos === false) {
        // No extension: the copy number goes at the very end of the name.
        $nDotPos = $nLength;
    }
    $sBase = (string) substr($sFileName, 0, $nDotPos);
    $sExtension = (string) substr($sFileName, $nDotPos);

    $sCopy0 = $sBase . '.0' . $sExtension;
    $sCopy1 = $sBase . '.1' . $sExtension;
    $sCopy2 = $sBase . '.2' . $sExtension;

    // Shift the existing copies one position up, oldest first.
    if (file_exists($sCopy0)) {
        if (file_exists($sCopy1)) {
            if (file_exists($sCopy2)) {
                unlink($sCopy2);
            }
            rename($sCopy1, $sCopy2);
        }
        rename($sCopy0, $sCopy1);
    }
    rename($sFileName, $sCopy0);
    return $sFileName;
}

// Check presence or writability of the refseq directory
// The parser writes its generated files there, so without a usable directory
// only an explanation is shown and the script stops.
if (!(is_dir('../refseq') && is_writable('../refseq'))) {
    require ROOT_PATH . 'inc-top-clean.php';
    echo '<SPAN class="S18"><B>LOVD Reference Sequence Parser</B></SPAN><BR><BR>' . "\n\n";
    echo 'The \'refseq\' directory does not exist or is not writable. Please make sure it exists and that it is world writable, otherwise you can\'t use the Reference Sequence Parser. For more information or troubleshooting, please refer to the <A href="' . ROOT_PATH . 'docs/lovd_scripts/reference_sequence_parser.php" target="_blank">LOVD manual</A>.<BR><BR>' . "\n\n";
    require ROOT_PATH . 'inc-bot-clean.php';
    exit;
}





// Step 1 added by Gerard Schaafsma
// Step 1 reads an annotated GenBank file from the genbank directory, collects
// the exon (mRNA) and coding sequence (CDS) coordinates of the requested gene
// and builds the input sequence for step 2. In that sequence every exon is
// enclosed in < and >, and the translation start is marked with a |.
if ($_GET['step'] == 1) {
    if (isset($_GET['sent'])) {
        // Error check
        lovd_errorClean();

        // Fields that are read directly below may be missing from the request;
        // give them a defined (empty) value.
        foreach (array('symbol', 'file', 'transcript_id', 'protein_id') as $sField) {
            if (!isset($_POST[$sField])) {
                $_POST[$sField] = '';
            }
        }

        // Mandatory fields with their names
        $aCheck = array(
                         'gene' => 'Gene name',
                         'symbol' => 'Gene symbol',
                         'file' => 'Genbank file',
                        );

        foreach ($aCheck as $key => $val) {
            if (empty($_POST[$key])) {
                lovd_errorAdd('Please fill in the \'' . $val . '\' field.');
            }
        }

        // Genesymbol format, based on http://www.gene.ucl.ac.uk/nomenclature/guidelines.html#1.%20Criteria%20for%20symbol%20assignment
        if ($_POST['symbol'] && (!preg_match('/^[A-Z][A-Z0-9]+(_[A-Za-z0-9_-]+)?$/', $_POST['symbol']) || strlen($_POST['symbol']) > 12)) {
            // Error in genesymbol.
            lovd_errorAdd('Incorrect gene symbol. This field can contain up to 12 characters. The offical gene symbol can only contain uppercase letters and numbers, it may be appended with an underscore followed by letters, numbers, hyphens and underscores.');
        }

        if (basename($_POST['file']) != $_POST['file']) {
            // Check if filename contains a path (for security reasons)
            lovd_errorAdd('Do not include the path with the GenBank filename');
        }

        $sPath = ROOT_PATH . 'genbank/';// directory of GenBank files
        $sOut = '';

        $sFile = $sPath . $_POST['file'];//read the filename, e.g. POMGNT1_DNA.gb
        // is_file() instead of file_exists(): a directory (e.g. '.') is not a GenBank file.
        // An empty file name has already been reported by the mandatory field check.
        if ($_POST['file'] != '' && !is_file($sFile)) {
            // Error in filename
            lovd_errorAdd('The filename you provided does not seem to exist');
        }

        if (!lovd_error()) {
            // All fields filled in, go ahead
            // read each line of the file into an array
            $aGenBank = file($sFile);

            // The qualifier lines we are looking for, compared against each
            // line with all whitespace removed.
            $sGeneTag = '/gene="' . $_POST['symbol'] . '"';
            $sTranscriptTag = '/transcript_id="' . $_POST['transcript_id'] . '"';
            $sProteinTag = '/protein_id="' . $_POST['protein_id'] . '"';

            // 2008-12-04; 2.0-15 by Gerard. Select mRNA and CDS field based on
            // transcript and protein id's.
            // First you want to find the mRNA and CDS field corresponding
            // to the provided transcript_id and protein_id
            $nTranscriptID = 0; // Number of the mRNA field holding the transcript_id
            $nProteinID = 0;    // Number of the CDS field holding the protein_id
            $bGene = false;
            $bmRNA = false;
            $bCDS = false;
            $nRNAField = 0;
            $nCDSField = 0;

            // Go through the array until you found the mRNA and CDS field numbers
            foreach ($aGenBank as $line) {
                $sStripped = preg_replace('/\s+/', '', $line);
                if ($sGeneTag == $sStripped) {
                    // we are in the right gene
                    $bGene = true;
                }
                if ((substr($line, 5, 4) == 'mRNA') && $bGene) {
                    // we are now in the mRNA part where the coordinates of the exons are provided
                    $bmRNA = true;
                    $nRNAField++;
                }
                if ($sTranscriptTag == $sStripped && $bmRNA) {
                    // we are in the right mRNA field
                    $nTranscriptID = $nRNAField;
                }
                if ((substr($line, 5, 3) == 'CDS') && $bGene) {
                    // we are now in the CDS part where the coordinates of the coding sequence are provided
                    $bCDS = true;
                    $nCDSField++;
                }
                if ($sProteinTag == $sStripped && $bCDS) {
                    // we are in the right CDS field
                    $nProteinID = $nCDSField;
                }
                if ($nTranscriptID && $nProteinID) {
                    // When the mRNA and CDS field numbers are found you can stop
                    break;
                }
            }

            // When no transcript_id and/or protein_id were provided
            // the first mRNA field and the first CDS field will be selected
            if (empty($_POST['protein_id']) || empty($_POST['transcript_id'])) {
                $nTranscriptID = 1;
                $nProteinID = 1;
            } else {
                if (!$nTranscriptID) {
                    lovd_errorAdd('Transcript ID was not found, typing error?');
                }
                if (!$nProteinID) {
                    lovd_errorAdd('Protein ID was not found, typing error?');
                }
            }

            // Now you know the mRNA and CDS field you want, go through the array again
            $sSourcePositions = '';
            $sExonPositions = '';
            $sCDSPositions = '';
            $bGene = false;
            $bGeneFound = false;
            $bmRNA = false;
            $bCDS = false;
            $nRNAField = 0;
            $nCDSField = 0;
            $nSeqOffset = 0; // Index of the first sequence line; stays 0 when there is no ORIGIN line

            // Find the mRNA coordinates
            foreach ($aGenBank as $nCounter => $line) {
                $bGeneQualifier = (substr($line, 21, 5) == '/gene');

                //2.0-13; 2008-10-28; Fixed bug absence of up- and downstream
                if (substr($line, 5, 6) == 'source') {
                    $sSourcePositions .= $line;
                }
                if ($sGeneTag == preg_replace('/\s+/', '', $line)) {
                    // we are in the right gene
                    $bGene = true;
                    $bGeneFound = true;
                }
                if ((substr($line, 5, 4) == 'mRNA') && $bGene) {
                    // we are now in the mRNA part where the coordinates of the exons are provided
                    $bmRNA = true;
                    $nRNAField++;
                }
                // 2008-12-04; 2.0-15 by Gerard
                if (!$bGeneQualifier && $bGene && $bmRNA && ($nRNAField == $nTranscriptID)) {
                    // Now we are in the right mRNA field
                    $sExonPositions .= $line;
                }
                if ($bGeneQualifier && $bGene && $bmRNA) {
                    // We reached the end of the mRNA coordinates
                    $bmRNA = false;
                }
                if ((substr($line, 5, 3) == 'CDS') && $bGene) {
                    // we are now in the CDS part where the coordinates of the coding sequence are provided
                    $bCDS = true;
                    $nCDSField++;
                }
                // 2008-12-04; 2.0-15 by Gerard
                if (!$bGeneQualifier && $bGene && $bCDS && ($nCDSField == $nProteinID)) {
                    // Now we are in the right CDS field
                    $sCDSPositions .= $line;
                }
                if ($bGeneQualifier && $bGene && $bCDS) {
                    // We reached the end of the CDS coordinates
                    $bCDS = false;
                }
                if (substr($line, 0, 6) == 'ORIGIN') {
                    // from here the sequence is provided
                    $nSeqOffset = $nCounter + 1;
                }
            }

            if (!$bGeneFound) {
                lovd_errorAdd('The gene ' . $_POST['symbol'] . ' was not found in your GenBank file');
            } elseif (!lovd_error() && (trim($sExonPositions) == '' || trim($sCDSPositions) == '')) {
                // Without both sets of coordinates there is nothing to build the sequence from.
                lovd_errorAdd('No mRNA and/or CDS annotation was found for gene ' . $_POST['symbol'] . ' in your GenBank file');
            }
            if (!$nSeqOffset) {
                lovd_errorAdd('No ORIGIN section containing the sequence was found in your GenBank file');
            }

            if (!lovd_error()) {
                //2.0-13; 2008-10-28; Fixed bug absence of up- and downstream
                // Put the source start and end position in an array
                // Get rid of source and the brackets
                $sSourcePositions = preg_replace('/[source()]/', '', $sSourcePositions);
                // Get rid of any form of whitespace
                $sSourcePositions = preg_replace('/\s+/', '', $sSourcePositions);
                // write the start and end positions to an array
                $aSourcePositions = explode('..', $sSourcePositions);

                // Extract the ORIGIN part of the GenBank file to an array
                // (the last two lines of the file are left out)
                $aSequence = array_slice($aGenBank, $nSeqOffset, (count($aGenBank) - 2 - $nSeqOffset));
                // write the sequence to a string
                // (separator first: the reversed argument order is not accepted by PHP 8)
                $sSequence = implode('', $aSequence);
                // Get rid of any form of whitespace
                $sSequence = preg_replace('/\s+/', '', $sSequence);
                // Get rid of numbers
                $sSequence = preg_replace('/\d+/', '', $sSequence);

                // Put the exon start and end positions in an array
                // Get rid of mRNA join and the brackets
                $sExonPositions = preg_replace('/[mRNA join()]/', '', $sExonPositions);
                // Get rid of any form of whitespace
                $sExonPositions = preg_replace('/\s+/', '', $sExonPositions);
                // write the exon start and end positions to an array
                $aExonPositionsmRNA = explode(',', $sExonPositions);
                // write the start and end positions to arrays
                foreach ($aExonPositionsmRNA as $i => $sRange) {
                    $aExonPositionsmRNA[$i] = explode('..', $sRange);
                }
                //2.0-13; 2008-10-28; Fixed bug absence of up- and downstream
                // These notices go into $sOut, so they are shown with the other
                // output of this step instead of in front of the page header.
                $aLastExon = $aExonPositionsmRNA[count($aExonPositionsmRNA) - 1];
                if ($aSourcePositions[0] == $aExonPositionsmRNA[0][0]) {
                    $sOut .= ($sOut? "\n" : '') . 'No upstream sequence was provided';
                }
                if (isset($aSourcePositions[1]) && isset($aLastExon[1]) && $aSourcePositions[1] == $aLastExon[1]) {
                    $sOut .= ($sOut? "\n" : '') . 'No downstream sequence was provided';
                }

                // add an element to the exon positions array, now the indexes are the same as the exon numbers
                $aExonPositionsmRNA[] = array(0, 0);
                sort($aExonPositionsmRNA);

                // Put the exon start and end positions of the coding sequence in an array
                // Get rid of CDS join and the brackets
                $sCDSPositions = preg_replace('/[CDS join()]/', '', $sCDSPositions);
                // Get rid of any form of whitespace
                $sCDSPositions = preg_replace('/\s+/', '', $sCDSPositions);
                // write the exon start and end positions to an array
                $aExonPositionsCDS = explode(',', $sCDSPositions);
                // write the start and end positions to arrays
                foreach ($aExonPositionsCDS as $i => $sRange) {
                    $aExonPositionsCDS[$i] = explode('..', $sRange);
                }
                // add an element to the exon positions array, now the indexes are the same as the exon numbers
                $aExonPositionsCDS[] = array(0, 0);
                sort($aExonPositionsCDS);

                // find the translation start: the 0-based index in $sSequence
                // of the first coding nucleotide
                $nStartTransl = $aExonPositionsCDS[1][0] - 1;
                // create the introns positions array
                $aIntronsPositions = array(0 => array(0, 0));
                for ($i = 1; $i < count($aExonPositionsmRNA) - 1; $i++) {
                    $aIntronsPositions[] = array($aExonPositionsmRNA[$i][1] + 1, $aExonPositionsmRNA[$i + 1][0] - 1);
                }

                // Create the sequence for step 2
                $nExons = count($aExonPositionsmRNA) - 1;// number of exons
                // add upstream sequence to $sSeqNextStep, wich will be the sequence for step 2
                $sSeqNextStep = substr($sSequence, 0, $aExonPositionsmRNA[1][0] - 1);
                // now for the exons and introns
                for ($i = 1; $i <= $nExons; $i++) {
                    $nExonStart = $aExonPositionsmRNA[$i][0];
                    $nExonEnd = $aExonPositionsmRNA[$i][1];
                    $sExon = substr($sSequence, $nExonStart - 1, $nExonEnd - $nExonStart + 1);

                    $sSeqNextStep .= "<";
                    // add exon
                    // The exon positions are 1-based and $nStartTransl is 0-based, so the
                    // exon runs from index $nExonStart - 1 up to and including $nExonEnd - 1.
                    // Comparing against $nExonStart itself would miss a translation start
                    // on the first nucleotide of the exon.
                    if ($nExonStart - 1 <= $nStartTransl && $nStartTransl < $nExonEnd) {
                        // if start of translation is in this exon, add a |
                        $sExon = substr_replace($sExon, "|", $nStartTransl - $nExonStart + 1, 0);
                    }
                    $sSeqNextStep .= $sExon;
                    $sSeqNextStep .= ">";
                    if ($i < $nExons) {
                        // add intron
                        $sSeqNextStep .= substr($sSequence, $aIntronsPositions[$i][0] - 1, $aIntronsPositions[$i][1] - $aIntronsPositions[$i][0] + 1);
                    }
                }
                // add downstream sequence
                $sSeqNextStep .= substr($sSequence, $aExonPositionsmRNA[$nExons][1], strlen($sSequence) - $aExonPositionsmRNA[$nExons][1] + 2);
                $sOut .= ($sOut? "\n" : '') . 'Successfully created input for step 2';
            }
        }

        if (!lovd_error()) {
            // Create sequence for step 2
            $_POST['sequence'] = wordwrap($sSeqNextStep, $LENGTH_LINE, "\n", true);
            if (!isset($_POST['exists'])) {
                $_POST['exists'] = '';
            }
            require ROOT_PATH . 'inc-top-clean.php';
            print('<SPAN class="S15"><B>Step 1 - Import annotated Genbank sequence</B></SPAN><BR><BR>' . "\n\n");
            print('Output for this step :<BR>' . "\n" . str_replace("\n", '<BR>' . "\n", $sOut) . '<BR><BR>' . "\n");

            // To continue to step 2, we need to create a form and send all data.
            // All values are user input (and the sequence contains < and >), so they are
            // escaped before they are put in an attribute; the browser decodes them again
            // on submit. Only ASCII characters are replaced, so passing ISO-8859-1 is safe
            // whatever the actual encoding of the page is.
            // NOTE(review): assumes inc-init.php does not already HTML-escape $_POST - confirm.
            $aHidden = array('gene', 'symbol', 'file', 'sequence', 'exists');
            $sHidden = '';
            foreach ($aHidden as $sField) {
                $sHidden .= '  <INPUT type="hidden" name="' . $sField . '" value="' . htmlspecialchars($_POST[$sField], ENT_QUOTES, 'ISO-8859-1') . '">' . "\n";
            }
            print('<FORM action="' . htmlspecialchars($_SERVER['PHP_SELF'], ENT_QUOTES, 'ISO-8859-1') . '?step=2" method="post">' . "\n" .
                  $sHidden .
                  '  <INPUT type="submit" value="Continue to next step">' . "\n" .
                  '</FORM><BR>' . "\n\n");

            require ROOT_PATH . 'inc-bot-clean.php';
            exit;
        }
    } else {
        // Standard settings.
        $_POST['exists'] = 'overwrite';

        // Do we have a gene selected?
        if (!empty($_SESSION['currdb'])) {
            $_POST['symbol'] = $_SESSION['currsymb'];
            $_POST['gene'] = $_SETT['currdb']['gene'];
            if ($_SETT['currdb']['genbank'] == 1 && $_SETT['currdb']['genbank_uri']) {
                $_POST['file'] = $_SETT['currdb']['genbank_uri'];
            }
        }

        // Values passed in the URL take precedence over the selected gene.
        if (!empty($_GET['symbol'])) {
            $_POST['symbol'] = $_GET['symbol'];
        }
        if (!empty($_GET['gene'])) {
            $_POST['gene'] = $_GET['gene'];
        }
        if (!empty($_GET['file'])) {
            $_POST['file'] = $_GET['file'];
        }
    }

    // Print the form for step 1: import a GenBank file
    // (first visit, or the submitted data contained errors)
    require ROOT_PATH . 'inc-top-clean.php';
    print('<SPAN class="S15"><B>Step 1 - Import annotated Genbank sequence to extract genomic sequence of your gene of interest for step 2</B></SPAN><BR><BR>' . "\n\n");
    lovd_errorPrint();

    print('<FORM action="' . htmlspecialchars($_SERVER['PHP_SELF'], ENT_QUOTES, 'ISO-8859-1') . '?step=1&amp;sent=true" method="post">' . "\n" .
          '  <TABLE border="0" cellpadding="1" cellspacing="0" width="700">' . "\n");

    $aForm = array();
    $aForm[] = array('POST', '', '', '50%', '50%');
    $aForm[] = array('', 'print', '(All fields are mandatory unless specified otherwise)');
    $aForm[] = 'hr';
    $aForm[] = array('Gene name', 'text', 'gene', '50');
    $aForm[] = 'hr';
    $aForm[] = array('Gene symbol', 'text', 'symbol', '12');
    $aForm[] = 'hr';
    $aForm[] = array('Genbank file', 'text', 'file', '50');
    $aForm[] = array('', 'print', '<SPAN class="form_note">GenBank file should be in the genbank directory.</SPAN>');
    $aForm[] = 'hr';
    $aForm[] = array('Transcript ID', 'text', 'transcript_id', '12');
    $aForm[] = array('', 'print', '<SPAN class="form_note">In GenBank file mRNA annotation, e.g. NM_000070.2</SPAN>');
    $aForm[] = array('Protein ID', 'text', 'protein_id', '12');
    $aForm[] = array('', 'print', '<SPAN class="form_note">In GenBank file CDS annotation, e.g. NP_000061.1<BR><HR></SPAN>');
    $aForm[] = array('', 'print', '<SPAN class="form_note">If you do not fill in both fields, the first mRNA and CDS fields appearing in the file and associated with this gene will be selected.</SPAN>');
    $aForm[] = 'hr';
    $aForm[] = array('', 'submit', 'Continue');

    lovd_viewForm($aForm);
    print('</TABLE><BR>' . "\n\n" . '  </FORM>' . "\n\n");
    require ROOT_PATH . 'inc-bot-clean.php';
    exit;
}





//STEP2-STEP2-STEP2-STEP2-STEP2-STEP2-STEP2-STEP2-STEP2-STEP2-STEP2-STEP2-STEP2-STEP2-STEP2-STEP2-STEP2-STEP2-STEP2-STEP2-STEP2-STEP2-STEP2-STEP2-STEP2-STEP2-STEP2-STEP2-
if ($_GET['step'] == 2) {
    // Get sequence from 1 and parse intronic sequences. Prepare sequence for step 3.

    if (isset($_GET['sent'])) {
        // Verification of the sequence

        // Error check
        lovd_errorClean();

        // Mandatory fields with their names
        $aCheck = array(
                         'gene' => 'Gene name',
                         'symbol' => 'Gene symbol',
                         'sequence' => 'Input sequence'
                        );

        // 2008-12-03; 2.0-15; If a genbank file needs to be created, more fields are mandatory.
        if ($_POST['genbankfile']) {
            $aCheck['transcript_id'] = 'Transcript id';
            $aCheck['protein_id'] = 'Protein id';
            $aCheck['db_xref'] = 'db_xref (CDS field: GI number)';
        }

        foreach ($aCheck as $key => $val) {
            if (empty($_POST[$key])) {
                lovd_errorAdd('Please fill in the \'' . $val . '\' field.');
            }
        }

        // Genesymbol format, based on http://www.gene.ucl.ac.uk/nomenclature/guidelines.html#1.%20Criteria%20for%20symbol%20assignment
        if ($_POST['symbol'] && (!preg_match('/^[A-Z][A-Z0-9]+(_[A-Za-z0-9_-]+)?$/', $_POST['symbol']) || strlen($_POST['symbol']) > 12)) {
            // Error in genesymbol.
            lovd_errorAdd('Incorrect gene symbol. This field can contain up to 12 characters. The offical gene symbol can only contain uppercase letters and numbers, it may be appended with an underscore followed by letters, numbers, hyphens and underscores.');
        }

        // Check presence or writability of the genbank directory and if this is necessary (do you want to create a file in GenBank format)
        if ((!is_dir('../genbank') || !is_writable('../genbank')) && $_POST['genbankfile']) {
            lovd_errorAdd('The \'genbank\' directory does not exist or is not writable. Please make sure it exists and that it is world writable, otherwise you can\'t use this step. For more information or troubleshooting, please refer to the <A href="' . ROOT_PATH . 'docs/lovd_scripts/reference_sequence_parser.php" target="_blank">LOVD manual</A>.');
        }



        if (!lovd_error()) {
            // All fields filled in, go ahead.
            $sSeq = str_replace("\r", '', $_POST['sequence']);
            $sSeq = str_replace("\n", '', $sSeq);

            $sUpstream = '';       // The upstream sequence
            $aIntron = array();    // The introns array
            $aExon = array();      // The exons array
            $nExonNumber = 0;      // Exon number
            $nExonNucleotides = 0; // Number of exon nucleotides
            $nStartTranslation = 0;// Where the translation starts
            $where = 'intron';        // Start with the upstream sequence 
            $aExonEnds = array();     // Array with exon ending positions.


            for ($i = 0; $i < strlen($sSeq); $i ++) {
                $s = $sSeq{$i};
                // We will need to loop through the sequence to provided detailed error messages.
                // up and downstream are first considered introns (first and last elements of the 
                // intron array $aIntron)
                switch ($where) {
                    case 'intron' :
                        // We are in an intron.
                        if (preg_match('/[ACGT]/i', $s)) {
                            // We stay in the intron
                            if (empty($aIntron[$nExonNumber])) {
                                $aIntron[$nExonNumber] = '';
                            }
                            $aIntron[$nExonNumber] .= $s;

                        } elseif ($s == '<') {
                            // We are moving into an exon.
                            $where = 'exon';
                            $nExonNumber ++;

                        } else {
                            lovd_errorAdd('Error : Unexpected character \'' . $s . '\' at char ' . ($i + 1));
                            break 2;
                        }
                        break;

                    case 'exon';
                        // We are in an exon.
                        if (preg_match('/[ACGT]/i', $s)) {
                            // We stay in the exon
                            if (empty($aExon[$nExonNumber])) {
                                $aExon[$nExonNumber] = '';
                            }
                            $aExon[$nExonNumber] .= $s;
                            $nExonNucleotides ++;

                        } elseif ($s == '>') {
                            // We are moving into an intron.
                            $where = 'intron';
                            $aExonEnds[$nExonNumber] = $nExonNucleotides;

                        } elseif ($s == '|' && !$nStartTranslation) {
                            // We are starting translation.
                            $aExon[$nExonNumber] .= $s; // The | is included!!
                            $nStartTranslation = $nExonNucleotides + 1;// Need this one later

                        } else {
                            lovd_errorAdd('Error : Unexpected character \'' . $s . '\' at char ' . ($i + 1));
                            break 2;
                        }
                        break;
                }
            }
        }
        if (!lovd_error()) {
            // Fix $aExonEnds (last exon not completely translated)
            // and compensate for $nStartTranslation where nucleotide numbering starts.
            foreach ($aExonEnds as $key => $nEnd) {
                $nEnd -= $nStartTranslation;
                $aExonEnds[$key] = ($nStartTranslation < 0? $nEnd : $nEnd + 1);
            }

            // 2.0-13; 2008-10-30 by Gerard fix bug when no up/downstream sequences are provided
            $sCodingSequence = implode($aExon);// put the whole exon array in a string
            $nEnd  = 0;
            for ($i = $nStartTranslation; $i < $nExonNucleotides; $i += 3) {//$nExonNucleotides should be the same as strlen($sCodingSequence) check this
                if (in_array(strtolower(substr($sCodingSequence, $i, 3)), array('taa', 'tag', 'tga'))) {
                    // stop codon!
                    $nEnd = $i + 3;
                    break;
                }
            }
            
            // All sequences have been parsed and stored. Now create the intron files.
            $sNow = date('F j, Y');
            $sNowHead = date('Y-m-d H:i:s');// Produces a strict warning
            $sPath = ROOT_PATH . 'refseq/';
            $sOut = '';

            /*******************************************************************
            * We will now traverse the intronic sequence array and create the *
            * intron files. I have traded speed for using less memory by not  *
            * using foreach(), which is faster but creates a copy of the      *
            * array. This array can get huge, so I'm not willing to copy the  *
            * array and risk a failure.                                       *
            ******************************************************************/

            reset ($aIntron);
            //2.0-13; 2008-10-30 by Gerard
            $bFilesExisted = false;

            while (list($nIntron, $sIntron) = each($aIntron)) {
                if (!$sIntron) {
                    // No intronic sequence. Wouldn't know why, but whatever.
                    continue;
                }
                
                // Determine the file names
                $sNIntron = str_pad($nIntron, 2, '0', STR_PAD_LEFT);//add a 0 when intron number is 1 digit
                if (!$nIntron) {
                    // First intron is upstream sequence
                    $sFile = $sPath . $_POST['symbol'] . '_upstream.html';
                    $sTitle = 'upstream';
                    $where = 'up';
                } elseif ($nIntron == $nExonNumber) {
                    // Last intron is downstream sequence
                    $sFile = $sPath . $_POST['symbol'] . '_downstream.html';
                    $sTitle = 'downstream';
                    $where = 'down';
                } else {
                    // The real introns
                    $sFile = $sPath . $_POST['symbol'] . '_intron_' . $sNIntron . '.html';
                    $sTitle = strlen($sIntron) . ' nt intron ' . $sNIntron;
                    $where = 'intron';
                }

                if (file_exists($sFile)) {
                    switch ($_POST['exists']) {
                        case 'skip' :
                            // Skip this intron, we already have a file.
                            $sOut .= ($sOut? "\n" : '') . 'Skipped ' . $sTitle . ', file existed';
                            $bFilesExisted = true;
                            continue 2;
                        case 'rename' :
                            // Rename the old file, we create a new intron refseq.
                            $sFile = lovd_fileCopiesExist($sFile);
                            break;
                    }
                }

                // Write to file.
                $fIntron = fopen($sFile, 'w');
                if ($fIntron) {
                    fputs($fIntron, '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"' . "\n" .
                                     '        "http://www.w3.org/TR/html4/loose.dtd">' . "\n" .
                                     '<HTML lang="en">' . "\n" .
                                     '<HEAD>' . "\n" .
                                     '  <TITLE>' . $_POST['gene'] . ' (' . $_POST['symbol'] . ') - ' . $sTitle . ' reference sequence</TITLE>' . "\n" .
                                     '  <META http-equiv="content-type" content="text/html; charset=ISO-8859-1">' . "\n" .
                                     '  <META name="generator" content="LOVD v.' . $_SETT['system']['version'] . '-' . $_SETT['system']['build'] . ' Reference Sequence Parser @ ' . $sNowHead . '">' . "\n" .
                                     '  <META name="LOVD copyright" content="&copy; 2004-2008 LUMC: http://www.LUMC.nl/">' . "\n\n" .
                                     '  <STYLE type="text/css">' . "\n" .
                                     '    body {font-family : Verdana, Helvetica, sans-serif; font-size : 13px;}' . "\n" .
                                     '    pre  {font-family : monospace;}' . "\n" .
                                     '  </STYLE>' . "\n" .
                                     '</HEAD>' . "\n\n" .
                                     '<BODY>' . "\n\n" .
                                     '<HR>' . "\n" .
                                     '<H1 align="center">' . $_POST['gene'] . ' (' . $_POST['symbol']  . ') - ' . $sTitle . ' reference sequence</H1>' . "\n" .
                                     ($where == 'intron'? '<P align="center"><I>(intronic numbering for coding DNA Reference Sequence)</I></P>' . "\n" : '') .
                                     '<HR>' . "\n\n" .
                                     '<PRE>' . "\n");

//UPSTREAM-UPSTREAM-UPSTREAM-UPSTREAM-UPSTREAM-UPSTREAM-UPSTREAM-UPSTREAM-UPSTREAM-UPSTREAM-UPSTREAM-UPSTREAM-UPSTREAM-UPSTREAM-UPSTREAM-UPSTREAM-UPSTREAM-UPSTREAM-UPSTREAM
//UPSTREAM-UPSTREAM-UPSTREAM-UPSTREAM-UPSTREAM-UPSTREAM-UPSTREAM-UPSTREAM-UPSTREAM-UPSTREAM-UPSTREAM-UPSTREAM-UPSTREAM-UPSTREAM-UPSTREAM-UPSTREAM-UPSTREAM-UPSTREAM-UPSTREAM
                    if ($where == 'up') {
                        // Upstream sequence. No separation in the middle; the whole sequence is displayed
                        // the same way as for the translation.
                        // Prepare sequence and declare some vars (part I).
                        $sUpstream = strtolower($aIntron[0]);
                        $lUpstream = strlen($sUpstream);
                        // determine the number of exon nucleotides you want to add to the upstream sequence
                        $nExonNuclsToAdd = ($nStartTranslation - 1) % $LENGTH_LINE;
                        // determine the number of upstream nucleotides in the first line: leftover
                        $nLeftover = ($lUpstream + $nExonNuclsToAdd) % $LENGTH_LINE;
                        // Get the exon nucleotides from the exons array; whole exons are appended
                        // as long as they are shorter than what is still needed.
                        $nExonNuclsStillToAdd = $nExonNuclsToAdd;
                        $n = 1;
                        while (strlen($aExon[$n]) < $nExonNuclsStillToAdd) {
                            $sUpstream .= $aExon[$n];
                            $nExonNuclsStillToAdd -= strlen($aExon[$n]);
                            $n ++;
                        }

                        // The rest is taken from the start of the next exon.
                        $sUpstream .= substr($aExon[$n], 0, $nExonNuclsStillToAdd);
                        $sUpstream = strtolower($sUpstream);

                        // determine the number of upstream lines after the first line (the leftover);
                        // this is calculated with the upstream length BEFORE the exon nucleotides were added.
                        $nLineMultFactor = (int) (($lUpstream + $nStartTranslation - 1) / $LENGTH_LINE);
                        $lUpstream = strlen($sUpstream);

                        // Spaces before the leftover part to be added
                        $sPreSpaces = str_repeat(' ', ($LENGTH_LINE - $nLeftover));

                        // print the first (partial) line.
                        // When there is no leftover the sequence consists of complete lines only, which are all
                        // written by the loop below. Without this check, substr($sLinemarkBack, -0) would return
                        // the complete ruler and an empty (or duplicated) first line would be written.
                        if ($nLeftover) {
                            if ($lUpstream <= $LENGTH_LINE) {
                                // First line is also last line of upstream sequence
                                fputs($fIntron, $sPreSpaces . substr($sLinemarkBack, $LENGTH_LINE - $lUpstream, $lUpstream - $nExonNuclsToAdd) . '   ' . substr($sLinemarkBack,  -$nExonNuclsToAdd, $nExonNuclsToAdd) . "\n");
                                // Determine the preceeding nucleotide number
                                $nPreceedNumber = -($nLineMultFactor*$LENGTH_LINE) - strlen(substr($sUpstream, 0, $lUpstream - $nExonNuclsToAdd)) - strlen(substr($sUpstream, $lUpstream - $nExonNuclsToAdd, $nExonNuclsToAdd));

                                if (strlen($sPreSpaces) > strlen($nPreceedNumber) + 1) {// +1 because of the extra space
                                    // There is enough room for the preceeding nucleotide number
                                    $sPreSpaces = str_repeat(' ', ($LENGTH_LINE - $lUpstream - strlen($nPreceedNumber) - 1));
                                    fputs($fIntron, $sPreSpaces . $nPreceedNumber . ' ' . substr($sUpstream, 0, $lUpstream - $nExonNuclsToAdd) . ' \\ ' . substr($sUpstream, $lUpstream - $nExonNuclsToAdd, $nExonNuclsToAdd) . ' ' . -($nLineMultFactor*$LENGTH_LINE + 1) . "\n\n");
                                } else {
                                    // No preceeding nucleotide number will be printed
                                    fputs($fIntron, $sPreSpaces . substr($sUpstream, 0, $lUpstream - $nExonNuclsToAdd) . ' \\ ' . substr($sUpstream, $lUpstream - $nExonNuclsToAdd, $nExonNuclsToAdd) . ' ' . -($nLineMultFactor*$LENGTH_LINE + 1) . "\n\n");
                                }
                            } else {
                                // First line is not the last line
                                fputs($fIntron, $sPreSpaces . substr($sLinemarkBack, -$nLeftover) . "\n");// Print the line with the 10th position marks
                                // Determine the preceeding nucleotide number
                                $nPreceedNumber = -($nLineMultFactor*$LENGTH_LINE) - strlen(substr($sUpstream, 0, $nLeftover));

                                // Compare against the printed WIDTH of the number (as the single-line case above
                                // does), not against the negative number itself, which made this test always true.
                                if (strlen($sPreSpaces) > strlen($nPreceedNumber) + 1) {// +1 because of the extra space
                                    // There is enough room for the preceeding nucleotide number
                                    $sPreSpaces = str_repeat(' ', ($LENGTH_LINE - $nLeftover - strlen($nPreceedNumber) - 1));
                                    fputs($fIntron, $sPreSpaces . $nPreceedNumber . ' ' . substr($sUpstream, 0, $nLeftover) . '   ' . -($nLineMultFactor*$LENGTH_LINE + 1) . "\n\n");
                                } else {
                                    // No preceeding nucleotide number will be printed
                                    fputs($fIntron, $sPreSpaces . substr($sUpstream, 0, $nLeftover) . '   ' . -($nLineMultFactor*$LENGTH_LINE + 1) . "\n\n");
                                }
                            }
                        }

                        // print the succeeding (complete) lines
                        for ($i = $nLeftover; $i <= $lUpstream - $LENGTH_LINE + 1; $i += $LENGTH_LINE) {
                            $nLineMultFactor --;
                            if ($i == $lUpstream - $LENGTH_LINE) {
                                // This is the last line: mark the border between the upstream sequence and the added exon nucleotides
                                fputs($fIntron, substr($sLinemarkBack, 0, $LENGTH_LINE - $nExonNuclsToAdd) . '   ' . substr($sLinemarkBack, $LENGTH_LINE - $nExonNuclsToAdd, $nExonNuclsToAdd) . "\n");
                                fputs($fIntron, substr($sUpstream, $i, $LENGTH_LINE - $nExonNuclsToAdd) . ' \\ ' . substr($sUpstream, $i + $LENGTH_LINE - $nExonNuclsToAdd, $nExonNuclsToAdd) . ' ' . -($nLineMultFactor*$LENGTH_LINE + 1) . "\n\n");
                            } else {
                                fputs($fIntron, $sLinemarkBack . "\n" . substr($sUpstream, $i, $LENGTH_LINE) . '   ' . -($nLineMultFactor*$LENGTH_LINE + 1) . "\n\n");
                            }
                        }
//INTRONINTRONINTRONINTRONINTRONINTRONINTRONINTRONINTRONINTRONINTRONINTRONINTRONINTRONINTRONINTRONINTRONINTRONINTRONINTRONINTRONINTRONINTRONINTRONINTRONINTRONINTRONINTRON
//INTRONINTRONINTRONINTRONINTRONINTRONINTRONINTRONINTRONINTRONINTRONINTRONINTRONINTRONINTRONINTRONINTRONINTRONINTRONINTRONINTRONINTRONINTRONINTRONINTRONINTRONINTRONINTRON
// except for some variable names, this part has not been changed, works OK
                    } elseif ($where == 'intron') {
                        // The 'real' introns. The first half of the intron is labelled "<exon end>+<offset>",
                        // the second half "<next exon start><negative offset>", with a divider line in between.
                        $sIntron = strtolower($sIntron);
                        $lIntron = strlen($sIntron);
                        $nMiddle = round($lIntron / 2);           // number of nucleotides shown before the divider
                        $nStart2 = $nMiddle - $lIntron;           // (negative) offset of the first nucleotide after the divider
                        $lLeftover = -($nStart2 % $LENGTH_LINE);  // length of the partial line right after the divider

                        // Position labels shared by all lines of each half.
                        $sLabelFirstHalf = ($aExonEnds[$nIntron] <= 0? $aExonEnds[$nIntron] - 1 : $aExonEnds[$nIntron]);
                        $sLabelSecondHalf = ($aExonEnds[$nIntron] < 0? $aExonEnds[$nIntron] : $aExonEnds[$nIntron] + 1);

                        // Complete lines up to the middle of the intron.
                        $i = 0;
                        while ($i + $LENGTH_LINE <= $nMiddle) {
                            fputs($fIntron, $sLinemark . "\n" . substr($sIntron, $i, $LENGTH_LINE) . '  ' . $sLabelFirstHalf . '+' . ($i + $LENGTH_LINE) . "\n\n");
                            $i += $LENGTH_LINE;
                        }

                        // Whatever is left before the middle goes on a shorter line.
                        $nRemain = $nMiddle - $i;
                        if ($nRemain) {
                            fputs($fIntron, substr($sLinemark, 0, $nRemain) . "\n" . substr($sIntron, $i, $nRemain) . '  ' . $sLabelFirstHalf . '+' . ($i + $nRemain) . "\n\n");
                        }
                        fputs($fIntron, str_pad(' middle of intron ', $LENGTH_LINE, '-', STR_PAD_BOTH) . "\n");

                        // First line after the middle: right-aligned partial line, if any.
                        if ($lLeftover) {
                            // Line markings.
                            $sPreSpaces = str_repeat(' ', $LENGTH_LINE - $lLeftover);
                            fputs($fIntron, $sPreSpaces . substr($sLinemarkBack, -$lLeftover) . "\n");

                            // Print the starting position in front of the sequence only if the indentation leaves room for it.
                            $nLabelWidth = strlen($aExonEnds[$nIntron]) + strlen($nStart2 - 1);
                            if (strlen($sPreSpaces) > ($nLabelWidth + 1)) {
                                $sLinePrefix = substr($sPreSpaces, 0, ($LENGTH_LINE - $lLeftover - $nLabelWidth - 2)) . $sLabelSecondHalf . ($nStart2) . '  ';
                            } else {
                                $sLinePrefix = $sPreSpaces;
                            }

                            fputs($fIntron, $sLinePrefix . substr($sIntron, $nStart2, $lLeftover) . '  ' . $sLabelSecondHalf . ($nStart2 + $lLeftover - 1) . "\n\n");
                        }

                        // Complete lines from there until the end of the intron.
                        $i = ($nStart2 + $lLeftover);
                        while ($i + $LENGTH_LINE <= 0) {
                            fputs($fIntron, $sLinemarkBack . "\n" . substr($sIntron, $i, $LENGTH_LINE) . '  ' . $sLabelSecondHalf . ($i + $LENGTH_LINE - 1) . "\n\n");
                            $i += $LENGTH_LINE;
                        }
    
                    } else {
//DOWNSTREAM-DOWNSTREAM-DOWNSTREAM-DOWNSTREAM-DOWNSTREAM-DOWNSTREAM-DOWNSTREAM-DOWNSTREAM-DOWNSTREAM-DOWNSTREAM-DOWNSTREAM-DOWNSTREAM-DOWNSTREAM-DOWNSTREAM-
//DOWNSTREAM-DOWNSTREAM-DOWNSTREAM-DOWNSTREAM-DOWNSTREAM-DOWNSTREAM-DOWNSTREAM-DOWNSTREAM-DOWNSTREAM-DOWNSTREAM-DOWNSTREAM-DOWNSTREAM-DOWNSTREAM-DOWNSTREAM-
                        // Downstream, keep on counting as if the exon refseq goes on...

                        // Prepare sequence and declare some vars (part I).
                        $sIntron = strtolower($sIntron);
                        $lIntron = strlen($sIntron);
                        // We may need to align the downstream nicely, so we may need to copy some nucleotides from the last exon to prepend.
                        // To determine how much to copy, we need to know exactly where the translation stops.
                        // We'll have to loop through the sequence, codon by codon, until the first stop codon.
                        $sCodingSequence = implode($aExon);// put the whole exon array in a string
                        $nEnd  = 0; // stays 0 when no stop codon is found
                        for ($i = $nStartTranslation; $i < $nExonNucleotides; $i += 3) {//$nExonNucleotides should be the same as strlen($sCodingSequence) check this
                            if (in_array(strtolower(substr($sCodingSequence, $i, 3)), array('taa', 'tag', 'tga'), true)) {
                                // stop codon!
                                $nEnd = $i + 3;
                                break;
                            }
                        }
                        $nStart2 = ($nExonNucleotides - $nEnd) + 1;
                        $nExonNuclsToAdd   = $nStart2 % $LENGTH_LINE;
                        $nStart2 = $nStart2 - $nExonNuclsToAdd;

                        // Prepend the tail of the last exon. substr($s, -0) returns the WHOLE string, so when no
                        // nucleotides have to be added nothing must be prepended at all.
                        if ($nExonNuclsToAdd) {
                            $sIntron = strtolower(substr($aExon[count($aExon)], -$nExonNuclsToAdd)) . $sIntron;
                        }
                        $lIntron += $nExonNuclsToAdd;
                        $aExonEnds[$nIntron] -= $nExonNuclsToAdd;

                        // Printing sequence
                        if ($lIntron <= $LENGTH_LINE) {
                            // Everything fits on one line. The end label includes the $nStart2 offset, like all
                            // other lines below do; it used to be left out here.
                            fputs($fIntron, substr($sLinemark, 0, $nExonNuclsToAdd) . '   ' . substr($sLinemark, $nExonNuclsToAdd, $lIntron - $nExonNuclsToAdd) . "\n");
                            fputs($fIntron, substr($sIntron, 0, $nExonNuclsToAdd) . ' / ' . substr($sIntron, $nExonNuclsToAdd, $lIntron - $nExonNuclsToAdd) . ' *' . ($nStart2 + $lIntron) . "\n\n");
                        } else {
                            for ($i = 0; $i + $LENGTH_LINE <= $lIntron; $i += $LENGTH_LINE) {
                                if (!$i) {
                                    // First line, we may need to indicate the border between the last exon and the downstream sequence
                                    fputs($fIntron, substr($sLinemark, 0, $nExonNuclsToAdd) . '   ' . substr($sLinemark, $nExonNuclsToAdd, $LENGTH_LINE - $nExonNuclsToAdd) . "\n");
                                    fputs($fIntron, substr($sIntron, $i, $nExonNuclsToAdd) . ' / ' . substr($sIntron, $nExonNuclsToAdd, $LENGTH_LINE - $nExonNuclsToAdd) . ' *' . ($nStart2 + $i + $LENGTH_LINE) . "\n\n");
                                } else {
                                    fputs($fIntron, $sLinemark . "\n" . substr($sIntron, $i, $LENGTH_LINE) . '    *' . ($nStart2 + $i + $LENGTH_LINE) . "\n\n");
                                }
                            }
                            // Remainder for the end; $i still holds the offset of the first nucleotide that was not printed.
                            $nRemain = $lIntron - $i;
                            if ($nRemain) {
                                fputs($fIntron, substr($sLinemark, 0, $nRemain) . "\n" . str_pad(substr($sIntron, $i, $nRemain), $LENGTH_LINE) . '    *' . ($nStart2 + $i + $nRemain) . "\n\n");
                            }
                        }
                    }

                    // Add a line to the on-screen report telling which sequence file was written.
                    if ($where == 'up') {
                        $sWrittenSequence = 'upstream sequence';
                    } elseif ($where == 'intron') {
                        $sWrittenSequence = 'intron ' . $sNIntron;
                    } else {
                        $sWrittenSequence = 'downstream sequence';
                    }
                    if ($sOut) {
                        $sOut .= "\n";
                    }
                    $sOut .= 'Successfully wrote ' . $sWrittenSequence;

                    // Close the <PRE> block, write the page footer (no newline after the last tag) and close the file.
                    $sPoweredBy = '  Powered by <A href="http://www.DMD.nl/LOVD/' . $_SETT['system']['version'] . '/" target="_blank">LOVDv.' . $_SETT['system']['version'] . '</A>';
                    if ($_SETT['system']['build']) {
                        $sPoweredBy .= ' Build ' . $_SETT['system']['build'];
                    }
                    $sPoweredBy .= '<BR>';
                    $aRefseqPageFootLines = array(
                        '</PRE>',
                        '',
                        '<HR>',
                        '<P align="center" style="font-size : 11px;">',
                        $sPoweredBy,
                        '  &copy;2004-2008 <A href="http://www.lumc.nl/" target="_blank">Leiden University Medical Center</A>',
                        '</P>',
                        '<HR>',
                        '',
                        '</BODY>',
                        '</HTML>'
                    );
                    fputs($fIntron, implode("\n", $aRefseqPageFootLines));
                    fclose($fIntron);
    
                } else {
                    // This really shouldn't happen, as we have checked this already...
                    lovd_errorAdd('Couldn\'t open file to write to for intron ' . $sNIntron);
                }
            }
    
            if (!lovd_error()) {
                // Create sequence for step 3: all exons joined by ';' and hard-wrapped at $LENGTH_LINE
                // characters (the fourth wordwrap() argument forces breaks inside long words).
                // The result is stored back in $_POST['sequence'].
                $_POST['sequence'] = wordwrap(implode(';', $aExon), $LENGTH_LINE, "\n", 1);
                // Include the page top and print the heading of this step.
                require ROOT_PATH . 'inc-top-clean.php';
                print('<SPAN class="S15"><B>Step 2 - Create intronic sequences</B></SPAN><BR><BR>' . "\n\n");

                // Create a table with start and end positions of exons in genomic and coding DNA, including lengths and intron lengths
                // and write to a tab-delimited text file.
                // Columns: exon #, c.startExon, c.endExon, g.startExon, g.endExon, lengthExon, lengthIntron
                // (the last exon has no lengthIntron column). Positions after the stop codon are prefixed with '*'.
                $sTableFile = $sPath . $_POST['symbol'] . '_table.txt';
                if (file_exists($sTableFile)) {
                    switch ($_POST['exists']) {
                        case 'skip' :
                            // Skip this file, we already have a file
                            $sOut .= ($sOut? "\n" : '') . 'Skipped creation of table, file existed';
                            $sTableFile = '';
                            break;
                        case 'rename' :
                            // Rename the old file, we create a new refseq
                            $sTableFile = lovd_fileCopiesExist($sTableFile);
                            break;
                    }
                }

                // 2.0-13; 2008-10-29 Added by Gerard to solve bug when no up and/or downstream sequences were provided in the GenBank file
                // Missing entries are set to empty strings. This is done even when the table itself is skipped,
                // so that $aIntron ends up in the same state either way.
                if (!isset($aIntron[0])) {
                    // no upstream sequence provided
                    $aIntron[0] = '';
                }
                for ($i = 1; $i <= count($aExon); $i++) {
                    if (!isset($aIntron[$i])) {
                        // no intron or downstream sequence provided
                        $aIntron[$i] = '';
                    }
                }

                // Only try to open the file when we actually have a file name; an empty name means "skipped"
                // and used to lead to a failing fopen('') plus a misleading error message.
                if ($sTableFile !== '') {
                    $fTable = fopen($sTableFile, 'w');
                    if ($fTable) {
                        if ($bFilesExisted) {
                            //If the intron files were skipped you'll need to adjust $nEnd
                            $sCodingSequence = implode($aExon);// put the whole exon array in a string
                            $nEnd  = 0;
                            for ($i = $nStartTranslation; $i < $nExonNucleotides; $i += 3) {//$nExonNucleotides should be the same as strlen($sCodingSequence) check this
                                if (in_array(strtolower(substr($sCodingSequence, $i, 3)), array('taa', 'tag', 'tga'))) {
                                    // stop codon!
                                    $nEnd = $i + 3;
                                    break;
                                }
                            }
                        }

                        // write the column headers
                        fwrite($fTable, 'exon #' . "\t" . 'c.startExon' . "\t" . 'c.endExon' . "\t" . 'g.startExon' . "\t" . 'g.endExon' . "\t" . 'lengthExon' . "\t" . 'lengthIntron' . "\n");

                        $nStartExonCoding = 1 - $nStartTranslation;     //start of nucleotide numbering exon coding DNA
                        $nEndExonCoding = 0;                            //end of nucleotide numbering exon coding DNA
                        $nStartExonGenomic = 1 + strlen($aIntron[0]);   //start of nucleotide numbering exon genomic DNA
                        $nEndExonGenomic = 0;                           //end of nucleotide numbering exon genomic DNA
                        $lCodingSeq = $nEnd - $nStartTranslation;       //length of the coding sequence (from start to stop codon)
                        $bStopExon = false;                             // flag if translation already stopped

                        for ($i = 1; $i <= count($aExon); $i++) {// exons are numbered from 1
                            $lTableExonCoding  = strlen($aExon[$i]);                       // counts any '|' characters in the exon string
                            $lTableExonGenomic = strlen(str_replace("|", "", $aExon[$i])); // exon length without '|'
                            $nEndExonCoding = $nStartExonCoding + $lTableExonCoding - 1;
                            $nEndExonGenomic = $nStartExonGenomic + $lTableExonGenomic - 1;

                            if (($nEndExonCoding >= $lCodingSeq) && $nStartExonCoding < $lCodingSeq && $bStopExon == false) {
                                // Translation stops in this exon; only the end position gets the '*' prefix
                                $bStopExon = true;
                                $nEndExonCoding = $nStartExonCoding + $nStartTranslation + $lTableExonCoding - $nEnd - 1;
                                $sTableCodingStart = $nStartExonCoding;
                                $sTableCodingEnd = "*" . $nEndExonCoding;
                                $nTableNextCodingStart = $nEndExonCoding + 1;

                            } elseif ($bStopExon == true) {
                                // Translation stopped in a previous exon; both positions get the '*' prefix
                                $sTableCodingStart = "*" . $nStartExonCoding;
                                $sTableCodingEnd = "*" . $nEndExonCoding;
                                $nTableNextCodingStart = $nEndExonCoding + 1;

                            } else {
                                // no translation stop in this or previous exons
                                $sTableCodingStart = $nStartExonCoding;
                                $sTableCodingEnd = $nEndExonCoding;
                                $nTableNextCodingStart = $nStartExonCoding + $lTableExonCoding;
                            }

                            $sTableRow = $i . "\t" . $sTableCodingStart . "\t" . $sTableCodingEnd . "\t" . $nStartExonGenomic . "\t" . $nEndExonGenomic . "\t" . $lTableExonGenomic;
                            if ($i != count($aExon)) {
                                // not the last exon, also write the intron length
                                $sTableRow .= "\t" . strlen($aIntron[$i]);
                            }
                            fwrite($fTable, $sTableRow . "\n");

                            $nStartExonCoding = $nTableNextCodingStart;
                            $nStartExonGenomic = $nStartExonGenomic + $lTableExonGenomic + strlen($aIntron[$i]);
                        }
                        $sOut .= ($sOut? "\n" : '') . 'Successfully wrote exon lengths table, see: <A href="'. ROOT_PATH . 'refseq/' . $_POST['symbol'] . '_table.txt" target="_blank">' . $_POST['symbol'] . '_table.txt</A>)';
                        fclose($fTable);
                    } else {
                        // This really shouldn't happen, as we have checked this already...
                        // Report the file name; the failed handle itself would just print as an empty string.
                        lovd_errorAdd('Couldn\'t open file to write to for table ' . $sTableFile);
                    }
                }
        
                // Create a table with start and end positions of exons in genomic and coding DNA, including lengths and intron lengths
                // and write to a html file.
                // Same columns as the tab-delimited table; the intron length cell of the last exon holds '&nbsp;'.
                $sTableHTMLFile = $sPath . $_POST['symbol'] . '_table.html';
                if (file_exists($sTableHTMLFile)) {
                    switch ($_POST['exists']) {
                        case 'skip' :
                            // Skip this file, we already have a file
                            $sOut .= ($sOut? "\n" : '') . 'Skipped creation of html table, file existed';
                            $sTableHTMLFile = '';
                            break;
                        case 'rename' :
                            // Rename the old file, we create a new refseq
                            $sTableHTMLFile = lovd_fileCopiesExist($sTableHTMLFile);
                            break;
                    }
                }

                // Make sure every $aIntron entry read below exists (no up and/or downstream sequence may have
                // been provided), so this part does not depend on another part having filled them in.
                if (!isset($aIntron[0])) {
                    // no upstream sequence provided
                    $aIntron[0] = '';
                }
                for ($i = 1; $i <= count($aExon); $i++) {
                    if (!isset($aIntron[$i])) {
                        // no intron or downstream sequence provided
                        $aIntron[$i] = '';
                    }
                }

                // Only try to open the file when we actually have a file name; an empty name means "skipped"
                // and used to lead to a failing fopen('') plus a misleading error message.
                if ($sTableHTMLFile !== '') {
                    $fTable = fopen($sTableHTMLFile, 'w');
                    if ($fTable) {
                        if ($bFilesExisted) {
                            //If the intron files were skipped you'll need to adjust $nEnd
                            $sCodingSequence = implode($aExon);// put the whole exon array in a string
                            $nEnd  = 0;
                            for ($i = $nStartTranslation; $i < $nExonNucleotides; $i += 3) {//$nExonNucleotides should be the same as strlen($sCodingSequence) check this
                                if (in_array(strtolower(substr($sCodingSequence, $i, 3)), array('taa', 'tag', 'tga'))) {
                                    // stop codon!
                                    $nEnd = $i + 3;
                                    break;
                                }
                            }
                        }

                        fwrite($fTable, '<HTML><BODY>' . "\n\n" . '<TABLE border="1">' . "\n");
                        // write the column headers
                        fwrite($fTable, '  <TR>' . "\n" . '    <TH>exon</TH>' . "\n" . '    <TH>c.startExon</TH>' . "\n" . '    <TH>c.endExon</TH>' . "\n" . '    <TH>g.startExon</TH>' . "\n" . '    <TH>g.endExon</TH>' . "\n" . '    <TH>lengthExon</TH>' . "\n" . '    <TH>lengthIntron</TH></TR>');
                        $nStartExonCoding = 1 - $nStartTranslation;     //start of nucleotide numbering exon coding DNA
                        $nEndExonCoding = 0;                            //end of nucleotide numbering exon coding DNA
                        $nStartExonGenomic = 1 + strlen($aIntron[0]);   //start of nucleotide numbering exon genomic DNA
                        $nEndExonGenomic = 0;                           //end of nucleotide numbering exon genomic DNA
                        $lCodingSeq = $nEnd - $nStartTranslation;       //length of the coding sequence (from start to stop codon)
                        $bStopExon = false;                             // flag if translation already stopped

                        for ($i = 1; $i <= count($aExon); $i++) {// exons are numbered from 1
                            $lHtmlExonCoding  = strlen($aExon[$i]);                       // counts any '|' characters in the exon string
                            $lHtmlExonGenomic = strlen(str_replace("|", "", $aExon[$i])); // exon length without '|'
                            $nEndExonCoding = $nStartExonCoding + $lHtmlExonCoding - 1;
                            $nEndExonGenomic = $nStartExonGenomic + $lHtmlExonGenomic - 1;

                            if (($nEndExonCoding >= $lCodingSeq) && $nStartExonCoding < $lCodingSeq && $bStopExon == false) {
                                // Translation stops in this exon; only the end position gets the '*' prefix
                                $bStopExon = true;
                                $nEndExonCoding = $nStartExonCoding + $nStartTranslation + $lHtmlExonCoding - $nEnd - 1;
                                $sHtmlCodingStart = $nStartExonCoding;
                                $sHtmlCodingEnd = "*" . $nEndExonCoding;
                                $nHtmlNextCodingStart = $nEndExonCoding + 1;

                            } elseif ($bStopExon == true) {
                                // Translation stopped in a previous exon; both positions get the '*' prefix
                                $sHtmlCodingStart = "*" . $nStartExonCoding;
                                $sHtmlCodingEnd = "*" . $nEndExonCoding;
                                $nHtmlNextCodingStart = $nEndExonCoding + 1;

                            } else {
                                // no translation stop in this or previous exons
                                $sHtmlCodingStart = $nStartExonCoding;
                                $sHtmlCodingEnd = $nEndExonCoding;
                                $nHtmlNextCodingStart = $nStartExonCoding + $lHtmlExonCoding;
                            }

                            // One table row; each row starts with a newline because the previous row has none at its end.
                            $aHtmlRowCells = array(
                                $i,
                                $sHtmlCodingStart,
                                $sHtmlCodingEnd,
                                $nStartExonGenomic,
                                $nEndExonGenomic,
                                $lHtmlExonGenomic,
                                ($i == count($aExon)? '&nbsp;' : strlen($aIntron[$i]))
                            );
                            fwrite($fTable, "\n" . '<TR>' . "\n" . '    <TD>' . implode('</TD>' . "\n" . '    <TD>', $aHtmlRowCells) . '</TD></TR>');

                            $nStartExonCoding = $nHtmlNextCodingStart;
                            $nStartExonGenomic = $nStartExonGenomic + $lHtmlExonGenomic + strlen($aIntron[$i]);
                        }
                        $sOut .= ($sOut? "\n" : '') . 'Successfully wrote exon lengths table, see: <A href="'. ROOT_PATH . 'refseq/' . $_POST['symbol'] . '_table.html" target="_blank">exons table</A>)';
                        fwrite($fTable, '</TABLE>' . "\n\n" . '</BODY></HTML>');
                        fclose($fTable);
                    } else {
                        // This really shouldn't happen, as we have checked this already...
                        // Report the file name; the failed handle itself would just print as an empty string.
                        lovd_errorAdd('Couldn\'t open file to write to for table ' . $sTableHTMLFile);
                    }
                }



                // Create a file in GenBank flat file format.
                if ($_POST['genbankfile']) {
                    // Target file for the GenBank flat file: genbank/<symbol>_lovd.gb below ROOT_PATH.
                    $sGenBankFile = ROOT_PATH . 'genbank/' . $_POST['symbol'] . '_lovd.gb';
                    
                    // If the file is already there, $_POST['exists'] decides what happens:
                    // 'skip' empties the file name, 'rename' hands the name to lovd_fileCopiesExist();
                    // any other value leaves the name unchanged.
                    if (file_exists($sGenBankFile)) {
                        switch ($_POST['exists']) {
                            case 'skip' :
                                // Skip this file, we already have a file
                                $sOut .= ($sOut? "\n" : '') . 'Skipped creation of file in GenBank format, file existed';
                                $sGenBankFile = '';
                                break;
                            case 'rename' :
                                // Rename the old file, we create a new refseq
                                $sGenBankFile = lovd_fileCopiesExist($sGenBankFile);
                                break;
                        }
                    }
            
                    if ($bFilesExisted) {
                        //If the intron files were skipped you'll need to adjust $nEnd
                        // The joined exons are scanned in steps of three, starting at $nStartTranslation;
                        // $nEnd becomes the offset right after the first TAA/TAG/TGA and stays 0 if none is found.
                        $sCodingSequence = implode($aExon);// put the whole exon array in a string
                        $nEnd  = 0;
                        for ($i = $nStartTranslation; $i < $nExonNucleotides; $i += 3) {//$nExonNucleotides should be the same as strlen($sCodingSequence) check this
                            if (in_array(strtolower(substr($sCodingSequence, $i, 3)), array('taa', 'tag', 'tga'))) {
                                // stop codon!
                                $nEnd = $i + 3;
                                break;
                            }
                        }
                    }

                    // NOTE(review): after 'skip' the file name is '' here, so this fopen() is called with an
                    // empty path; how a failed handle is dealt with is further down - confirm it covers this case.
                    $fGenBank = fopen($sGenBankFile, 'w');
                    // Sequence for the ORIGIN part: upstream sequence, then every exon (without the '|' marker)
                    // followed by the intron or downstream sequence that comes after it.
                    // 2.0-13; 2008-10-31 Added by Gerard to solve bug when no up and/or downstream sequences were provided in the GenBank file
                    if (!isset($aIntron[0])) {
                        // no upstream sequence provided
                        $aIntron[0] = '';
                    }
                    $sOrigin = $aIntron[0];
                    // Loop over the exons instead of over $aIntron: the latter may have fewer entries than there
                    // are exons (no downstream sequence, no introns), which used to drop exons from the sequence.
                    for ($n = 1; $n <= count($aExon); $n++) {
                        $sOrigin .= str_replace("|", "", $aExon[$n]); // add exon without the '|'
                        if (isset($aIntron[$n])) {
                            $sOrigin .= $aIntron[$n]; // add intron (or downstream sequence after the last exon)
                        }
                    }

                    // Now format the $sOrigin string according to the GenBank format: 6 colums of 10 nucleotides each, preceeded by a counter
                    // Each line: the position of its first nucleotide right-aligned in 9 characters, a space, and
                    // six groups padded to 10 characters that are each followed by a space.
                    $sOriginFormatted = '';
                    for ($k = 0; $k < strlen($sOrigin); $k += 60) {
                        $sOriginFormatted .= sprintf("%9d", $k + 1) . ' ';
                        for ($l = $k; $l < $k + 60; $l += 10) {
                            $sOriginFormatted .= sprintf("%-10s", substr($sOrigin, $l, 10)) . ' ';
                        }
                        $sOriginFormatted .= "\n";
                    }

                    // Determine the start and end positions for the mRNA and CDS parts
                    // The loop below walks over all exons and builds three location strings:
                    //   $smRNA - comma separated 'start..end' ranges, one per exon;
                    //   $sGene - start of the first exon '..' end of the last exon;
                    //   $sCDS  - comma separated 'start..end' ranges of the coding part.
                    // Positions are 1-based and computed by adding up intron and exon lengths,
                    // presumably so that they index into $sOrigin - TODO confirm.
                    $smRNA = '';
                    $sCDS = '';
                    $lCDS = 0; // Running total of the (marker-free) exon lengths handled so far.
                    $nEndRNA = 0; // End position of the previous exon; 0 before the first exon.
                    //2.0-15; 2009-02-03 by Gerard
                    $lTotalExons = 0; // total length of the exons before the exon in which translation starts

                    $bStartExon = false; // Set once the exon holding the translation start has been written.
                    $bEndExon = false; // Set as soon as an exon after the start exon has been written in full.
                    $bCDSwritten = false; // Set once the last CDS range has been written.
                    $lCodingSeq = $nEnd - $nStartTranslation;
    
                    for ($n = 1; $n <= count($aExon); $n++) {
                        // 2.0-13; 2008-10-31 Added by Gerard to solve bug when no up and/or downstream sequences were provided in the GenBank file
                        if (!isset($aIntron[0])) {
                            // no upstream sequence provided
                            $aIntron[0] = '';
                        }

                        // This exon starts right after the preceding intron (or upstream sequence for $n == 1).
                        $nStartRNA = strlen($aIntron[$n-1]) + $nEndRNA + 1;
                        // The '|' translation start marker is not a nucleotide, so it is not counted.
                        $nEndRNA = $nStartRNA + strlen(str_replace("|", "", $aExon[$n])) - 1;
                        $lCDS += ($nEndRNA - $nStartRNA + 1);
                        $smRNA .= $nStartRNA . '..' . $nEndRNA;
                        if ($n < count($aExon)) {
                            // no comma after the last exon
                            $smRNA .= ',';
                        }
                        if ($n == 1) {
                            $nStartGene = $nStartRNA;
                        }
                        //2.0-15; 2009-02-03 by Gerard
                        if ($n < count($aExon)) {
                            // calculate the total length of the exons before the translation start exon
                            // needed to calculate the $nStartCDS of the translation start exon
                            // NOTE(review): this reads $aExon[$n - 1], which is $aExon[0] in the first
                            // iteration while the loop itself indexes $aExon from 1, and the '|' marker
                            // is not stripped here as it is for $nEndRNA - confirm this is intended.
                            $lTotalExons += strlen($aExon[$n - 1]);
                        }
                        if ($n == count($aExon)) {
                            $nEndGene = $nEndRNA;
                            $sGene = $nStartGene . '..' . $nEndGene;
                        }
                        // in_array($x, range(1, $lCDS)) is used throughout as a test for
                        // '$x is a whole number from 1 up to and including $lCDS'.
                        if (in_array($nStartTranslation, range(1, $lCDS)) && !$bStartExon && !$bEndExon) {
                            // start of translation is in this exon
                            if ($n == 1) {
                                $nStartCDS = $nStartRNA + $nStartTranslation - 1;
                            } else {
                                //2.0-15; 2009-02-03 by Gerard: strlen($aExon[$n - 1]) replaced by $lTotalExons
                                $nStartCDS = $nStartRNA + ($nStartTranslation - $lTotalExons - 1);
                            }
                            $nEndCDS = $nEndRNA;
                            $sCDS .= $nStartCDS . '..' . $nEndCDS;
                            $bStartExon = true;
                            //2.0-15; 2009-02-03 by Gerard: To prevent an ending comma
                            if (count($aExon) != 1) {
                                // You do not want a comma if it is a one exon gene
                                $sCDS .= ',';
                            }
                        } elseif (in_array($nStartTranslation, range(1, $lCDS)) && !in_array($nEnd - 1, range(1, $lCDS))) {
                            // past exon with start of translation
                            // $nEnd - 1 is not yet within the exons handled so far, so the whole exon is written.
                            $nStartCDS = $nStartRNA;
                            $nEndCDS = $nEndRNA;
                            $sCDS .= $nStartCDS . '..' . $nEndCDS;
                            $bEndExon = true;
                            $sCDS .= ',';
                        } elseif (in_array($nStartTranslation, range(1, $lCDS)) && in_array($lCodingSeq, range(1, $lCDS)) && !$bCDSwritten){
                            
                            // Final CDS range: starts at the exon start and ends before the exon end,
                            // at a position derived from $nEnd. Only written once ($bCDSwritten).
                            $nStartCDS = $nStartRNA;
                            $nEndCDS = $nStartCDS + strlen(str_replace("|", "", $aExon[$n])) - 1 - $lCDS + $nEnd - 1;
                            $sCDS .= $nStartCDS . '..' . $nEndCDS;
                            $bCDSwritten = true;
                        }
                    }
                    // Write the GenBank flat file when the file could be opened; otherwise register an error.
                    if ($fGenBank) {
                        // Fixed header, the feature table (source, gene, mRNA, CDS) built from the
                        // locations calculated above and the submitted form fields, followed by the
                        // formatted ORIGIN block and the '//' terminator.
                        fwrite($fGenBank,
                            'LOCUS       NC_00000' . "\t" . strlen($sOrigin) . ' bp' . "\t" . 'DNA' . "\n" .
                            'DEFINITION  ' . "\n" .
                            'ACCESSION   NC_00000' . "\n" .
                            'SOURCE      Homo sapiens (human)' . "\n" .
                            '  ORGANISM  Homo sapiens' . "\n" .
                            '            Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi;' . "\n" .
                            '            Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini;' . "\n" .
                            '            Catarrhini; Hominidae; Homo.' . "\n" .
                            'FEATURES             Location/Qualifiers' . "\n" .
                            '     source          1..' . strlen($sOrigin) . "\n" .
                            '                     /organism="Homo sapiens"' . "\n" .
                            '     gene            ' . $sGene . "\n" .
                            '                     /gene="' . $_POST['symbol'] . '"' . "\n" .
                            '     mRNA            join(' . $smRNA . ')' . "\n" .
                            '                     /gene="' . $_POST['symbol'] . '"' . "\n" .
                            '                     /transcript_id="' . $_POST['transcript_id'] . '"' . "\n" .
                            '     CDS             join(' . $sCDS . ')' . "\n" .
                            '                     /gene="' . $_POST['symbol'] . '"' . "\n" .
                            '                     /protein_id="' . $_POST['protein_id'] . '"' . "\n" .
                            '                     /db_xref="' . $_POST['db_xref'] . '"' . "\n" .
                            'ORIGIN      ' . "\n" . $sOriginFormatted . "\n" .
                            "//");

                        $sOut .= ($sOut? "\n" : '') . 'Successfully wrote file in GenBank flat file format '. $_POST['symbol'] . '_lovd.gb';
                        fclose($fGenBank);

                        //2.0-14; 2008-11-14 by Gerard for updating the database. If the file was skipped (because it existed) $sGenBankFile will be empty
                        // Only curators of this gene get the database updated, and only when no GenBank file was registered yet.
                        // NOTE(review): $_POST['symbol'] is put into the query as is; presumably it has been
                        // validated against the gene symbol pattern before this point - confirm.
                        if (lovd_isCurator($_POST['symbol'])) {
                            @mysql_query('UPDATE ' . TABLE_DBS . ' SET genbank = 1, genbank_uri ="' . $_POST['symbol'] . '_lovd.gb" WHERE symbol = "' . $_POST['symbol'] . '" AND genbank = 0 AND genbank_uri= ""');
                        }

                    } else {
                        // This really shouldn't happen, as we have checked this already...
                        // Report the file name; the handle is false here and says nothing about which file failed.
                        lovd_errorAdd('Couldn\'t open file in GenBank format to write to ' . $sGenBankFile);
                    }
                }
                // Show what has been done in this step, one message per line.
                print('Output for this step:<BR>' . "\n" . str_replace("\n", '<BR>' . "\n", $sOut) . '<BR><BR>' . "\n");


                // To continue to step 3, we need to create a form and send all data.
                // All submitted values are escaped before they are written into the attribute values;
                // the browser decodes the entities again, so step 3 receives the same data, but a
                // value can no longer break out of the attribute.
                $sHiddenFields = '';
                foreach (array('gene', 'symbol', 'file', 'sequence', 'exists') as $sHiddenField) {
                    $sHiddenValue = (isset($_POST[$sHiddenField])? $_POST[$sHiddenField] : '');
                    $sHiddenFields .= '  <INPUT type="hidden" name="' . $sHiddenField . '" value="' . htmlspecialchars($sHiddenValue, ENT_QUOTES, 'ISO-8859-1') . '">' . "\n";
                }
                print('<FORM action="' . htmlspecialchars($_SERVER['PHP_SELF'], ENT_QUOTES, 'ISO-8859-1') . '?step=3" method="post">' . "\n" .
                      $sHiddenFields .
                      '  <INPUT type="submit" value="Continue to next step">' . "\n" .
                      '</FORM><BR>' . "\n\n");

                require ROOT_PATH . 'inc-bot-clean.php';
                exit;
            }
        }

    } else {
        // Form not sent yet: fill in the defaults.
        $_POST['exists'] = 'overwrite';

        // Do we have a gene selected?
        // 2008-10-31; 2.0-13; by Gerard to solve already filled in textboxes
        // Only when neither the symbol nor the gene name was posted, the currently
        // selected gene database (if any) provides the values.
        if (!(isset($_POST['symbol']) || isset($_POST['gene'])) && $_SESSION['currdb']) {
            $_POST['symbol'] = $_SESSION['currsymb'];
            $_POST['gene'] = $_SETT['currdb']['gene'];
            // Pre-select the GenBank file option unless both GenBank settings are already filled in.
            if (!$_SETT['currdb']['genbank'] || !$_SETT['currdb']['genbank_uri']) {
                $_POST['genbankfile'] = 1;
            }
        }

        // Values given in the URL take precedence.
        foreach (array('symbol', 'gene') as $sField) {
            if (!empty($_GET[$sField])) {
                $_POST[$sField] = $_GET[$sField];
            }
        }

        // Make sure the 'file' field always exists.
        if (empty($_POST['file'])) {
            $_POST['file'] = '';
        }
    }



    // Print the form for step 2: create intronic sequences
    require ROOT_PATH . 'inc-top-clean.php';
    print('<SPAN class="S15"><B>Step 2 - Create intronic sequences</B></SPAN><BR><BR>' . "\n\n");

    // Show any errors collected while checking a previous submission.
    lovd_errorPrint();

    // The script name and the 'file' value both originate from the request, so they are
    // escaped before being written into an HTML attribute.
    print('<FORM action="' . htmlspecialchars($_SERVER['PHP_SELF'], ENT_QUOTES, 'ISO-8859-1') . '?step=2&amp;sent=true" method="post">' . "\n" .
                         '  <INPUT type="hidden" name="file" value="' . htmlspecialchars((isset($_POST['file'])? $_POST['file'] : ''), ENT_QUOTES, 'ISO-8859-1') . '">' . "\n" .
                         '  <TABLE border="0" cellpadding="1" cellspacing="0" width="700">' . "\n");

    // Form definition handed to lovd_viewForm(); the entries are listed in display order.
    $aForm = array();
    $aForm[] = array('POST', '', '', '50%', '50%');
    $aForm[] = array('', 'print', '(All fields are mandatory unless specified otherwise)');
    $aForm[] = 'hr';
    $aForm[] = array('Gene name', 'text', 'gene', '50');
    $aForm[] = 'hr';
    $aForm[] = array('Gene symbol', 'text', 'symbol', '12');
    $aForm[] = 'hr';
    $aForm[] = array('Input sequence', 'textarea', 'sequence', '60', '8');
    $aForm[] = array('', 'print', '<A href="' . ROOT_PATH . 'docs/lovd_scripts/reference_sequence_parser.php" target="_blank">More information on the format</A>');
    $aForm[] = 'hr';
    $aForm[] = 'skip';
    $aForm[] = 'hr';
    // Optional GenBank file section.
    $aForm[] = array('', 'print', '<B>If you want LOVD to create a GenBank file, fill in these fields</B>');
    $aForm[] = array('Create a file in GenBank format', 'checkbox', 'genbankfile', 1);
    $aForm[] = array('Transcript ID', 'text', 'transcript_id', '12');
    $aForm[] = array('', 'print', '<SPAN class="form_note">In GenBank file mRNA annotation, e.g. NM_000070.2</SPAN>');
    $aForm[] = array('Protein ID', 'text', 'protein_id', '12');
    $aForm[] = array('', 'print', '<SPAN class="form_note">In GenBank file CDS annotation, e.g. NP_000061.1</SPAN>');
    $aForm[] = array('db_xref', 'text', 'db_xref', '12');
    $aForm[] = array('', 'print', '<SPAN class="form_note">In GenBank file CDS annotation, e.g. GI:4557405</SPAN>');
    $aForm[] = 'hr';
    $aForm[] = 'skip';
    $aForm[] = 'hr';
    // What to do with files that already exist.
    $aForm[] = array('If files are found to exist, I will', 'select', 'exists', 1, array('skip' => 'skip the file', 'rename' => 'rename the old file', 'overwrite' => 'overwrite it'), '', '', '');
    $aForm[] = array('', 'submit', 'Continue');
    lovd_viewForm($aForm);
    print('</TABLE><BR>' . "\n\n" .
          '  </FORM>' . "\n\n");

    require ROOT_PATH . 'inc-bot-clean.php';
    exit;
}






if ($_GET['step'] == 3) {
    // Get sequence from 2 and parse the coding DNA sequence.

    if (isset($_GET['sent'])) {
        // Verification of the sequence.

        // Error check: start with a clean error list.
        lovd_errorClean();

        // Mandatory fields (form field name => label shown in the error message).
        $aCheck = array('gene' => 'Gene name', 'symbol' => 'Gene symbol', 'sequence' => 'Input sequence');
        foreach (array_keys($aCheck) as $sField) {
            if (!empty($_POST[$sField])) {
                // Filled in, nothing to complain about.
                continue;
            }
            lovd_errorAdd('Please fill in the \'' . $aCheck[$sField] . '\' field.');
        }

        // Genesymbol format, based on http://www.gene.ucl.ac.uk/nomenclature/guidelines.html#1.%20Criteria%20for%20symbol%20assignment
        // Only checked when a symbol was given; an empty symbol has been reported above already.
        if ($_POST['symbol']) {
            $bSymbolOK = (preg_match('/^[A-Z][A-Z0-9]+(_[A-Za-z0-9_-]+)?$/', $_POST['symbol']) && strlen($_POST['symbol']) <= 12);
            if (!$bSymbolOK) {
                // Error in genesymbol.
                lovd_errorAdd('Incorrect gene symbol. This field can contain up to 12 characters. The offical gene symbol can only contain uppercase letters and numbers, it may be appended with an underscore followed by letters, numbers, hyphens and underscores.');
            }
        }

        if (!lovd_error()) {
            // All fields filled in, go ahead

            // Need to get the correct variables
            lovd_magicUnquoteAll();

            // Work on a copy of the submitted sequence without any line breaks.
            $sSeq = str_replace("\r", '', $_POST['sequence']);
            $sSeq = str_replace("\n", '', $sSeq);

            // Needed variables
            $nNuclPreTranslStart = 0;    // Number of nucleotides before the translation starts
            $nNuclPostTranslStart = 0;   // Number of nucleotides after the translation starts
            $started = false;   // Did we find the translation sign yet?

            // Allowed characters: nucleotides (A, C, G, T in any case), ';' as exon split and one '|'
            // marking the start of the translation. Scanning stops at the first unexpected character.
            for ($i = 0; $i < strlen($sSeq); $i ++) {
                // Square brackets instead of $sSeq{$i}: the curly brace offset syntax was removed in
                // PHP 8.0 (parse error), whereas square brackets work on all PHP versions.
                $s = $sSeq[$i];
                // We will need to loop through the sequence to provide detailed error messages
                if (!$started) {
                    // We are still before the translation
                    if (preg_match('/[ACGT]/i', $s)) {
                        // We stay where we are
                        $nNuclPreTranslStart ++;
                    } elseif ($s == '|') {
                        // Translation starts
                        $started = true;
                    } elseif ($s == ';') {
                        // Next exon, who cares?
                    } else {
                        // The reported position counts nucleotides only, not the ';' separators.
                        lovd_errorAdd('Error : Unexpected character \'' . $s . '\' at char ' . ($nNuclPreTranslStart + 1));
                        break;
                    }
                } else {
                    // We are already translating.
                    if (preg_match('/[ACGT]/i', $s)) {
                        // All ok
                        $nNuclPostTranslStart ++;
                    } elseif ($s == ';') {
                        // Next exon, who cares
                    } else {
                        // A second '|' ends up here as well.
                        lovd_errorAdd('Error : Unexpected character \'' . $s . '\' at char ' . ($nNuclPreTranslStart + $nNuclPostTranslStart + 1));
                        break;
                    }
                }
            }

            if (!$started) {
                lovd_errorAdd('No translation start could be found. This doesn\'t seem to be a valid coding DNA sequence.');
            }
            
            // The sequence has been parsed. Now create the coding DNA file.
            $sNow = date('F j, Y');              // Date shown in the page itself.
            $sNowHead = date('Y-m-d H:i:s');     // Timestamp for the generator META tag.
            $sPath = ROOT_PATH . 'refseq/';
            $sOut = '';

            $sFile = $sPath . $_POST['symbol'] . '_codingDNA.html';
            if (file_exists($sFile)) {
                // The file is already there; act according to the user's choice.
                // Any other choice than 'skip' or 'rename' simply overwrites the file.
                $sIfExists = $_POST['exists'];
                if ($sIfExists == 'skip') {
                    // Skip this file, we already have a file. Report this and stop.
                    $sOut .= ($sOut? "\n" : '') . 'Skipped coding DNA, file existed';
                    require ROOT_PATH . 'inc-top-clean.php';
                    print('<SPAN class="S15"><B>Step 3 - Create coding DNA reference sequence</B></SPAN><BR><BR>' . "\n\n");

                    print('Output for this step :<BR>' . "\n" . str_replace("\n", '<BR>' . "\n", $sOut) . '<BR><BR>' . "\n");

                    require ROOT_PATH . 'inc-bot-clean.php';
                    exit;
                } elseif ($sIfExists == 'rename') {
                    // Rename the old file, we create a new refseq
                    $sFile = lovd_fileCopiesExist($sFile);
                }
            }

            // Write to file.
            $fCoding = fopen($sFile, 'w');
            if ($fCoding) {
                // Write the HTML head and the page heading of the coding DNA reference sequence file.
                // The heading is a plain <H1> that is closed by </H1>; it must not contain
                // </TITLE> or <META> fragments, which belong in the HEAD only.
                fputs($fCoding, '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"' . "\n" .
                                '        "http://www.w3.org/TR/html4/loose.dtd">' . "\n" .
                                '<HTML lang="en">' . "\n" .
                                '<HEAD>' . "\n" .
                                '  <TITLE>' . $_POST['gene'] . ' (' . $_POST['symbol'] . ') - coding DNA reference sequence</TITLE>' . "\n" .
                                '  <META http-equiv="content-type" content="text/html; charset=ISO-8859-1">' . "\n" .
                                '  <META name="generator" content="LOVD v.' . $_SETT['system']['version'] . '-' . $_SETT['system']['build'] . ' Reference Sequence Parser @ ' . $sNowHead . '">' . "\n" .
                                '  <META name="LOVD copyright" content="&copy; 2004-2008 LUMC: http://www.LUMC.nl/">' . "\n\n" .
                                '  <STYLE type="text/css">' . "\n" .
                                '    body {font-family : Verdana, Helvetica, sans-serif; font-size : 13px;}' . "\n" .
                                '    pre  {font-family : monospace;}' . "\n" .
                                '    sup  {font-size : 0.5em;}' . "\n" .
                                '  </STYLE>' . "\n" .
                                '</HEAD>' . "\n\n" .
                                '<BODY>' . "\n\n" .
                                '<HR>' . "\n" .
                                '<H1 align="center">' . $_POST['gene'] . ' (' . $_POST['symbol'] . ') - coding DNA reference sequence</H1>' . "\n" .
                                '<P align="center"><I>(used for mutation description)<BR><BR>(last modified ' . $sNow . ')</I></P>' . "\n" .
                                '<HR>' . "\n\n");
                // When a GenBank accession was given, add a link to it at the end of the note.
                if (!empty($_POST['gblink'])) {
                    $_POST['note'] .= ' The sequence was taken from <a href="http://www.ncbi.nlm.nih.gov/entrez/viewer.fcgi?db=nucleotide&amp;val=' . $_POST['gblink'] . '">' . $_POST['gblink'] . '</a></p>';
                }
                // The note is only written when it contains more than whitespace.
                if (trim($_POST['note'])) {
                    fputs($fCoding, $_POST['note'] . "\n" . '<HR>' . "\n");
                }
                // Tell the reader that the exon numbers are clickable, if links were requested.
                if ($_POST['link']) {
                    fputs($fCoding, '<I>Please note that introns are available by clicking on the exon numbers above the sequence.</I>' . "\n" . '<HR>' . "\n");
                }
                //2.0-13; 2008-10-30 added by Gerard
                // Open the preformatted block. Without an upstream file a plain text marker is shown;
                // with one, a link to it is shown, but only when links were requested.
                fputs($fCoding, '<PRE>' . "\n");
                if (!file_exists($sPath . $_POST['symbol'] . '_upstream.html')) {
                    fputs($fCoding, '(upstream sequence) ' . "\n");
                } elseif ($_POST['link']) {
                    fputs($fCoding, ' (<A href="' . $_POST['symbol'] . '_upstream.html">upstream sequence</A>)' . "\n");
                }
                // Get rid of any form of whitespace.
                // Note that the cleaned copy is stored in $_POST['sequence']; $sSeq itself is only lowercased below.
                $_POST['sequence'] = preg_replace('/\s+/', '', $sSeq);
                // $sSeq will now contains the entire sequence including the exon splits
                $sSeq = strtolower($sSeq);
                $sTranslStart = '|';      // At this character translation starts
                // The space put in front makes strpos() return the 1-based position of the '|' in $sSeq.
                $nStartTranslation = strpos(' ' . $sSeq, $sTranslStart);    // attention: can be present in the 5utr region
                $s5utr  = substr($sSeq, 0, $nStartTranslation - 1);         // Sequence before the ATG
                $sTranslated = strtoupper(substr($sSeq, $nStartTranslation));     // Sequence from the ATG
                $l5utr  = strlen(str_replace(';', '', $s5utr));             // Number of nucleotides in $s5utr
                $lTranslated = strlen(str_replace(';', '', $sTranslated));  // Number of nucleotides $sTranslated



                // We must know the locations of the exon splits
                // Each stored value is the 1-based position, in the string without ';' characters,
                // of the first nucleotide after an exon split. The -1 key holds the starting value 0.
                $aExonStartPos5utr = array(-1 => 0); // start positions of exons in the 5utr region
                $nExon5utr = substr_count($s5utr, ';'); //number of exons splits in the 5utr region
                for ($i = 0; $i < $nExon5utr; $i ++) {// Original note (translated): $aExonStartPos5utr[$i-1] does not exist in the first step; what if you simply start at 1. (The -1 key set above provides it.)
                    // Loop; continue until you have found all the exons. Puts all the exon split locations in an array
                    // Search for the next ';' starting from the previously found position.
                    $aExonStartPos5utr[$i] = $aExonStartPos5utr[$i-1] + strpos(substr($s5utr, $aExonStartPos5utr[$i-1]), ';') + 1;
                    // Removes the ';' exon split from the sequence.
                    $s5utr = substr_replace($s5utr, '', $aExonStartPos5utr[$i] - 1, 1);
                }

                // Keep the exon splits in an array both for the 5utr and the translated region
                // Same procedure as above, now for the sequence after the translation start.
                $aExonStartPosTransl = array(-1 => 0); // start positions of exons in the translated region
                $nExonTrans = substr_count($sTranslated, ';');
                for ($i = 0; $i < $nExonTrans; $i ++) {// Original note (translated): $aExonStartPosTransl[$i-1] does not exist yet in the first step. (The -1 key set above provides it.)
                    // Loop; continue until you have found all the exons. Puts all the exon split locations in an array
                    $aExonStartPosTransl[$i] = $aExonStartPosTransl[$i-1] + strpos(substr($sTranslated, $aExonStartPosTransl[$i-1]), ';') + 1;
                    // Removes the ';' exon split from the sequence.
                    $sTranslated = substr_replace($sTranslated, '', $aExonStartPosTransl[$i] - 1, 1);
                }

                // Prevent error if split occurs right before the ATG.
                // Such a split is recorded as position strlen($s5utr) + 1 in the 5utr array; it is
                // added to the translated array as position 1. sort() also renumbers the keys from 0.
                if (in_array(strlen($s5utr) + 1, $aExonStartPos5utr)) {
                    $aExonStartPosTransl[] = 1;
                    sort($aExonStartPosTransl);
                    $nExonTrans ++;
                }


                $nExon = 1;          // Number of the exon currently being written.
                $sNumr = '      ';   // Six spaces, written between the sequence and the position number at the end of a line.

                // Write the sequence before the translation start. For every line two rows are written:
                // a ruler row (dots taken from $sLinemarkBack, with exon numbers at the exon splits)
                // and the nucleotide row, followed by a negative position number. The first line only
                // holds the nucleotides that do not fill a complete line and is indented with spaces.
                if ($s5utr) {
                    // determine the number of 5utr nucleotides in the first line: leftover
                    $nLeftover = $l5utr % $LENGTH_LINE;
                    if ($nLeftover) {
                        $sRegl = substr($s5utr, 0, $nLeftover); // The nucleotides of this first, partial line.
                        $l_voor_left = $LENGTH_LINE - $nLeftover; // Number of positions left empty in front of them.
                        // Count the stored exon start positions that are smaller than $nLeftover.
                        // NOTE(review): $aExonStartPos5utr appears to be initialised with a 0 entry elsewhere
                        // in this file, in which case this counter is always at least 1 - confirm.
                        $nReglExon = 0;
                        foreach ($aExonStartPos5utr as $val) {
                            if ($val<$nLeftover) {
                                $nReglExon ++;
                            } else {
                                break;
                            }
                        }
                        fputs($fCoding, " ");
                        $nLeft = $l_voor_left;//-(3*$nReglExon);//changed by Gerard at 22-08-2008
                        if ($nLeft >= 1) {
                            fputs($fCoding, str_repeat(" ", $nLeft));
                        }
                        // The ruler for this line: the last $nLeftover characters of the line mark.
                        $sReglDots = substr($sLinemarkBack, -$nLeftover);
                        if ($nReglExon) {
                            // Ruler row, character by character: at an exon split ' | ' and the (optionally
                            // linked) number of the new exon are written instead of the ruler character.
                            for ($i=0; $i<strlen($sRegl); $i++) {
                                if (in_array($i+1, $aExonStartPos5utr)) {
                                    $nExon ++;
                                    fputs($fCoding, " | ");
                                    if ($_POST['link']) {
                                        fputs($fCoding, "<A href=\"" . $_POST['symbol'] . "_intron_" . str_pad($nExon-1, "2", "0", STR_PAD_LEFT) . ".html\" name=\"" . ($nExon - 1) . "\">");
                                    } else {
                                        fputs($fCoding, "<A name=\"" . ($nExon - 1) . "\">");
                                    }
                                    // The number is written in bold when it takes the place of a dot.
                                    if (substr($sReglDots,$i,1) == ".") {
                                        fputs($fCoding, "<b>$nExon</b>");
                                    } else {
                                        fputs($fCoding, $nExon);
                                    }
                                    fputs($fCoding, "</A>");
                                } else {
                                    fputs($fCoding, substr($sReglDots, $i, 1));
                                }
                            }
                        } else {
                            fputs($fCoding, $sReglDots);
                        }
                        // Nucleotide row, with the same indentation as the ruler row.
                        fputs($fCoding, "\n ");
                        if ($nLeft >= 1) {
                            fputs($fCoding, str_repeat(" ", $nLeft));
                        }
                        if ($nReglExon) {
                            for ($i=0; $i<strlen($sRegl); $i++) {
                                if (in_array($i+1, $aExonStartPos5utr)) {
                                    fputs($fCoding, " | ");
                                }
                                fputs($fCoding, substr($sRegl, $i, 1));
                            }
                        } else {
                            fputs($fCoding, $sRegl);
                        }
                        // Position number of this line: minus the number of 5utr nucleotides still to come, plus one.
                        fputs($fCoding, ($nLeft < 1? substr($sNumr, -$nLeft) : $sNumr) . " -" . ($l5utr-$nLeftover+1) . "\n\n");
                    }

                    // line by line
                    // $i is a negative offset counted from the end of $s5utr, so substr() below
                    // takes the next $LENGTH_LINE nucleotides; $k is the matching 1-based position.
                    for ($i=-($l5utr-$nLeftover); $i+$LENGTH_LINE<=0; $i+=$LENGTH_LINE) {
                        $k = $i+$l5utr+1;
                        $sRegl = substr($s5utr, $i, $LENGTH_LINE);
                        // Count the exon starts that fall within this line.
                        $nVoorExon = 0;
                        foreach ($aExonStartPos5utr as $val) {
                            if ($val >= $k && $val < $k+$LENGTH_LINE) {
                                $nVoorExon ++;
                            } elseif ($val > $k+$LENGTH_LINE) {
                                // NOTE(review): 'continue' as the last statement of the loop body has no effect; possibly 'break' was intended - confirm.
                                continue;
                            }
                        }
                        if ($nVoorExon) {
                            // Ruler row with exon numbers. The digits of the exon number are written one
                            // by one; every digit after the first takes up an extra ruler position.
                            fputs($fCoding, " ");
                            for ($j=0; $j<$LENGTH_LINE; $j++,$k++) {
                                if (in_array($k, $aExonStartPos5utr)) {
                                    $nExon ++;
                                    fputs($fCoding, " | ");
                                    if ($_POST['link']) {
                                        fputs($fCoding, "<A href=\"" . $_POST['symbol'] . "_intron_" . str_pad($nExon-1, "2", "0", STR_PAD_LEFT) . ".html\" name=\"" . ($nExon - 1) . "\">");
                                    } else {
                                        fputs($fCoding, "<A name=\"" . ($nExon - 1) . "\">");
                                    }
                                    $sTmp = "";
                                    // First digit; bold when it replaces a dot of the ruler.
                                    if (substr($sLinemarkBack, $j, 1) == ".") {
                                        $sTmp .= "<B>" . substr($nExon, 0, 1) . "</B>";
                                    } else {
                                        $sTmp .= substr($nExon, 0, 1);
                                    }
                                    if ($nExon >= 10) {
                                        // Second digit.
                                        $j++;
                                        $k++;
                                        if (substr($sLinemarkBack, $j, 1) == ".") {
                                            $sTmp .= "<B>" . substr($nExon, 1, 1) . "</B>";
                                        } else {
                                            $sTmp .= substr($nExon, 1, 1);
                                        }
                                    }
                                    if ($nExon >= 100) {
                                        // Third digit.
                                        $j++;
                                        $k++;
                                        if (substr($sLinemarkBack, $j, 1) == ".") {
                                            $sTmp .= "<B>" . substr($nExon, 2, 1) . "</B>";
                                        } else {
                                            $sTmp .= substr($nExon, 2, 1);
                                        }
                                    }
                                    fputs($fCoding, $sTmp);
                                    fputs($fCoding, "</A>");
                                } else {
                                    fputs($fCoding, substr($sLinemarkBack, $j, 1));
                                }
                            }
                        } else {
                            fputs($fCoding, " " . $sLinemarkBack);
                        }
                        // Nucleotide row; $k is reset to the first position of this line.
                        fputs($fCoding, "\n ");
                        if ($nVoorExon) {
                            for ($j=0,$k=$i+$l5utr+1; $j<$LENGTH_LINE; $j++,$k++) {
                                if (in_array($k, $aExonStartPos5utr)) {
                                    fputs($fCoding, " | ");
                                }
                                fputs($fCoding, substr($sRegl, $j, 1));
                            }
                        } else {
                            fputs($fCoding, $sRegl);
                            $k += $LENGTH_LINE;
                        }
                        // Every ' | ' marker takes three characters, so the padding in front of the
                        // position number is shortened by three characters per exon split on this line.
                        fputs($fCoding, substr($sNumr, 3*$nVoorExon) . " " . ($i+$LENGTH_LINE-1) . "\n\n");
                    }
                }
                
                
                // ATG
                // Counters and state for writing the translated part of the sequence.
                $l_prnt = 0;
                $l_prot = 0;
                $stop = false;
                // Translation table. Every entry holds: the one letter code, the three letter code
                // and the list of codons. The entries are sorted on the one letter code; entry 19
                // ("X") holds the stop codons.
                $a_trns = array(
                    array("A", "Ala", array("GCA", "GCC", "GCG", "GCT")),
                    array("C", "Cys", array("TGC", "TGT")),
                    array("D", "Asp", array("GAC", "GAT")),
                    array("E", "Glu", array("GAA", "GAG")),
                    array("F", "Phe", array("TTC", "TTT")),
                    array("G", "Gly", array("GGA", "GGC", "GGG", "GGT")),
                    array("H", "His", array("CAC", "CAT")),
                    array("I", "Ile", array("ATA", "ATC", "ATT")),
                    array("K", "Lys", array("AAA", "AAG")),
                    array("L", "Leu", array("CTA", "CTC", "CTG", "CTT", "TTA", "TTG")),
                    array("M", "Met", array("ATG")),
                    array("N", "Asn", array("AAC", "AAT")),
                    array("P", "Pro", array("CCA", "CCC", "CCG", "CCT")),
                    array("Q", "Gln", array("CAA", "CAG")),
                    array("R", "Arg", array("AGA", "AGG", "CGA", "CGC", "CGG", "CGT")),
                    array("S", "Ser", array("AGC", "AGT", "TCA", "TCC", "TCG", "TCT")),
                    array("T", "Thr", array("ACA", "ACC", "ACG", "ACT")),
                    array("V", "Val", array("GTA", "GTC", "GTG", "GTT")),
                    array("W", "Trp", array("TGG")),
                    array("X", "***", array("TAA", "TAG", "TGA")),
                    array("Y", "Tyr", array("TAC", "TAT"))
                );

                // Write the translated region to $fCoding, one display line of at most $LENGTH_LINE
                // nucleotides per iteration. Every iteration emits up to three lines:
                //   1) the marker line: $sLinemark dots, with exon numbers at exon starts,
                //   2) the DNA line, followed by the running nucleotide number ($l_prnt),
                //   3) the protein line, followed by the running amino acid number ($l_prot).
                // $stop is false while translating, "stopped" on the line that holds the stop codon
                // and "done" on every line after it. From then on the sequence is written in
                // lowercase, without a protein line, and $l_prnt is counted as '*1', '*2', ...
                for ($i=0; $i<=$lTranslated; $i+=$LENGTH_LINE) {
                    $sPrnt = substr($sTranslated, $i, $LENGTH_LINE);
                    $sPrntFinl = "";
                    $l_line_nucl = 0;
                    $l_line_prot = 0;
                    $n_trns_exon = 0;
                    $k = $i+1;
                    // Count the exon starts that fall within this line ($k is the position of the
                    // first nucleotide of the line, counted from 1).
                    foreach ($aExonStartPosTransl as $val) {
                        if ($val >= $k && $val < $k+$LENGTH_LINE) {
                            $n_trns_exon ++;
                        }
                    }

                    // frameshift
                    // Line offsets that get lowercase / underline / bold markup.
                    $a_lowr = array();
                    $a_undr = array();
                    $a_bold = array();

                    // translation
                    //$s_prot = ""; // changed by Gerard at 22-08-2008; was never used
                    $sProtShrt = "";

                    // Translate this line codon by codon until a stop codon has been seen.
                    for ($j=0; $j < $LENGTH_LINE && $stop == false; $j+=3) {
                        for ($k=0; $k < count($a_trns); $k++) {
                            if (in_array(substr($sPrnt, $j, 3), $a_trns[$k][2])) {
                                $sTemp = $a_trns[$k][0];
                                $l_prot ++;
                                if ($a_trns[$k][0] == "X") {
                                    // The stop codon itself is not counted as an amino acid.
                                    $l_prot --;
                                    $stop = "stopped";
                                    $j += 3;
                                    // $n_break tells me where to break the line after the stop codon.
                                    $n_break = $j;
                                    // Everything after the stop codon on this line is flagged lowercase;
                                    // this also runs $j to the end of the line, which ends the outer loop.
                                    for (; $j < $LENGTH_LINE; $j++) {
                                        $a_lowr[] = $j;
                                    }
                                    break;
                                }
                            } else {
                                $sTemp = "";
                            }
                            if ($sTemp) { break; }
                        }
                        // Codons that are not in the table (e.g. an incomplete last codon) show as "?".
                        $sTemp = ($sTemp? $sTemp : "?");
                        // Offsets 27 and 57 hold the 10th and 20th codon of the line.
                        $sTemp = ($j%30 == 27? "<B>" . $sTemp . "</B>" : $sTemp);
                        $sProtShrt .= $sTemp . "  ";
                        $l_line_prot += 3;
                    }

                    // $sPrnt2 is this line shifted two nucleotides to the left, so offset $j in $sPrnt2
                    // is offset $j-2 in $sPrnt. It is used to find stop codons in the +1 frame
                    // ($j = 0, 3, ...; shown bold) and the +2 frame ($j = 1, 4, ...; underlined).
                    // On the first line there is nothing to the left of the sequence. A negative offset
                    // would make substr() read from the END of $sTranslated, so pad with spaces instead.
                    // The window is $LENGTH_LINE+4 long so that a +2 frame codon that starts on the
                    // last nucleotide of the line is still complete.
                    $nWindow = $LENGTH_LINE + 4;
                    if ($i >= 2) {
                        $sPrnt2 = substr($sTranslated, $i-2, $nWindow);
                    } else {
                        $sPrnt2 = str_repeat(' ', 2-$i) . substr($sTranslated, 0, $nWindow-(2-$i));
                    }
                    for ($j=0; $j < $LENGTH_LINE+6; $j += 3) {
                        if (in_array(substr($sPrnt2, $j, 3), $a_trns[19][2])) {
                            $a_bold[] = $j-2;
                            $a_bold[] = $j-1;
                            $a_bold[] = $j;
                        }
                    }
                    for ($j=1; $j < $LENGTH_LINE+6; $j += 3) {
                        if (in_array(substr($sPrnt2, $j, 3), $a_trns[19][2])) {
                            $a_undr[] = $j-2;
                            $a_undr[] = $j-1;
                            $a_undr[] = $j;
                        }
                    }

                    // Prepare DNA sequence.
                    for ($j = 0, $k = $i + 1; $j < $LENGTH_LINE; $j ++, $k ++) {
                        $c_prnt = substr($sPrnt, $j, 1);
                        if (in_array($j, $a_lowr) || $stop == "done") {
                            $c_prnt = strtolower($c_prnt);

                            // If this is the first line we switch to lowercase, we'll need to stop this line.
                            if (!empty($n_break)) {
                                $c_prnt = '';
                            }
                        }
                        if (in_array($j, $a_bold) && $c_prnt) {
                            $c_prnt = "<B>" . $c_prnt . "</B>";
                        }
                        if (in_array($j, $a_undr) && $c_prnt) {
                            $c_prnt = "<U>" . $c_prnt . "</U>";
                        }
                        if (in_array($k, $aExonStartPosTransl)) {
                            $c_prnt = " | " . $c_prnt;
                        }
                        $sPrntFinl .= $c_prnt;

                        // Create number at the right of the sequence.
                        // $l_prnt is either a plain number or, after the stop codon, '*' followed by a number.
                        // substr() is used instead of the $l_prnt{0} offset syntax, which PHP 8 no longer parses.
                        if (substr($l_prnt, 0, 1) != '*') {
                            // Maybe this is a weird check. Will there ever be no $c_prnt?
                            $l_prnt = ($c_prnt? $l_prnt+1 : $l_prnt);
                        } elseif ($c_prnt) {
                            // We're at the special after-stop notation, $c_prnt should not be empty.
                            $l_prnt = '*' . (substr($l_prnt, 1) + 1);
                        }
                        $l_line_nucl = ($c_prnt? $l_line_nucl+1 : $l_line_nucl);
                    }
                    // Merge adjacent markup so a marked codon is wrapped in a single tag pair.
                    $sPrntFinl = str_replace("</B><B>", "", $sPrntFinl);
                    $sPrntFinl = str_replace("</U><U>", "", $sPrntFinl);

                    // Dots; exon split in line?
                    if ($n_trns_exon) {
                        fputs($fCoding, " ");
                        // On the line that holds the stop codon, the marker line ends at $n_break as well.
                        for ($j=0,$k=$i+1; (empty($n_break) && $j < $LENGTH_LINE) || (!empty($n_break) && $j < $n_break); $j++,$k++) {
                            if (in_array($k, $aExonStartPosTransl)) {
                                $nExon ++;
                                fputs($fCoding, " | ");
                                if ($_POST['link']) {
                                    fputs($fCoding, "<A href=\"" . $_POST['symbol'] . "_intron_" . str_pad($nExon-1, "2", "0", STR_PAD_LEFT) . ".html\" name=\"" . ($nExon - 1) . "\">");
                                } else {
                                    fputs($fCoding, "<A name=\"" . ($nExon - 1) . "\">");
                                }
                                // The exon number takes the place of one marker character per digit;
                                // a digit that lands on a dot position is shown bold instead.
                                $sTmp = "";
                                if (substr($sLinemark, $j, 1) == ".") {
                                    $sTmp .= "<B>" . substr($nExon, 0, 1) . "</B>";
                                } else {
                                    $sTmp .= substr($nExon, 0, 1);
                                }
                                if ($nExon >= 10) {
                                    $j++;
                                    $k++;
                                    if (substr($sLinemark, $j, 1) == ".") {
                                        $sTmp .= "<B>" . substr($nExon, 1, 1) . "</B>";
                                    } else {
                                        $sTmp .= substr($nExon, 1, 1);
                                    }
                                }
                                if ($nExon >= 100) {
                                    $j++;
                                    $k++;
                                    // NOTE(review): the third digit is checked against $sLinemarkBack while
                                    // the first two use $sLinemark - confirm whether that is intended.
                                    if (substr($sLinemarkBack, $j, 1) == ".") {
                                        $sTmp .= "<B>" . substr($nExon, 2, 1) . "</B>";
                                    } else {
                                        $sTmp .= substr($nExon, 2, 1);
                                    }
                                }
                                fputs($fCoding, $sTmp);
                                fputs($fCoding, "</A>");
                            } else {
                                fputs($fCoding, substr($sLinemark,$j,1));
                            }
                        }
                    } else {
                        // Writes dots when line contains no exon split.
                        fputs($fCoding, " " . substr($sLinemark, 0, $l_line_nucl));
                    }
                    fputs($fCoding, "\n ");
                    // Writes DNA line
                    if ($sPrntFinl) {
                        // Each exon split on the line added 3 characters (" | "), so 3 characters
                        // per split are taken off $sNumr.
                        fputs($fCoding, $sPrntFinl . str_repeat(" ", $LENGTH_LINE-$l_line_nucl) . substr($sNumr, 3*$n_trns_exon) . "  " . $l_prnt);
                    }

                    // Protein line.
                    if ($stop != "done") {
                        fputs($fCoding, "\n ");
                        if ($n_trns_exon) {
                            for ($j=0,$k=$i+1; $j < strlen($sProtShrt); $j++,$k++) {
                                if (in_array($k, $aExonStartPosTransl)) {
                                    fputs($fCoding, " | ");
                                }
                                if (substr($sProtShrt, $j, 1) == "<") {
                                    // A bold amino acid ("<B>x</B>", 8 characters) is written in one go.
                                    fputs($fCoding, substr($sProtShrt, $j, 8));
                                    $j += 7;
                                } else {
                                    fputs($fCoding, substr($sProtShrt, $j, 1));
                                }
                            }
                        } else {
                            fputs($fCoding, $sProtShrt);
                        }
                        fputs($fCoding, str_repeat(" ", $LENGTH_LINE-$l_line_prot) . substr($sNumr, 3*$n_trns_exon) . "  " . $l_prot);
                    }
                    fputs($fCoding, "\n\n");

                    $stop = ($stop == "stopped" || $stop == "done"? "done" : false);

                    // If we just had a break; some variables need to get changed.
                    if (isset($n_break)) {
                        // Step $i back by the part of the line that was not printed, so the next line
                        // starts directly after the stop codon.
                        $n_break = $LENGTH_LINE - $n_break;
                        $i -= $n_break;
                        // NOTE(review): this tests offset 1, whereas the '*' prefix is tested at offset 0
                        // in the DNA loop above; the original offset is kept as it was. Confirm whether
                        // the first branch can ever be taken.
                        if (substr($l_prnt, 1, 1) == '*') {
                            $l_prnt = (substr($l_prnt, 1) - $n_break);
                        } else {
                            $l_prnt = '*0';
                        }
                        unset($n_break);
                    }
                }

                //2.0-13; 2008-10-30 by Gerard: added if {} else {}
                // Close the sequence block: write the "downstream sequence" remark, the closing </PRE>
                // and, when requested, the legend.
                // The remark is a link only when the downstream file exists and links were requested;
                // when the file does not exist, it is always written as plain text.
                if (file_exists($sPath . $_POST['symbol'] . '_downstream.html')) {
                    $sDownstream = (!empty($_POST['link'])? ' (<A href="' . $_POST['symbol'] . '_downstream.html">downstream sequence</A>)' . "\n" : '');
                } else {
                    $sDownstream = ' (downstream sequence)' . "\n";
                }

                // The legend text is the same in both cases, so it is built only once.
                // !empty() is used because an unchecked checkbox is simply absent from $_POST.
                $sLegend = '';
                if (!empty($_POST['legend'])) {
                    $sLegend = "<SPAN style=\"font-size : 15px;\"><U><B>Legend:</B></U></SPAN><BR>\n" .
                               "Nucleotide numbering (following the rules of the <A href=\"http://www.HGVS.org/mutnomen/\" target=\"_blank\">HGVS</A> for a 'Coding DNA Reference Sequence') is indicated at the right of the sequence, counting the A of the ATG translation initiating Methionine as 1. Every 10<SUP>th</SUP> nucleotide is indicated by a &quot;.&quot; above the sequence. The " . ucfirst($_POST['gene']) . " protein sequence is shown below the coding DNA sequence, with numbering indicated at the right starting with 1 for the translation initiating Methionine. Every 10<SUP>th</SUP> amino acid is shown in bold. The position of introns is indicated by a vertical line, splitting the two exons. The start of the first exon (transcription initiation site) is indicated by a '\', the end of the last exon (poly-A addition site) by a '/'. The exon number is indicated above the first nucleotide(s) of the exon. To aid the description of frame shift mutations, all <B>stop codons in the +1 frame are shown in bold</B> while all <U>stop codons in the +2 frame are underlined</U>.<BR>\n\n";
                }

                fputs($fCoding, $sDownstream . "</PRE>\n\n" . $sLegend);

                //2.0-13; 2008-10-30 link to coding sequence added by Gerard
                // Add the success message for this step to $sOut, on a new line when $sOut already
                // holds text. The link to the written file is only added when links were requested.
                if ($sOut) {
                    $sOut .= "\n";
                }
                $sOut .= 'Successfully wrote coding DNA reference sequence';
                if ($_POST['link']) {
                    $sOut .= ' (<A href="' . ROOT_PATH . 'refseq/' . $_POST['symbol'] . '_codingDNA.html" target="_blank">' . $_POST['symbol'] . '_coding DNA sequence </A>)' . "\n";
                }

                // Page footer; the build is only mentioned when one is set.
                $sVersion = $_SETT['system']['version'];
                $sBuild = '';
                if ($_SETT['system']['build']) {
                    $sBuild = ' Build ' . $_SETT['system']['build'];
                }
                $aFooter = array(
                    '<HR>',
                    '<P align="center" style="font-size : 11px;">',
                    '  Powered by <A href="http://www.DMD.nl/LOVD/' . $sVersion . '/" target="_blank">LOVDv.' . $sVersion . '</A>' . $sBuild . '<BR>',
                    '  &copy;2004-2008 <A href="http://www.lumc.nl/" target="_blank">Leiden University Medical Center</A>',
                    '</P>',
                    '<HR>',
                    '',
                    '</BODY>',
                    '</HTML>',
                );
                fputs($fCoding, implode("\n", $aFooter));
                fclose($fCoding);
            } else {
                // This really shouldn't happen, as we have checked this already...
                lovd_errorAdd('Couldn\'t open file to write to for coding DNA reference sequence');
            }

            // Only when lovd_error() returns a false value: show the collected output of this step
            // and end the script here.
            if (!lovd_error()) {
                require ROOT_PATH . 'inc-top-clean.php';
                echo '<SPAN class="S15"><B>Step 3 - Create coding DNA reference sequence</B></SPAN><BR><BR>' . "\n\n";

                // Every newline in the collected output becomes an HTML line break followed by a newline.
                $sReport = implode('<BR>' . "\n", explode("\n", $sOut));
                echo 'Output for this step :<BR>' . "\n" . $sReport . '<BR><BR>' . "\n";

                require ROOT_PATH . 'inc-bot-clean.php';
                exit;
            }
        }
        lovd_magicUnquoteAll();

    } else {
        // Standard settings: the form was not sent, so fill $_POST with the defaults the form shows.
        $_POST['exists'] = (empty($_POST['exists'])? 'overwrite' : $_POST['exists']);

        // Do we have a gene selected?
        //2.0-13; 2008-10-31 by Gerard to solve already filled in textboxes
        // The session values are only used when neither field has been set yet.
        if (!(isset($_POST['symbol']) || isset($_POST['gene'])) && $_SESSION['currdb']) {
            $_POST['symbol'] = $_SESSION['currsymb'];
            $_POST['gene'] = $_SETT['currdb']['gene'];
        }

        // Non-empty values passed in the URL replace whatever is there.
        foreach (array('symbol', 'gene') as $sField) {
            if (!empty($_GET[$sField])) {
                $_POST[$sField] = $_GET[$sField];
            }
        }

        // Default note above the sequence; the gene is only mentioned when a symbol is known.
        $sGenePhrase = '';
        if (!empty($_POST['symbol'])) {
            $sGenePhrase = ' in the ' . $_POST['symbol'] . ' gene';
        }
        $_POST['note'] = '<p>This file was created to facilitate the description of sequence variants' . $sGenePhrase . ' based on a coding DNA reference sequence following <a href="http://www.HGVS.org/mutnomen/">the HGVS recommendations</a></p>';

        // Both checkboxes are ticked by default.
        $_POST['link'] = 1;
        $_POST['legend'] = 1;
    }

    
    // Print the step 3 input form (with any registered errors above it) and end the script.
    require ROOT_PATH . 'inc-top-clean.php';
    print('<SPAN class="S15"><B>Step 3 - Create coding DNA reference sequence</B></SPAN><BR><BR>' . "\n\n");

    lovd_errorPrint();

    // PHP_SELF can contain path info supplied by the client, so it is escaped before it is
    // written into the action attribute.
    print('<FORM action="' . htmlspecialchars($_SERVER['PHP_SELF'], ENT_QUOTES) . '?step=3&amp;sent=true" method="post">' . "\n" .
          '  <TABLE border="0" cellpadding="1" cellspacing="0" width="700">' . "\n");

    // Form definition that is handed to lovd_viewForm().
    $aForm   = array();
    $aForm[] = array('POST', '', '', '50%', '50%');
    $aForm[] = array('', 'print', '(All fields are mandatory unless specified otherwise)');
    $aForm[] = 'hr';
    $aForm[] = array('Gene name', 'text', 'gene', '50');
    $aForm[] = 'hr';
    $aForm[] = array('Gene symbol', 'text', 'symbol', '12');
    $aForm[] = 'hr';
    $aForm[] = array('Notes above sequence<BR><I>(optional, HTML enabled)</I>', 'textarea', 'note', '60', '2');
    $aForm[] = array('Include link to GenBank record in notes above sequence <I>(optional)</I>', 'text', 'gblink', '15');
    $aForm[] = array('', 'print', 'If you fill in an original GenBank ID, a link to the record at NCBI will be included');
    $aForm[] = array('Provide links to intronic sequences', 'checkbox', 'link', 1);
    $aForm[] = array('Provide legend', 'checkbox', 'legend', 1);
    $aForm[] = 'hr';
    $aForm[] = array('Input sequence', 'textarea', 'sequence', '60', '8');
    $aForm[] = array('', 'print', '<A href="' . ROOT_PATH . 'docs/lovd_scripts/reference_sequence_parser.php" target="_blank">More information on the format</A>');
    $aForm[] = array('If the file is found to exist, I will', 'select', 'exists', 1, array('skip' => 'skip the file', 'rename' => 'rename the old file', 'overwrite' => 'overwrite it'), '', '', '');
    $aForm[] = array('', 'submit', 'Continue');
    lovd_viewForm($aForm);
    print('</TABLE><BR>' . "\n\n" .
          '  </FORM>' . "\n\n");

    require ROOT_PATH . 'inc-bot-clean.php';
    exit;





//////////////////////////////////////////////////////////////////////////////////////////////
} else {
    // Print the form for choosing between the 3 steps:
    // 1) import a GenBank file
    // 2) create intronic sequences
    // 3) create the coding sequence
    require ROOT_PATH . 'inc-top-clean.php';
    print('<SPAN class="S18"><B>LOVD Reference Sequence Parser</B></SPAN><BR><BR>' . "\n\n" .
          'Welcome to the LOVD Reference Sequence parser. With this tool, you can create your own genomic and/or coding DNA reference sequence. These will be written to the \'refseq\' directory. For more information or troubleshooting, please refer to the <A href="' . ROOT_PATH . 'docs/lovd_scripts/reference_sequence_parser.php" target="_blank">LOVD manual</A>.<BR><BR>' . "\n\n");

    // One entry per step: array(title, description). The step number is the array index + 1.
    $a_menu = array(
                    array('Genbank file', 'Import annotated Genbank sequence to extract genomic sequence of your gene of interest for step 2.'),
                    array('Create intronic sequences', 'Parse genomic sequence from step 1 or a sequence you made yourself and create intronic sequences for the genomic reference sequence. Extract coding DNA sequence for step 3.<BR><A href="' . ROOT_PATH . 'docs/lovd_scripts/reference_sequence_parser.php" target="_blank">More information on the format</A>.'),
                    array('Create coding DNA reference sequence', 'Parse coding DNA sequence from step 2 or a sequence you made yourself and create coding DNA sequence for the coding DNA reference sequence.<BR><A href="' . ROOT_PATH . 'docs/lovd_scripts/reference_sequence_parser.php" target="_blank">More information on the format</A>.'),
                   );

    // PHP_SELF can contain path info supplied by the client, so it is escaped once here
    // before it is written into the links below.
    $sSelf = htmlspecialchars($_SERVER['PHP_SELF'], ENT_QUOTES);
    foreach ($a_menu as $n_step => $a_print) {
        $n_step ++;
        // The image path is built from ROOT_PATH, like every other path in this file.
        print('<TABLE border="0" cellpadding="2" cellspacing="0" width="725" style="border : 1px solid #c0c0c0;">' . "\n" .
              '  <TR>' . "\n" .
              '    <TD valign="top" align="center" width="40"><IMG src="' . ROOT_PATH . 'gfx/lovd_' . $n_step . '.png" alt="Step ' . $n_step . '" width="32" height="32" hspace="4" vspace="4"></TD>' . "\n" .
              '    <TD valign="middle"><SPAN class="S15"><A href="' . $sSelf . '?step=' . $n_step . '"><B>Step ' . $n_step . ' - ' . $a_print[0] . '</B></A></SPAN><BR>' . "\n" .
              '      ' . $a_print[1] . '</TD></TR></TABLE><BR>' . "\n\n");
    }

    require ROOT_PATH . 'inc-bot-clean.php';
    exit;
}
?>
