For handling Homologous_Intron.pl output
From the Homologous_Intron.pl output, fetch the intron-containing genes
# Default input tables (tab-separated, one cell per column).
QUERY_CODONS = "/home/roylab/swadha/phylogeny/nematodes_V2/step8_groupings/codons.txt"
SOURCE_CODONS = "/home/roylab/swadha/phylogeny/nematodes_V2/step6_groupings/codons.txt"


def _split_row(line):
    """Split one tab-delimited line into cells, dropping the line terminator."""
    # Without the rstrip the last cell keeps its "\n", which used to add a
    # blank line after every value written for the last column.
    return line.rstrip("\r\n").split("\t")


def _has_lowercase(cell):
    """Return True when *cell* contains at least one lowercase character."""
    return any(ch.islower() for ch in cell)


def write_intron_columns(query_path=QUERY_CODONS, source_path=SOURCE_CODONS,
                         out_prefix="intron_pos_sequence_row_"):
    """Dump source-table columns for every lowercase-containing query cell.

    For each row N (1-based) of the table at *query_path*, the file
    ``<out_prefix>N.txt`` is created (even when nothing is written to it).
    For every cell of that row that contains a lowercase character, the
    same-numbered column of every row of the table at *source_path* is
    appended to that file, one value per line.

    Args:
        query_path: tab-separated table scanned for lowercase-containing cells.
        source_path: tab-separated table whose columns are copied out.
        out_prefix: path prefix of the per-row output files.

    Returns:
        The list of output file names, in row order.

    Raises:
        IndexError: a row of the source table has fewer columns than the
            index of a matching query cell.
        OSError: a file cannot be opened.
    """
    source_rows = None  # parsed lazily, and only once, on the first match
    written = []
    with open(query_path, "r") as query:
        for row_no, line in enumerate(query, start=1):
            out_name = out_prefix + str(row_no) + ".txt"
            with open(out_name, "w") as out:
                for col, cell in enumerate(_split_row(line)):
                    if not _has_lowercase(cell):
                        continue
                    if source_rows is None:
                        # Previously the source file was re-opened and
                        # re-parsed for every matching cell.
                        with open(source_path, "r") as source:
                            source_rows = [_split_row(src) for src in source]
                    out.writelines(row[col] + "\n" for row in source_rows)
            written.append(out_name)
    return written


if __name__ == "__main__":
    write_intron_columns()
From the Homologous_Intron.pl output, fetch the intronless genes
import sys
from collections import defaultdict
def intronless_columns(path):
    """Return the columns of a tab-separated table that never hold an intron.

    The first line of *path* supplies the column names.  In every following
    row a cell counts as intron-free when it is all uppercase or equals
    ``'---'``; any other cell marks its column as intron-containing and the
    column is dropped for good.

    Args:
        path: tab-separated file with one header line followed by data rows.

    Returns:
        One string per surviving column, sorted by column name, formatted as
        ``name<TAB>cell<TAB>cell...``.

    Raises:
        IndexError: a data row has more cells than the header has names.
        OSError: the file cannot be opened.
    """
    columnwise = defaultdict(list)
    has_intron = set()
    with open(path) as data:
        headers = data.readline().strip().split('\t')
        for row in data:
            stripped = row.strip()
            if not stripped:
                # A blank line used to parse as a single empty cell, which
                # wrongly flagged column 0 as intron-containing.
                continue
            for index, cell in enumerate(stripped.split('\t')):
                if index in has_intron:
                    continue
                name = headers[index]
                if not (cell.isupper() or cell == '---'):  # is intron
                    has_intron.add(index)
                    # Discard whatever was collected for this column so far.
                    columnwise.pop(name, None)
                    continue
                # otherwise, no intron so add to column list
                columnwise[name].append(cell)
    return ['\t'.join([name] + cells)
            for name, cells in sorted(columnwise.items())]


if __name__ == "__main__":
    if len(sys.argv) < 2:
        sys.exit("usage: %s <Homologous_Intron_output.tsv>" % sys.argv[0])
    for output_line in intronless_columns(sys.argv[1]):
        print(output_line)
This is what the HI (Homologous_Intron.pl) output looks like
CMONO.g10050.t1__Caenorhabditis_monodelphis_JU1667_v1..exons-introns__*33.0* transcript:AgB29_g019_t07__ascaris_suum.PRJNA62057..exons-introns__*26.0*88.0* Transcript:CBG19306__caenorhabditis_briggsae.PRJNA10731..exons-introns__*33.0* transcript:nAv.1.0.1.t07972-RA__acanthocheilonema_viteae.PRJEB4306..exons-introns__*103.1*121.0* CSP28.g5940.t1__Caenorhabditis_sp28_QG2080_v1..exons-introns__*33.0* transcript:DILT_0000453501-mRNA-1__diphyllobothrium_latum.PRJEB1206..exons-introns__*26.0* transcript:Cang_2012_03_13_00257.g8003.t1__caenorhabditis_angaria.PRJNA51225..exons-introns__*33.0* CSP32.g1942.t1__Caenorhabditis_sp32_JU2788_v1..exons-introns__*33.0* transcript:BPAG_0000871301-mRNA-1__brugia_pahangi.PRJEB497..exons-introns__*61.0* transcript:ACAC_0000435501-mRNA-1__angiostrongylus_cantonensis.PRJEB493..exons-introns__*15.2*37.2*69.0*99.1* CPLIC.g9529.t1__Caenorhabditis_plicata_SB355_v1..exons-introns__*11.1* CSP39.g28243.t1__Caenorhabditis_sp39_NIC564_v1..exons-introns__*33.0* transcript:nAv.1.0.1.t03241-RA__acanthocheilonema_viteae.PRJEB4306..exons-introns__*29.1* CBOVI.g167.t1__Caenorhabditis_bovis_LS_v1.exons-introns__*33.0* CDS:RNASEQ_mid-L4_25dC_36hrs_post-L1_g45_x5_X_m__caenorhabditis_elegans.PRJNA13758..exons-introns__*22.0*70.1* CSP40.g23829.t1__Caenorhabditis_sp40_JU2818_v1..exons-introns__*33.0* Transcript:CBN08543__caenorhabditis_brenneri.PRJNA20035..exons-introns__*33.0* transcript:ACOC_0001178601-mRNA-1__angiostrongylus_costaricensis.PRJEB494..exons-introns__*67.2* CDOUG.g27003.t1__Caenorhabditis_doughertyi_JU1771_v1..exons-introns__*33.0* transcript:ACOC_0001174701-mRNA-1__angiostrongylus_costaricensis.PRJEB494..exons-introns__*9.1* transcript:Csp5_scaffold_00030.g1686.t1__caenorhabditis_sinica..PRJNA194557.exons-introns__*33.0* CBOVI.g4900.t1__Caenorhabditis_bovis_LS_v1.exons-introns__*21.0* Transcript:Bm9180__brugia_malayi.PRJNA10729..exons-introns__*61.0* transcript:Cnig_chr_V.g20238__caenorhabditis_nigoni.PRJNA384657..exons-introns__*33.0* 
CPLIC.g10195.t1__Caenorhabditis_plicata_SB355_v1..exons-introns__*40.1* CMACR.g7142.t1__caenorhabditis_macrosperma_JU2083_v1..exons-introns__*16.1*88.1* transcript:ALUE_0001219701-mRNA-1__ascaris_lumbricoides.PRJEB4950..exons-introns__*61.0* CBOVI.g4934.t1__Caenorhabditis_bovis_LS_v1.exons-introns__*15.1* CBOVI.g4937.t1__Caenorhabditis_bovis_LS_v1.exons-introns__*21.0* Transcript:CRE31556__caenorhabditis_remanei.PRJNA53967..exons-introns__*33.0* CMACR.g3305.t1__caenorhabditis_macrosperma_JU2083_v1..exons-introns__*33.0* CCAST.g691.t1__Caenorhabditis_castelli_JU1956_v1..exons-introns__*33.0* CDS:GBG_briggsae_B0035.9__caenorhabditis_briggsae.PRJNA10731..exons-introns__*24.1* CMONO.g9592.t1__Caenorhabditis_monodelphis_JU1667_v1..exons-introns__*70.1*85.1* CNOUR.g15651.t1__Caenorhabditis_nouraguensis_JU2079_v1..exons-introns__*33.0* CSP31.g25482.t1__Caenorhabditis_sp31_JU2585_v1..exons-introns__*33.0* CSP39.g14341.t1__Caenorhabditis_sp39_NIC564_v1..exons-introns__*16.0* CPLIC.g7604.t1__Caenorhabditis_plicata_SB355_v1..exons-introns__*45.1* transcript:AgB29_g019_t03__ascaris_suum.PRJNA62057..exons-introns__*18.0*84.0*115.0*132.1* CDS:AG1201_C50F4.7.T2__caenorhabditis_elegans.PRJNA13758..exons-introns__*33.0* CMONO.g4362.t1__Caenorhabditis_monodelphis_JU1667_v1..exons-introns__*69.0* CVIRI.g216.t1__Caenorhabditis_virilis_JU1968_v1..exons-introns__*2.2* CDS:mGene_pred_2076__caenorhabditis_elegans.PRJNA13758..exons-introns__*15.1*66.0*99.0*140.0* transcript:csin109360__clonorchis_sinensis.PRJDA72781..exons-introns__*45.1* CSP38.g5030.t1__Caenorhabditis_sp38_JU2809_v1..exons-introns__*33.0* CDS:GeneMark.chr_V.2545__caenorhabditis_elegans.PRJNA13758..exons-introns__*33.0* transcript:Csp11.Scaffold629.g15723.t1__caenorhabditis_tropicalis.PRJNA53597..exons-introns__*33.0* CNOUR.g18298.t1__Caenorhabditis_nouraguensis_JU2079_v1..exons-introns__*33.0* CBOVI.g5126.t1__Caenorhabditis_bovis_LS_v1.exons-introns__*93.1*117.0* 
transcript:AgR060_g024_t08__ascaris_suum.PRJNA62057..exons-introns__*35.2* CSP26.g15710.t1__Caenorhabditis_sp26_JU2190_v1..exons-introns__*33.0* CSP21.g11473.t1__Caenorhabditis_sp21_NIC534_v1..exons-introns__*88.0* transcript:Acey_s0221.g2581.t5__ancylostoma_ceylanicum.PRJNA231479..exons-introns__*51.1*98.0* CDS:T10C6.gc13__caenorhabditis_elegans.PRJNA13758..exons-introns__*98.2*120.0* transcript:ACAC_0000646301-mRNA-1__angiostrongylus_cantonensis.PRJEB493..exons-introns__*53.2* transcript:AgB06_g136_t01__ascaris_suum.PRJNA62057..exons-introns__*61.0* CWALL.g12508.t1__caenorhabditis_wallacei_JU1898_v1..exons-introns__*33.0* CSP39.g22656.t1__Caenorhabditis_sp39_NIC564_v1..exons-introns__*98.2* Transcript:CBG23443__caenorhabditis_briggsae.PRJNA10731..exons-introns__*24.1* CVIRI.g8691.t1__Caenorhabditis_virilis_JU1968_v1..exons-introns__*33.0* Transcript:CJA07245__caenorhabditis_japonica.PRJNA12591..exons-introns__*7.0* Transcript:Bm4112a__brugia_malayi.PRJNA10729..exons-introns__*100.1* transcript:Sp34_50249000.t1__caenorhabditis_sp34.PRJDB5687..exons-introns__*33.0* CBOVI.g5126.t2__Caenorhabditis_bovis_LS_v1.exons-introns__*51.0* CAFRA.g8060.t1__Caenorhabditis_afra_JU1286_v1..exons-introns__*33.0* Transcript:CJA15171__caenorhabditis_japonica.PRJNA12591..exons-introns__*33.0* CSP21.g14926.t1__Caenorhabditis_sp21_NIC534_v1..exons-introns__*33.0* CSP29.g18988.t1__Caenorhabditis_sp29_QG2083_v1..exons-introns__*33.0* CSP29.g12800.t1__Caenorhabditis_sp29_QG2083_v1..exons-introns__*33.0* CKAMA.g12927.t1__Caenorhabditis_kamaaina_QG2077_v1..exons-introns__*33.0* CBOVI.g4977.t1__Caenorhabditis_bovis_LS_v1.exons-introns__*21.0* transcript:nAv.1.0.1.t03914-RA__acanthocheilonema_viteae.PRJEB4306..exons-introns__*61.0* transcript:ANCCAN_25895__ancylostoma_caninum.PRJNA72585..exons-introns__*96.2* transcript:AgB29_g019_t04__ascaris_suum.PRJNA62057..exons-introns__*26.0*88.0*119.0*136.1*
--- --- --- --- --- --- --- --- --- ATG --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- ---
--- --- --- --- --- --- --- --- --- TTC --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- ---
--- --- --- --- --- --- --- --- --- GCC --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- ---
--- --- --- --- --- --- --- --- --- ATG --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- ---
--- --- --- --- --- --- --- --- --- ATA --- --- --- --- --- --- --- --- --- ATG --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- ---
--- --- --- --- --- --- --- --- --- ATC --- --- --- --- --- --- --- --- --- GTT --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- ---
--- --- --- --- --- --- --- --- --- ATG --- --- --- --- --- --- --- --- --- TTT --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- ---
--- --- --- --- --- --- --- --- --- TCC --- --- --- --- --- --- --- --- --- CAC --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- ---
--- --- --- --- --- --- --- --- --- ACG --- --- --- --- --- --- --- --- --- AGC --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- ---
--- --- --- --- --- --- --- --- --- ATA --- --- --- --- --- --- --- --- --- AGA --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- ---
--- --- --- --- --- --- --- --- --- GGT --- --- --- --- --- --- --- --- --- GCT --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- ---
--- --- --- --- --- --- --- --- --- ACG --- --- --- --- --- --- --- --- --- AGC --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- ---
--- --- --- --- --- --- --- --- --- GCG --- --- --- --- --- --- --- --- --- GgttggttgttttcagtgctctatcacaatgcaatttttatttcatttcgcccacaaaccctcctagtttagGG --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- ---
--- --- --- --- --- --- --- --- --- CTC --- --- --- --- --- --- --- --- --- TTT --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- ---
--- --- --- --- --- --- --- --- --- AAgtttgttttgtttcattttcttctttcaagtctttatatctctttgcattgaaatgtaaaaaaggcattccttcctggaaggtccacgaaacatcttcaaacgcttctaaacggcgtccaagttcccgaattcatttaaggtgttgtgttttcatgaggcaattgtcgtttgtcgaatagggttgcgcaaaaagtcattatgtttttagA --- --- --- --- --- --- --- --- --- CCC --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- ---
--- --- --- --- --- --- --- --- --- AAT --- --- --- --- --- --- --- --- --- AAT --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- ---
--- --- --- --- --- --- --- --- --- ACA --- --- --- --- --- --- --- --- --- AAA --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- ---
--- --- --- --- --- --- --- --- --- GGG --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- ---
--- --- --- --- --- --- --- --- --- TTA --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- ---
--- --- --- --- --- --- --- --- --- GAG --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- ---
--- --- --- --- --- --- --- --- --- GAG --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- ---
--- --- --- --- --- --- --- --- --- ATT --- --- --- --- --- --- --- ATG --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- ---
--- --- --- --- --- --- --- --- --- TCG --- --- --- --- --- --- --- TTG --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- ---
--- --- --- --- --- --- --- --- --- GTT --- --- --- --- --- --- --- GCT --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- ---
--- --- --- --- --- --- --- --- --- ACC --- --- --- --- --- --- --- ACC --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- ---
--- --- --- --- --- --- --- --- --- AAA --- --- --- --- --- --- --- AAG --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- ---
--- --- --- --- --- --- --- --- --- TCG --- --- --- --- --- --- --- GGC --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- ---
--- --- --- --- --- --- --- --- --- TGT --- --- --- --- --- --- --- ACT --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- ---
--- --- --- --- --- --- --- --- --- GTG --- --- --- --- --- --- --- CAA --- AAA --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- ---
--- --- --- --- --- --- --- --- --- ATC --- --- --- --- --- --- --- AAG --- CAG --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- ---
--- --- --- --- --- --- --- --- --- GTG --- --- --- --- --- --- --- GCT --- CTC --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- ---
--- --- --- --- --- --- --- --- --- CCG --- --- --- --- --- --- --- GTA --- ACA --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- ---
--- --- --- --- --- --- --- --- --- TGG --- --- --- --- --- --- --- CCA --- TTC --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- ---
--- --- --- --- --- --- --- --- --- AAT --- --- --- --- --- --- --- GTC --- TCC --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- ---
--- --- --- --- --- --- --- --- --- TTC --- --- --- --- --- --- --- ACC --- TTT --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- ---
--- --- --- --- --- --- --- --- --- GCT --- --- --- --- --- --- --- GGA --- GGG --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- ---
--- --- --- --- --- --- --- --- --- AGgtgtttgccgcagctataggaaacaaccaaactattctcctaaggatacagagcactgtgagtttcgattatcgtacattccttttagttttaaaatcaacatgactgatttaactttcttcctgatgtttataatctcttgcgttctgttcggaaaactcttgattctttgttatcttgggtaccaggattccgccgtcggctttgtgtgagacgaggagggcgtctttgggtgggttctgttgtgagtaatttcgctttcgtatttgcctctcgctgtgtttatgagacatgatagaagttcatccaaacgtttcgttactgctttatccgggaatggcccagatgccttccatttctatcatctgggaaatcactcagtgttgcactcgcagcacttaaaaccaccacttcatgcagagtcaaagtgtttgggtttggaaccttattgtagtcatacttgtgaacttagtgctgtcatattgtacactcattacatttccgactatccaatcacctgtagtgtcgttgataatcgagtgactcgagcggctgccagtaactcttccatttgttcctgtcgcgtgcaagagtagcccagtgggttctattcgctctagggactcgtcgagcatcatatctttgttttaagcttttcgtggagaactctgatcatcgggtcggtggtcgttctgcagtgcgtttgacagccctctgcctacgatcagaaagcttacagaccatgtagattcagggccatcgaaacgataacagtcacatcggtgccgtattgtatgtgcgttttcgcgctgtaattctttgcgtacaatcattccatgagatgaccatgatccatcacagtgactggagtgttaacggtaattagttaatggtcaattattaaaccttgtatatttgcactatttcggctgaggacgggttcttctgcatttttgaacaactgttcctcaagaatgaacgagacgatacggaagacatcaatctcttgtgcttctcctgaagggatctcaaggtagtagaccggaagatgagaatgaaccgaggacaatatggtcaaatggtcaaacttacttgtatagtgcgaaacccataacatcgaaagcagtaactctacgtggattgacaagctatttgagacgagcttatgcaggggacgaatatatgcggacaagagaaggatcagtcgtaagaaaagtgctgtatagatggcctcatcagtctatgcttttcgattgttagactgccacgacttaagatttgttgaagtaccatttccgcgctagggaaatgttctgttacatctcaggttatcgcaagaatcgaattagaagaggccttggttcaggcaatgcattgcataaatcactgtcagttgtaataatttacaagtttagaattacttgtagctctaaagtagcgcctgatagcttttacattaggaaaattagcaagtggttcggcattctagtgtcatctcactagctaaagtgtagaatttgtcgttgtgacactagagaagatccacgcattcatattccaatgccggactcatcaaccatatgcgaccataatcaactgttctaaccttcctcattggaagtcatataaaaaagcggctccgaagaaggcgatactgccgaaacgtcagctactgttgtgttatattgtgtgcttttctatggaagtaaatgtatgctgtataacaacctcgcgattgctaaacaagaaaactattttgtatgttgtttcggctggtgcttgaggatttcatgttctttgcaactggggtagaatgctggtttagtcaattctcgcctctgtcatggcagtgatattttttaacgtttaccactgtgtgtgtgatatgactttcatgaagtaaaactttgctttttgcttcgtttatcgcagaagtaggaaagagtttggaggtgtgacttc
ttttttaaacaacgaaaacatgttactcgacatctttgagcttgaagattgttacactttgcatgctttttcttccaggtacacctagaagggagtgacgactacgtagagatttgaaaaggaattcctttgattcttgcgaggagctcgttcgtgagttgagattgcaacatcagcagttgaagtcggtaggcatctaacgatgctgtttttttccacttgtttgacgttttcttagttttgaagT --- --- --- --- --- --- --- TTT --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- ---
--- --- --- --- --- --- --- --- --- GTC --- --- --- --- --- --- --- GTG --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- ---
--- --- --- --- --- --- --- --- --- TAT --- --- --- --- --- --- --- AAG --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- ATG --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- ---
--- --- --- --- --- ATG --- --- --- TAT --- --- --- --- --- --- --- ATA --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- GAG --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- ---