push_back for aa27
leannmlindsey opened this issue · 5 comments
Platform
- SeqAn version: seqan3
- Operating system: Linux nid00039 5.3.18-24.46_6.0.29-cray_ari_c # 1 SMP Mon Mar 14 09:11:41 UTC 2022 (6c38a31) x86_64 x86_64 x86_64 GNU/Linux
- Compiler: c++ (GCC) 11.2.0 20210728 (Cray Inc.), gcc (GCC) 11.2.0 20210728 (Cray Inc.)
Question
I have been trying to modify one of your alignment tutorials to work with amino acids instead of dna but I have had trouble when I am trying to call the function push_back on aa27. This is the error that I get:
/global/homes/l/lindsey/FINAL_repos/SEQAN3/tutorial/source/aa_example.cpp: In function 'int main()':
/global/homes/l/lindsey/FINAL_repos/SEQAN3/tutorial/source/aa_example.cpp:43:26: error: no matching function for call to 'std::vector<seqan3::sequence_record<seqan3::type_list<std::vector<seqan3::aa27, std::allocator<seqan3::aa27> >, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, std::vector<seqan3::phred42, std::allocator<seqan3::phred42> > >, seqan3::fields<seqan3::field::seq, seqan3::field::id, seqan3::field::qual> > >::push_back(std::vector<seqan3::aa27>&)'
43 | records.push_back(rec.sequence());
| ~~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~~
**DNA version of code that works**
#include <string> // for std::string
#include <tuple> // for std::tie
#include <vector> // for st
#include <concepts>
#include <ranges>
#include <omp.h>
#include <seqan3/alignment/detail/pairwise_alignment_concept.hpp>
#include <seqan3/alphabet/cigar/cigar.hpp>
#include <seqan3/alignment/aligned_sequence/all.hpp> // for alignment stream operator <<
#include <seqan3/alignment/configuration/all.hpp> // for all configs in the seqan3::align_cfg namespace
#include <seqan3/alignment/pairwise/all.hpp>
#include <seqan3/alphabet/nucleotide/dna5.hpp>
#include <seqan3/argument_parser/all.hpp> // for argument_parser
#include <seqan3/core/debug_stream.hpp> // for debug_stream
#include <seqan3/io/sequence_file/all.hpp> // for sequence_file_input
#include <chrono>
#include <seqan3/io/sam_file/all.hpp>
#include <seqan3/io/sam_file/detail/cigar.hpp>
#define NOW std::chrono::high_resolution_clock::now()
using namespace std;
using std::cout;
using std::endl;
using std::string;
struct result_sw{
int ref_number;
int ref_begin;
int ref_end;
int que_begin;
int que_end;
int score;
std::string cigar;
};
int main(int argc, char * argv[])
{
// Receive the filename as program argument.
std::string que_filename{};
std::string ref_filename{};
std::string out_file = "/global/homes/l/lindsey/FINAL_repos/SEQAN3/tutorial/build/out.txt";
//std::filesystem::path sam_path{"out.sam"};
//auto filename = std::filesystem::current_path() / "out2.sam";
//seqan3::sam_file_output fout{filename};
bool CIGAR = false;
seqan3::argument_parser parser("My-first-program", argc, argv);
parser.add_positional_option(ref_filename, "The reference filename of the file to read.");
parser.add_positional_option(que_filename, "The reference filename of the file to read.");
int threads=68;
try
{
parser.parse();
}
catch (seqan3::argument_parser_error const & ext)
{
seqan3::debug_stream << "[PARSER ERROR] " << ext.what() << '\n';
return 0;
}
seqan3::debug_stream << "Reading file " <<"\n";
// Create the vector to store sequences of type seqan3::dna5_vector.
std::vector<seqan3::dna5_vector> ref_sequences;
std::vector<seqan3::dna5_vector> que_sequences;
// Iterate through the file and store the sequences.
//seqan3::sam_file_output sam_out{sam_path,
// seqan3::fields<seqan3::field::seq,
// seqan3::field::id,
// seqan3::field::ref_id,
// seqan3::field::ref_offset,
// seqan3::field::alignment,
// seqan3::field::qual,
// seqan3::field::mapq>{}};
seqan3::sequence_file_input file_in{ref_filename};
for (auto & record : file_in)
{
ref_sequences.push_back(record.sequence());
}
seqan3::sequence_file_input fin{que_filename};
for (auto & record : fin)
{
que_sequences.push_back(record.sequence());
}
auto start = NOW;
//vector<int> global_strtA;
//vector<int> global_endA;
//vector<int> global_strtB;
//vector<int> global_endB;
vector<result_sw> results[threads];
auto min_cfg = seqan3::align_cfg::method_local{}
| seqan3::align_cfg::scoring_scheme{
seqan3::nucleotide_scoring_scheme{seqan3::match_score{3}, seqan3::mismatch_score{-3}}}
| seqan3::align_cfg::gap_cost_affine{seqan3::align_cfg::open_score{-6}, seqan3::align_cfg::extension_score{-1}}
| seqan3::align_cfg::output_score{}
| seqan3::align_cfg::output_end_position{}
| seqan3::align_cfg::output_begin_position{}
| seqan3::align_cfg::output_sequence1_id{}
| seqan3::align_cfg::output_sequence2_id{}
| seqan3::align_cfg::output_alignment{};
omp_set_num_threads(threads);
#pragma omp parallel for
for(int i=0; i<que_sequences.size(); i++){
int thread_id = omp_get_thread_num();
result_sw temp_res;
//std::cout << "Sequence Number" << i << '\n';
//std::cout << "Query Seq: ";
//seqan3::debug_stream << que_sequences[i] <<'\n';
//std::cout << "Ref Seq: ";
//seqan3::debug_stream << ref_sequences[i] <<'\n';
for (auto && res : align_pairwise(std::tie(que_sequences[i], ref_sequences[i]),min_cfg))
{
//seqan3::debug_stream << res.sequence1_id() << '\t';
//seqan3::debug_stream << res.sequence2_id() << '\t';
//seqan3::debug_stream << res.score() << '\t';
//seqan3::debug_stream << res.sequence1_end_position() << '\t';
//seqan3::debug_stream << res.sequence1_begin_position() << '\t';
//seqan3::debug_stream << res.sequence2_end_position() << '\t';
//seqan3::debug_stream << res.sequence2_begin_position() << '\n';
//seqan3::debug_stream << res.alignment() << '\n';
temp_res.ref_number = i;
temp_res.ref_begin = res.sequence1_begin_position();
temp_res.ref_end = res.sequence1_end_position();
temp_res.que_begin = res.sequence2_begin_position();
temp_res.que_end = res.sequence2_end_position();
temp_res.score = res.score();
//temp_res.cigar = res.output_alignment();
//temp_res.cigar = res.cigar();
//fout.push_back(res);
//if(CIGAR)
// temp_res.cigar = seqan3::detail::get_cigar_string(std::make_pair(que_sequences[i], ref_sequences[i]));
}
results[thread_id].push_back(temp_res);
}
#pragma omp barrier
auto end=NOW;
std::chrono::duration<double>diff=end-start;
cout << "Total Alignments:"<<que_sequences.size()<<"Total Time:"<< diff.count() <<endl;
ofstream results_file(out_file);
for(int j = 0; j < threads; j++){
for(int k = 0; k < results[j].size(); k++){
auto loc_vec = results[j];
results_file << loc_vec[k].ref_number << '\t' << loc_vec[k].score<<"\t"<<loc_vec[k].ref_begin<<"\t"<<loc_vec[k].ref_end<<"\t"<<loc_vec[k].que_begin<<"\t"<<loc_vec[k].que_end;
if(CIGAR)
results_file<<'\t'<<loc_vec[k].cigar<<endl;
else
results_file<<endl;
}
}
return 0;
}
**AA version of code that gives error**
#include <filesystem>
#include <seqan3/io/sequence_file/all.hpp>
#include <seqan3/alignment/pairwise/all.hpp> // for seqan3::align_cfg and seqan3::align_pairwise
#include <seqan3/alignment/scoring/all.hpp> // for seqan3::aminoacid_scoring_scheme and
// seqan3::aminoacid_similarity_matrix
#include <seqan3/alphabet/aminoacid/aa27.hpp> // for seqan3::operator""_aa27
#include <seqan3/core/debug_stream.hpp>
int main()
{
using namespace seqan3::literals;
auto seq1 = "QFSEEILSDIYCWMLQCGQERAV"_aa27;
auto seq2 = "AFLPGWQEENKLSKIWMKDCGCLW"_aa27;
// Configure the alignment kernel.
//auto config = seqan3::align_cfg::method_global{} |
// seqan3::align_cfg::scoring_scheme{
// seqan3::aminoacid_scoring_scheme{seqan3::aminoacid_similarity_matrix::blosum62}} |
// seqan3::align_cfg::gap_cost_affine{seqan3::align_cfg::open_score{-9},
//for (auto const & res : seqan3::align_pairwise(std::tie(seq1, seq2), config))
// seqan3::debug_stream << "Score: " << res.score() << '\n';
//
std::filesystem::path current_path = std::filesystem::current_path();
using traits_type = seqan3::sequence_file_input_default_traits_aa;
seqan3::sequence_file_input<traits_type> fin{current_path / "my.fasta"};
using record_type = decltype(fin)::record_type;
std::vector<record_type> records{};
for (auto & rec : fin)
{
seqan3::debug_stream << "ID: " << rec.id() << '\n';
seqan3::debug_stream << "SEQ: " << rec.sequence() << '\n';
//seqan3::debug_stream << "QUAL: " << rec.base_qualities() << '\n';
//records.push_back(std::move(rec));
records.push_back(rec.sequence());
}
seqan3::debug_stream << records<< '\n';
auto config = seqan3::align_cfg::method_global{} |
seqan3::align_cfg::scoring_scheme{
seqan3::aminoacid_scoring_scheme{seqan3::aminoacid_similarity_matrix::blosum62}} |
seqan3::align_cfg::gap_cost_affine{seqan3::align_cfg::open_score{-6},
seqan3::align_cfg::extension_score{-1}};
for (auto const & res_aa : seqan3::align_pairwise(std::tie(seq1, seq2), config))
seqan3::debug_stream << "Score: " << res_aa.score() << '\n';
seqan3::debug_stream << seq1 << '\n';
seqan3::debug_stream << records[0] << '\n';
}
Note: I have the alignment portion working for the input sequences seq1 and seq2 but I want to replace that with a for loop that goes through all of the records read in from two files (ref.fasta and read.fasta), but I am stuck on the push_back for amino acids
Hey!
In your AA code, you are using std::vector<record_type> records{};
, but since you only want to store the sequence there (records.push_back(rec.sequence())
), it should be std::vector<seqan3::aa27> records{};
.
Thanks for the quick response. I replaced that line and I now get this compile error:
/global/homes/l/lindsey/FINAL_repos/SEQAN3/tutorial/source/aa_example.cpp: In function 'int main()':
/global/homes/l/lindsey/FINAL_repos/SEQAN3/tutorial/source/aa_example.cpp:42:26: error: no matching function for call to 'std::vector<seqan3::aa27>::push_back(std::vector<seqan3::aa27>&)'
42 | records.push_back(rec.sequence());
| ~~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~~
Is it possible I am missing an #include
This is what I have:
#include <filesystem>
#include <seqan3/io/sequence_file/all.hpp>
#include <seqan3/alignment/pairwise/all.hpp> // for seqan3::align_cfg and seqan3::align_pairwise
#include <seqan3/alignment/scoring/all.hpp> // for seqan3::aminoacid_scoring_scheme and
// seqan3::aminoacid_similarity_matrix
#include <seqan3/alphabet/aminoacid/aa27.hpp> // for seqan3::operator""_aa27
#include <seqan3/core/debug_stream.hpp>
Here is the full error if it helps:
[ 12%] Building CXX object CMakeFiles/aa_example.dir/aa_example.cpp.o
/global/homes/l/lindsey/FINAL_repos/SEQAN3/tutorial/source/aa_example.cpp: In function 'int main()':
/global/homes/l/lindsey/FINAL_repos/SEQAN3/tutorial/source/aa_example.cpp:42:26: error: no matching function for call to 'std::vector<seqan3::aa27>::push_back(std::vector<seqan3::aa27>&)'
42 | records.push_back(rec.sequence());
| ~~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~~
In file included from /opt/gcc/11.2.0/snos/include/g++/vector:67,
from /opt/gcc/11.2.0/snos/include/g++/functional:62,
from /opt/gcc/11.2.0/snos/include/g++/pstl/glue_algorithm_defs.h:13,
from /opt/gcc/11.2.0/snos/include/g++/algorithm:74,
from /global/homes/l/lindsey/FINAL_repos/SEQAN3/tutorial/seqan3/include/seqan3/io/sequence_file/format_embl.hpp:15,
from /global/homes/l/lindsey/FINAL_repos/SEQAN3/tutorial/seqan3/include/seqan3/io/sequence_file/all.hpp:27,
from /global/homes/l/lindsey/FINAL_repos/SEQAN3/tutorial/source/aa_example.cpp:2:
/opt/gcc/11.2.0/snos/include/g++/bits/stl_vector.h:1187:7: note: candidate: 'void std::vector<_Tp, _Alloc>::push_back(const value_type&) [with _Tp = seqan3::aa27; _Alloc = std::allocator<seqan3::aa27>; std::vector<_Tp, _Alloc>::value_type = seqan3::aa27]'
1187 | push_back(const value_type& __x)
| ^~~~~~~~~
/opt/gcc/11.2.0/snos/include/g++/bits/stl_vector.h:1187:35: note: no known conversion for argument 1 from 'std::vector<seqan3::aa27>' to 'const value_type&' {aka 'const seqan3::aa27&'}
1187 | push_back(const value_type& __x)
| ~~~~~~~~~~~~~~~~~~^~~
/opt/gcc/11.2.0/snos/include/g++/bits/stl_vector.h:1203:7: note: candidate: 'void std::vector<_Tp, _Alloc>::push_back(std::vector<_Tp, _Alloc>::value_type&&) [with _Tp = seqan3::aa27; _Alloc = std::allocator<seqan3::aa27>; std::vector<_Tp, _Alloc>::value_type = seqan3::aa27]'
1203 | push_back(value_type&& __x)
| ^~~~~~~~~
/opt/gcc/11.2.0/snos/include/g++/bits/stl_vector.h:1203:30: note: no known conversion for argument 1 from 'std::vector<seqan3::aa27>' to 'std::vector<seqan3::aa27>::value_type&&' {aka 'seqan3::aa27&&'}
1203 | push_back(value_type&& __x)
| ~~~~~~~~~~~~~^~~
make[2]: *** [CMakeFiles/aa_example.dir/build.make:80: CMakeFiles/aa_example.dir/aa_example.cpp.o] Error 1
make[1]: *** [CMakeFiles/Makefile2:99: CMakeFiles/aa_example.dir/all] Error 2
make: *** [Makefile:101: all] Error 2
I did find a workaround that works. I left
std::vector<record_type> records{};
and replaced
records.push_back(rec.sequence());
with
records.push_back(std::move(rec));
Then in the align section I used:
for (auto const & res_aa : seqan3::align_pairwise(std::tie(records[0].sequence(), records[1].sequence()), config))
But if you do have a solution for the push_back just for aa sequences, let me know.
Hi @leannmlindsey,
Your workaround works well :)
another way would be (similar to @eseilers approach):
typename decltype(file_in)::sequence_type sequences{}; // takes the type from the file
and then the push_back should work
sequences.push_back(rec.sequence());
Since you have a working version, I think the issue is resolved. Feel free to reopen it if you have another question or there is something missing.