seqan/seqan3

push_back for aa27

leannmlindsey opened this issue · 5 comments

Platform

  • SeqAn version: seqan3
  • Operating system: Linux nid00039 5.3.18-24.46_6.0.29-cray_ari_c # 1 SMP Mon Mar 14 09:11:41 UTC 2022 (6c38a31) x86_64 x86_64 x86_64 GNU/Linux
  • Compiler: c++ (GCC) 11.2.0 20210728 (Cray Inc.), gcc (GCC) 11.2.0 20210728 (Cray Inc.)

Question

I have been trying to modify one of your alignment tutorials to work with amino acids instead of dna but I have had trouble when I am trying to call the function push_back on aa27. This is the error that I get:

/global/homes/l/lindsey/FINAL_repos/SEQAN3/tutorial/source/aa_example.cpp: In function 'int main()':
/global/homes/l/lindsey/FINAL_repos/SEQAN3/tutorial/source/aa_example.cpp:43:26: error: no matching function for call to 'std::vector<seqan3::sequence_record<seqan3::type_list<std::vector<seqan3::aa27, std::allocator<seqan3::aa27> >, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, std::vector<seqan3::phred42, std::allocator<seqan3::phred42> > >, seqan3::fields<seqan3::field::seq, seqan3::field::id, seqan3::field::qual> > >::push_back(std::vector<seqan3::aa27>&)'
   43 |         records.push_back(rec.sequence());
      |         ~~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~~
**DNA version of code that works**
#include <string> // for std::string
#include <tuple>  // for std::tie
#include <vector> // for st
#include <concepts>
#include <ranges>
#include <omp.h>
#include <seqan3/alignment/detail/pairwise_alignment_concept.hpp>
#include <seqan3/alphabet/cigar/cigar.hpp>
#include <seqan3/alignment/aligned_sequence/all.hpp> // for alignment stream operator <<
#include <seqan3/alignment/configuration/all.hpp>    // for all configs in the seqan3::align_cfg namespace
#include <seqan3/alignment/pairwise/all.hpp>
#include <seqan3/alphabet/nucleotide/dna5.hpp>
#include <seqan3/argument_parser/all.hpp>      // for argument_parser
#include <seqan3/core/debug_stream.hpp>        // for debug_stream
#include <seqan3/io/sequence_file/all.hpp>   // for sequence_file_input
#include <chrono>
#include <seqan3/io/sam_file/all.hpp>
#include <seqan3/io/sam_file/detail/cigar.hpp>
#define NOW std::chrono::high_resolution_clock::now()
using namespace std;
using std::cout;
using std::endl;
using std::string;

struct result_sw{
  int ref_number;
  int ref_begin;
  int ref_end;
  int que_begin;
  int que_end;
  int score;
  std::string cigar;
};

int main(int argc, char * argv[])
{
    // Receive the filename as program argument.
    std::string que_filename{};
    std::string ref_filename{};
    std::string out_file = "/global/homes/l/lindsey/FINAL_repos/SEQAN3/tutorial/build/out.txt";
    //std::filesystem::path sam_path{"out.sam"};
    //auto filename = std::filesystem::current_path() / "out2.sam";
    //seqan3::sam_file_output fout{filename};
    

    bool CIGAR = false;
    seqan3::argument_parser parser("My-first-program", argc, argv);
    parser.add_positional_option(ref_filename, "The reference filename of the file to read.");
    parser.add_positional_option(que_filename, "The reference filename of the file to read.");
    int threads=68;
 
    try
    {
        parser.parse();
    }
    catch (seqan3::argument_parser_error const & ext)
    {
        seqan3::debug_stream << "[PARSER ERROR] " << ext.what() << '\n';
        return 0;
    }
 
    seqan3::debug_stream << "Reading file " <<"\n";
 
    // Create the vector to store sequences of type seqan3::dna5_vector.
    std::vector<seqan3::dna5_vector> ref_sequences;
    std::vector<seqan3::dna5_vector> que_sequences;
 
    // Iterate through the file and store the sequences.
    
    //seqan3::sam_file_output sam_out{sam_path,
    //                                seqan3::fields<seqan3::field::seq,
    //                                               seqan3::field::id,
    //                                               seqan3::field::ref_id,
    //                                               seqan3::field::ref_offset,
    //                                               seqan3::field::alignment,
     //                                              seqan3::field::qual,
      //                                             seqan3::field::mapq>{}};
    seqan3::sequence_file_input file_in{ref_filename};
    for (auto & record : file_in)
    {
        ref_sequences.push_back(record.sequence());
    }

    seqan3::sequence_file_input fin{que_filename};
    for (auto & record : fin)
    {
        que_sequences.push_back(record.sequence());
    }

    auto start = NOW;
    //vector<int> global_strtA;
    //vector<int> global_endA;
    //vector<int> global_strtB;
    //vector<int> global_endB;
    vector<result_sw> results[threads];

    auto min_cfg = seqan3::align_cfg::method_local{}
                 | seqan3::align_cfg::scoring_scheme{
                     seqan3::nucleotide_scoring_scheme{seqan3::match_score{3}, seqan3::mismatch_score{-3}}}
                     | seqan3::align_cfg::gap_cost_affine{seqan3::align_cfg::open_score{-6}, seqan3::align_cfg::extension_score{-1}}
                     | seqan3::align_cfg::output_score{}
                     | seqan3::align_cfg::output_end_position{}
                     | seqan3::align_cfg::output_begin_position{}
                     | seqan3::align_cfg::output_sequence1_id{}
                     | seqan3::align_cfg::output_sequence2_id{}
                     | seqan3::align_cfg::output_alignment{};
                   

    omp_set_num_threads(threads);
    
    #pragma omp parallel for
    for(int i=0; i<que_sequences.size(); i++){
        int thread_id = omp_get_thread_num();
        result_sw temp_res;
        
        //std::cout << "Sequence Number" << i << '\n';
        //std::cout << "Query Seq: ";
	    //seqan3::debug_stream << que_sequences[i] <<'\n';
        //std::cout << "Ref Seq: ";
    	//seqan3::debug_stream << ref_sequences[i] <<'\n';
        
	    for (auto && res : align_pairwise(std::tie(que_sequences[i], ref_sequences[i]),min_cfg))
			{
            //seqan3::debug_stream << res.sequence1_id() << '\t';
            //seqan3::debug_stream << res.sequence2_id() << '\t';
			//seqan3::debug_stream << res.score() << '\t';
            //seqan3::debug_stream << res.sequence1_end_position() << '\t';
            //seqan3::debug_stream << res.sequence1_begin_position() << '\t';
            //seqan3::debug_stream << res.sequence2_end_position() << '\t';
            //seqan3::debug_stream << res.sequence2_begin_position() << '\n';
			//seqan3::debug_stream << res.alignment() << '\n';
            temp_res.ref_number = i;
            temp_res.ref_begin = res.sequence1_begin_position();
            temp_res.ref_end = res.sequence1_end_position();
            temp_res.que_begin = res.sequence2_begin_position();
            temp_res.que_end = res.sequence2_end_position();
            temp_res.score = res.score();
            //temp_res.cigar = res.output_alignment();
            //temp_res.cigar = res.cigar();
            //fout.push_back(res);
	    //if(CIGAR)
		  //  temp_res.cigar = seqan3::detail::get_cigar_string(std::make_pair(que_sequences[i], ref_sequences[i]));
			}
            results[thread_id].push_back(temp_res);
        }
    #pragma omp barrier
    auto end=NOW;
    std::chrono::duration<double>diff=end-start;
    cout << "Total Alignments:"<<que_sequences.size()<<"Total Time:"<< diff.count() <<endl;

    ofstream results_file(out_file);
    for(int j = 0; j < threads; j++){
        for(int k = 0; k < results[j].size(); k++){
            auto loc_vec = results[j];
            results_file << loc_vec[k].ref_number << '\t' << loc_vec[k].score<<"\t"<<loc_vec[k].ref_begin<<"\t"<<loc_vec[k].ref_end<<"\t"<<loc_vec[k].que_begin<<"\t"<<loc_vec[k].que_end;
            if(CIGAR)
                results_file<<'\t'<<loc_vec[k].cigar<<endl;
            else
                results_file<<endl;
        }
    }


    return 0;
}
**AA version of code that gives error**
#include <filesystem>
#include <seqan3/io/sequence_file/all.hpp>
#include <seqan3/alignment/pairwise/all.hpp>        // for seqan3::align_cfg and seqan3::align_pairwise
#include <seqan3/alignment/scoring/all.hpp>         // for seqan3::aminoacid_scoring_scheme and
                                                    //     seqan3::aminoacid_similarity_matrix
#include <seqan3/alphabet/aminoacid/aa27.hpp>       // for seqan3::operator""_aa27
#include <seqan3/core/debug_stream.hpp>
 
int main()
{
    using namespace seqan3::literals;
 
    auto seq1 = "QFSEEILSDIYCWMLQCGQERAV"_aa27;
    auto seq2 = "AFLPGWQEENKLSKIWMKDCGCLW"_aa27;
 
    // Configure the alignment kernel.
    //auto config = seqan3::align_cfg::method_global{} |
      //            seqan3::align_cfg::scoring_scheme{
        //              seqan3::aminoacid_scoring_scheme{seqan3::aminoacid_similarity_matrix::blosum62}} |
          //        seqan3::align_cfg::gap_cost_affine{seqan3::align_cfg::open_score{-9},
                                                     
 
    //for (auto const & res : seqan3::align_pairwise(std::tie(seq1, seq2), config))
      //  seqan3::debug_stream << "Score: " << res.score() << '\n';
      //


    std::filesystem::path current_path = std::filesystem::current_path();
 
    using traits_type = seqan3::sequence_file_input_default_traits_aa;
    seqan3::sequence_file_input<traits_type> fin{current_path / "my.fasta"};

    using record_type = decltype(fin)::record_type;
    std::vector<record_type> records{};
    
    for (auto & rec : fin)
    {
        seqan3::debug_stream << "ID:  " << rec.id() << '\n';
        seqan3::debug_stream << "SEQ: " << rec.sequence() << '\n';
        //seqan3::debug_stream << "QUAL: " << rec.base_qualities() << '\n';
 
    //records.push_back(std::move(rec));
    records.push_back(rec.sequence());
    }

    seqan3::debug_stream << records<< '\n';

    auto config = seqan3::align_cfg::method_global{} |
                  seqan3::align_cfg::scoring_scheme{
                      seqan3::aminoacid_scoring_scheme{seqan3::aminoacid_similarity_matrix::blosum62}} |
                  seqan3::align_cfg::gap_cost_affine{seqan3::align_cfg::open_score{-6},
                seqan3::align_cfg::extension_score{-1}};
    for (auto const & res_aa : seqan3::align_pairwise(std::tie(seq1, seq2), config))
      seqan3::debug_stream << "Score: " << res_aa.score() << '\n';    

    seqan3::debug_stream << seq1 << '\n';
    seqan3::debug_stream << records[0] << '\n';
}

Note: I have the alignment portion working for the input sequences seq1 and seq2 but I want to replace that with a for loop that goes through all of the records read in from two files (ref.fasta and read.fasta), but I am stuck on the push_back for amino acids

Hey!

In your AA code, you are using std::vector<record_type> records{};, but since you only want to store the sequence there (records.push_back(rec.sequence())), it should be std::vector<seqan3::aa27> records{};.

Thanks for the quick response. I replaced that line and I now get this compile error:

/global/homes/l/lindsey/FINAL_repos/SEQAN3/tutorial/source/aa_example.cpp: In function 'int main()':
/global/homes/l/lindsey/FINAL_repos/SEQAN3/tutorial/source/aa_example.cpp:42:26: error: no matching function for call to 'std::vector<seqan3::aa27>::push_back(std::vector<seqan3::aa27>&)'
   42 |         records.push_back(rec.sequence());
      |         ~~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~~

Is it possible I am missing an #include

This is what I have:
#include <filesystem>
#include <seqan3/io/sequence_file/all.hpp>
#include <seqan3/alignment/pairwise/all.hpp>        // for seqan3::align_cfg and seqan3::align_pairwise
#include <seqan3/alignment/scoring/all.hpp>         // for seqan3::aminoacid_scoring_scheme and
                                                    //     seqan3::aminoacid_similarity_matrix
#include <seqan3/alphabet/aminoacid/aa27.hpp>       // for seqan3::operator""_aa27
#include <seqan3/core/debug_stream.hpp>

Here is the full error if it helps:

[ 12%] Building CXX object CMakeFiles/aa_example.dir/aa_example.cpp.o
/global/homes/l/lindsey/FINAL_repos/SEQAN3/tutorial/source/aa_example.cpp: In function 'int main()':
/global/homes/l/lindsey/FINAL_repos/SEQAN3/tutorial/source/aa_example.cpp:42:26: error: no matching function for call to 'std::vector<seqan3::aa27>::push_back(std::vector<seqan3::aa27>&)'
   42 |         records.push_back(rec.sequence());
      |         ~~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~~
In file included from /opt/gcc/11.2.0/snos/include/g++/vector:67,
                 from /opt/gcc/11.2.0/snos/include/g++/functional:62,
                 from /opt/gcc/11.2.0/snos/include/g++/pstl/glue_algorithm_defs.h:13,
                 from /opt/gcc/11.2.0/snos/include/g++/algorithm:74,
                 from /global/homes/l/lindsey/FINAL_repos/SEQAN3/tutorial/seqan3/include/seqan3/io/sequence_file/format_embl.hpp:15,
                 from /global/homes/l/lindsey/FINAL_repos/SEQAN3/tutorial/seqan3/include/seqan3/io/sequence_file/all.hpp:27,
                 from /global/homes/l/lindsey/FINAL_repos/SEQAN3/tutorial/source/aa_example.cpp:2:
/opt/gcc/11.2.0/snos/include/g++/bits/stl_vector.h:1187:7: note: candidate: 'void std::vector<_Tp, _Alloc>::push_back(const value_type&) [with _Tp = seqan3::aa27; _Alloc = std::allocator<seqan3::aa27>; std::vector<_Tp, _Alloc>::value_type = seqan3::aa27]'
 1187 |       push_back(const value_type& __x)
      |       ^~~~~~~~~
/opt/gcc/11.2.0/snos/include/g++/bits/stl_vector.h:1187:35: note:   no known conversion for argument 1 from 'std::vector<seqan3::aa27>' to 'const value_type&' {aka 'const seqan3::aa27&'}
 1187 |       push_back(const value_type& __x)
      |                 ~~~~~~~~~~~~~~~~~~^~~
/opt/gcc/11.2.0/snos/include/g++/bits/stl_vector.h:1203:7: note: candidate: 'void std::vector<_Tp, _Alloc>::push_back(std::vector<_Tp, _Alloc>::value_type&&) [with _Tp = seqan3::aa27; _Alloc = std::allocator<seqan3::aa27>; std::vector<_Tp, _Alloc>::value_type = seqan3::aa27]'
 1203 |       push_back(value_type&& __x)
      |       ^~~~~~~~~
/opt/gcc/11.2.0/snos/include/g++/bits/stl_vector.h:1203:30: note:   no known conversion for argument 1 from 'std::vector<seqan3::aa27>' to 'std::vector<seqan3::aa27>::value_type&&' {aka 'seqan3::aa27&&'}
 1203 |       push_back(value_type&& __x)
      |                 ~~~~~~~~~~~~~^~~
make[2]: *** [CMakeFiles/aa_example.dir/build.make:80: CMakeFiles/aa_example.dir/aa_example.cpp.o] Error 1
make[1]: *** [CMakeFiles/Makefile2:99: CMakeFiles/aa_example.dir/all] Error 2
make: *** [Makefile:101: all] Error 2

I did find a workaround that works. I left

std::vector<record_type> records{};

and replaced

records.push_back(rec.sequence());

with

records.push_back(std::move(rec));

Then in the align section I used:

 for (auto const & res_aa : seqan3::align_pairwise(std::tie(records[0].sequence(), records[1].sequence()), config))

But if you do have a solution for the push_back just for aa sequences, let me know.

Hi @leannmlindsey,

Your workaround works well :)

another way would be (similar to @eseilers approach):

typename decltype(file_in)::sequence_type sequences{}; // takes the type from the file

and then the push_back should work

sequences.push_back(rec.sequence());

Since you have a working version, I think the issue is resolved. Feel free to reopen it if you have another question or there is something missing.