def get_args(argv=None):
    """Parse the command-line options for the program.

    Args:
        argv: Optional list of argument strings to parse instead of the
            process command line. The default ``None`` makes argparse read
            ``sys.argv[1:]``, which is what the function always did before
            this parameter existed.

    Returns:
        argparse.Namespace with two attributes:
        ``input_xl`` (default ``"info.xlsx"``) and
        ``input_dir`` (default ``"Data"``).
    """
    # TODO(review): the description is still the template placeholder text.
    parser = argparse.ArgumentParser(
        description="This describes what your program does."
    )
    parser.add_argument(
        "--input_xl",
        help="Semi-Optional: input excel file in .xlsx format containing information about the samples.",
        default="info.xlsx",
        required=False,
    )
    parser.add_argument(
        "--input_dir",
        help="Semi-Optional: input directory for all input files.",
        default="Data",
        required=False,
    )
    return parser.parse_args(argv)
class Sample_Metadata:  # Listed from biosys.py
    """A slice of an OTU table, and associated metadata for a diatom sample."""

    def __init__(self, sample_name, sample_id):
        """Record the sample's name and identifier."""
        self.id = sample_id
        self.name = sample_name

    def assign_results(self, otus, batch_num):
        """Attach an OTU table and batch metadata to this sample.

        Sets ``otu_tab`` and ``count``; sets ``pass_fail`` to "Successful"
        only when the count reaches 3000; sets ``batch_num`` and
        ``analysis_date`` from ``batch_num``.

        NOTE(review): this method reads ``self.folder``, which ``__init__``
        does not assign - presumably a caller sets it first; confirm.
        NOTE(review): ``batch_num_dict`` and ``no_value`` are not defined in
        this class - presumably module-level names; confirm.
        """
        self.otu_tab = otus
        folder_key = str(self.folder)

        # The sample's column is summed; a missing column counts as zero reads.
        try:
            self.count = otus[folder_key].sum()
        except KeyError:
            self.count = 0
            print("Seq count for " + folder_key + " has been set to 0.")

        # pass_fail is left untouched when the count is below the threshold.
        if self.count >= 3000:
            self.pass_fail = "Successful"

        if not batch_num:
            self.batch_num = no_value
            self.analysis_date = no_value
            return

        # Keep only the part before the first "." (e.g. "12.0" -> "12").
        self.batch_num = str(batch_num).split(".")[0]
        try:
            run_date = batch_num_dict[self.batch_num]
        except KeyError:
            run_date = "Run metadata has not been set"
            print(run_date + " for sample: " + folder_key + " " + str(self.batch_num))
        self.analysis_date = run_date
# Copy-pasted classes:
class FormatError(Exception):
    """Formatting of an input file is incompatible with this program."""
class Single_Fastq_Entry:
    """Store one read of paired-end FASTQ data plus a tag cut from its file name."""

    def __init__(self, header, seq, qual, filename):
        """Keep the left-stripped header, sequence and quality strings.

        ``header`` is stored twice: truncated to its first 45 characters
        (``header``) and in full (``header_full``).
        """
        # This will need changing depending on the format of the filename.
        self.tag = filename[18:26]

        stripped_header = header.lstrip()
        # NOTE(review): an empty or whitespace-only header raises IndexError
        # on the next line.
        # NOTE(review): the original indentation was lost; all four
        # assignments below are assumed to be guarded by the "@" check, so a
        # header without a leading "@" leaves only ``tag`` set - confirm.
        if stripped_header[0] != "@":
            return

        self.header_full = stripped_header
        self.header = stripped_header[0:45]
        self.seq = seq.lstrip()
        self.qual = qual.lstrip()
class Paired_Fastq_Entry:
    """Pair up two FASTQ objects belonging to one sample.

    ``f`` and ``r`` hold the two objects as given - presumably the forward
    and reverse reads; confirm against the caller.
    """

    def __init__(self, f_fastq, r_fastq):
        self.f, self.r = f_fastq, r_fastq
class Sample_Meta:
    """A class used to store data about the QC process of the pipeline."""

    def __init__(self, sample_id, file_path_f, file_path_r, file_format):
        """Record the sample's files and mark it as being at the "start" stage."""
        self.sample_id = sample_id
        self.format = file_format
        self.path_f = file_path_f
        self.path_r = file_path_r
        # Directory part of the first file path; fixed at construction time.
        self.directory = os.path.dirname(self.path_f)
        self.stage = "start"

    def assign_progress(self, stage_passed, file_path_f, file_path_r):
        """Update the stage reached and the two current file paths.

        ``directory`` is not recomputed from the new paths.
        """
        self.stage = stage_passed
        self.path_f = file_path_f
        self.path_r = file_path_r
class bcolours:
    """Escape-sequence constants for styling printed text.

    Use as print(bcolours.WARNING + "text to print" + bcolours.ENDC)
    """

    # Colours
    HEADER = '\033[95m'
    OKBLUE = '\033[94m'
    OKGREEN = '\033[92m'
    WARNING = '\033[93m'
    FAIL = '\033[91m'

    # Reset and text attributes
    ENDC = '\033[0m'
    BOLD = '\033[1m'
    UNDERLINE = '\033[4m'
def remove_info(df, col, info):
    """Drop rows whose ``col`` value matches ``info``, then drop all-zero rows.

    Args:
        df: pandas DataFrame to filter. It is not modified.
        col: Name of the column to search; its values must be strings.
        info: Pattern handed to ``Series.str.contains`` (pandas treats it as
            a regular expression by default).

    Returns:
        A new DataFrame indexed by the first column of the filtered frame,
        without the rows in which every remaining value equals 0.
    """
    # Look the column up by the name held in ``col``. Attribute access
    # (``df.col``) would read a column literally called "col" instead.
    matches = df[col].str.contains(info)
    kept = df[~matches]
    kept = kept.set_index(list(kept)[0])
    has_nonzero = (kept != 0).any(axis=1)
    return kept.loc[has_nonzero]