Skip to content
Snippets Groups Projects
Commit 5737dd5e authored by james94's avatar james94
Browse files

stats indent error

parent d7f0b685
No related branches found
No related tags found
No related merge requests found
...@@ -25,165 +25,167 @@ else: ...@@ -25,165 +25,167 @@ else:
size_bp = estSize size_bp = estSize
def fasta_iter(fasta_file):
    """
    Take a FASTA file and produce a generator of (header, sequence) pairs.

    This is a memory-efficient way of analyzing a FASTA file -- without
    reading the entire file into memory.

    Parameters
    ----------
    fasta_file : str
        The file location of the FASTA file

    Yields
    ------
    header : str
        The string contained in the header portion of the sequence record
        (everything after the '>')
    seq : str
        The sequence portion of the sequence record, upper-cased and joined
        into a single string
    """
    # 'with' guarantees the handle is closed even if the consumer stops
    # iterating early -- the original opened the file and never closed it.
    with open(fasta_file) as fh:
        # groupby alternates between runs of header lines (starting with '>')
        # and runs of sequence lines; keep each run's iterator.
        fa_iter = (group for _, group in groupby(fh, lambda line: line[0] == ">"))
        for header in fa_iter:
            # drop the ">" and surrounding whitespace
            header = next(header)[1:].strip()
            # join all sequence lines of this record into one string
            seq = "".join(s.upper().strip() for s in next(fa_iter))
            yield header, seq
def read_genome(fasta_file):
    """
    Take a FASTA file and produce two lists of sequence lengths plus GC content.

    It also calculates the GC content, since this is the only statistic that
    is not calculated based on sequence lengths.

    Parameters
    ----------
    fasta_file : str
        The file location of the FASTA file

    Returns
    -------
    contig_lens : list
        A list of lengths of all contigs in the genome.
    scaffold_lens : list
        A list of lengths of all scaffolds in the genome.
    gc_cont : float
        The percentage of total basepairs in the genome that are either G or C
        (0.0 for an empty or all-N genome).
    """
    gc = 0
    total_len = 0
    contig_lens = []
    scaffold_lens = []
    for _, seq in fasta_iter(fasta_file):
        scaffold_lens.append(len(seq))
        # A run of two or more Ns is treated as a gap separating contigs.
        # str.split already returns [seq] when the separator is absent, so
        # the original's explicit '"NN" in seq' check was redundant.
        for contig in seq.split("NN"):
            if contig:
                gc += contig.count('G') + contig.count('C')
                total_len += len(contig)
                contig_lens.append(len(contig))
    # Guard against division by zero on an empty / all-N input
    # (the original raised ZeroDivisionError here).
    gc_cont = (gc / total_len) * 100 if total_len else 0.0
    return contig_lens, scaffold_lens, gc_cont
def calculate_stats(seq_lens, gc_cont, estimated_size=None):
    """
    Compute assembly summary statistics from a list of sequence lengths.

    Parameters
    ----------
    seq_lens : list of int
        Lengths of the sequences (contigs or scaffolds) in the assembly.
    gc_cont : float
        GC content of the assembly, in percent.
    estimated_size : int, optional
        Estimated genome size in bp for the NG/LG statistics.  When omitted,
        falls back to the module-level ``size_bp`` (the original behaviour).

    Returns
    -------
    dict
        Summary statistics.  NOTE: downstream code slices this dict by key
        *position* (e.g. ``iloc[7:31]`` for the Lxx rows), so the insertion
        order -- 7 summary entries, then L*, N*, LG*, NG* -- must not change.
    """
    # Percentage levels shared by the Nxx / Lxx / NGxx / LGxx statistics
    # (the original repeated this literal list four times).
    levels = [5, 10, 15, 20, 25, 30, 35, 40, 45, 50,
              55, 60, 65, 70, 75, 80, 85, 90, 95, 96, 97, 98, 99, 100]

    stats = {}
    seq_array = np.array(seq_lens)
    stats['sequence_count'] = seq_array.size
    # Placeholder; the caller overwrites this with contig count - scaffold count.
    stats['number_of_gaps'] = 0
    stats['gc_content (%)'] = gc_cont
    sorted_lens = seq_array[np.argsort(-seq_array)]  # descending lengths
    stats['longest (bp)'] = int(sorted_lens[0])
    stats['shortest (bp)'] = int(sorted_lens[-1])
    stats['total_bps (bp)'] = int(np.sum(sorted_lens))
    stats['estimated_size (bp)'] = int(size_bp if estimated_size is None else estimated_size)
    # Cumulative length of the longest 1..k sequences (strictly increasing
    # for positive lengths); all Nxx-style thresholds are searched in it.
    csum = np.cumsum(sorted_lens)
    total = stats['total_bps (bp)']
    est = stats['estimated_size (bp)']

    # Lxx: how many of the longest sequences are needed to reach level% of
    # the total assembly length.  searchsorted finds the first cumulative
    # sum >= threshold in O(log n); the original's
    # int(np.where(csum == min(csum[csum >= nx]))[0]) crashed on duplicate
    # cumulative sums and used a deprecated int-of-array conversion.
    for level in levels:
        nx = int(total * (level / 100))
        stats['L' + str(level)] = int(np.searchsorted(csum, nx)) + 1

    # Nxx: length of the sequence at which level% of the total is reached.
    for level in levels:
        nx = int(total * (level / 100))
        stats['N' + str(level)] = int(sorted_lens[np.searchsorted(csum, nx)])

    # LGxx: like Lxx but relative to the *estimated* genome size.  When the
    # assembly is shorter than the target (no cumulative sum reaches the
    # threshold), fall back to the last sequence, as the original did.
    for level in levels:
        ngx = int(est * (level / 100))
        idx = min(int(np.searchsorted(csum, ngx)), csum.size - 1)
        stats['LG' + str(level)] = idx + 1

    # NGxx: like Nxx but relative to the estimated genome size.
    for level in levels:
        ngx = int(est * (level / 100))
        idx = min(int(np.searchsorted(csum, ngx)), csum.size - 1)
        stats['NG' + str(level)] = int(sorted_lens[idx])
    return stats
if __name__ == "__main__": if __name__ == "__main__":
# print(size_bp) # print(size_bp)
# print(type(size_bp)) # print(type(size_bp))
naming = sys.argv[3] naming = sys.argv[3]
infilename = sys.argv[1] infilename = sys.argv[1]
contig_lens, scaffold_lens, gc_cont = read_genome(infilename) contig_lens, scaffold_lens, gc_cont = read_genome(infilename)
# contig_stats = calculate_stats(contig_lens, gc_cont) # contig_stats = calculate_stats(contig_lens, gc_cont)
scaffold_stats = calculate_stats(scaffold_lens, gc_cont) scaffold_stats = calculate_stats(scaffold_lens, gc_cont)
contig_stats = calculate_stats(contig_lens, gc_cont) contig_stats = calculate_stats(contig_lens, gc_cont)
gaps=contig_stats.get('sequence_count') - scaffold_stats.get('sequence_count') gaps=contig_stats.get('sequence_count') - scaffold_stats.get('sequence_count')
scaffold_stats['number_of_gaps'] = gaps scaffold_stats['number_of_gaps'] = gaps
contig_stats['number_of_gaps'] = gaps contig_stats['number_of_gaps'] = gaps
# print(scaffold_stats) # print(scaffold_stats)
# df_scaffold_all= pd.DataFrame.from_dict(scaffold_stats, orient= 'index') # df_scaffold_all= pd.DataFrame.from_dict(scaffold_stats, orient= 'index')
# print(df_scaffold_all) # print(df_scaffold_all)
...@@ -193,72 +195,72 @@ if __name__ == "__main__": ...@@ -193,72 +195,72 @@ if __name__ == "__main__":
# s.index.name = 'Assembly:' # s.index.name = 'Assembly:'
# s.reset_index() # s.reset_index()
# print(s) # print(s)
scaff_seq=pd.DataFrame(scaffold_stats.items(), columns=['Assembly:', naming]) scaff_seq=pd.DataFrame(scaffold_stats.items(), columns=['Assembly:', naming])
df_scaffold_top=scaff_seq.iloc[0:7,0:2] df_scaffold_top=scaff_seq.iloc[0:7,0:2]
df_scaffold_top[naming]=df_scaffold_top[naming].astype('int64').apply('{:,}'.format) df_scaffold_top[naming]=df_scaffold_top[naming].astype('int64').apply('{:,}'.format)
# df_scaffold_top=df_scaffold_top.style.hide_index() # df_scaffold_top=df_scaffold_top.style.hide_index()
# df_scaffold_top[naming].round(decimals=0) # df_scaffold_top[naming].round(decimals=0)
# df_contig_all=pd.DataFrame(data=contig_stats) # df_contig_all=pd.DataFrame(data=contig_stats)
# df_contig_top=df_contig_all.iloc[0:6,0:2] # df_contig_top=df_contig_all.iloc[0:6,0:2]
df_scaffold_Nxx=pd.DataFrame(scaffold_stats.items(), columns=['Nxx Level (%)', 'Length of Nxx Scaffold (bp)']) df_scaffold_Nxx=pd.DataFrame(scaffold_stats.items(), columns=['Nxx Level (%)', 'Length of Nxx Scaffold (bp)'])
df_scaffold_Nxx=df_scaffold_Nxx.iloc[31:55,0:2] df_scaffold_Nxx=df_scaffold_Nxx.iloc[31:55,0:2]
df_scaffold_Nxx=df_scaffold_Nxx.reset_index() df_scaffold_Nxx=df_scaffold_Nxx.reset_index()
# print(df_scaffold_Nxx) # print(df_scaffold_Nxx)
df_scaffold_NGxx=pd.DataFrame(scaffold_stats.items(), columns=['NGxx Level (%)', 'Length of NGxx Scaffold (bp)']) df_scaffold_NGxx=pd.DataFrame(scaffold_stats.items(), columns=['NGxx Level (%)', 'Length of NGxx Scaffold (bp)'])
df_scaffold_NGxx=df_scaffold_NGxx.iloc[79:104,0:2] df_scaffold_NGxx=df_scaffold_NGxx.iloc[79:104,0:2]
df_scaffold_NGxx=df_scaffold_NGxx.reset_index() df_scaffold_NGxx=df_scaffold_NGxx.reset_index()
# print(df_scaffold_NGxx) # print(df_scaffold_NGxx)
df_scaffold_N_NG=pd.concat([df_scaffold_Nxx,df_scaffold_NGxx], axis=1) df_scaffold_N_NG=pd.concat([df_scaffold_Nxx,df_scaffold_NGxx], axis=1)
df_scaffold_N_NG=df_scaffold_N_NG.drop(df_scaffold_N_NG.columns[[0,3]], axis = 1) df_scaffold_N_NG=df_scaffold_N_NG.drop(df_scaffold_N_NG.columns[[0,3]], axis = 1)
df_scaffold_N_NG['Length of Nxx Scaffold (bp)']=df_scaffold_N_NG['Length of Nxx Scaffold (bp)'].astype('int64').apply('{:,}'.format) df_scaffold_N_NG['Length of Nxx Scaffold (bp)']=df_scaffold_N_NG['Length of Nxx Scaffold (bp)'].astype('int64').apply('{:,}'.format)
df_scaffold_N_NG['Length of NGxx Scaffold (bp)']=df_scaffold_N_NG['Length of NGxx Scaffold (bp)'].astype('int64').apply('{:,}'.format) df_scaffold_N_NG['Length of NGxx Scaffold (bp)']=df_scaffold_N_NG['Length of NGxx Scaffold (bp)'].astype('int64').apply('{:,}'.format)
# df_scaffold_N_NG=df_scaffold_N_NG.style.hide_index() # df_scaffold_N_NG=df_scaffold_N_NG.style.hide_index()
df_scaffold_Lxx=pd.DataFrame(scaffold_stats.items(), columns=['Lxx Level (%)', 'Count of scaffolds (for Lxx Level)']) df_scaffold_Lxx=pd.DataFrame(scaffold_stats.items(), columns=['Lxx Level (%)', 'Count of scaffolds (for Lxx Level)'])
df_scaffold_Lxx=df_scaffold_Lxx.iloc[7:31,0:2] df_scaffold_Lxx=df_scaffold_Lxx.iloc[7:31,0:2]
df_scaffold_Lxx=df_scaffold_Lxx.reset_index() df_scaffold_Lxx=df_scaffold_Lxx.reset_index()
# print(df_scaffold_Nxx) # print(df_scaffold_Nxx)
df_scaffold_LGxx=pd.DataFrame(scaffold_stats.items(), columns=['LGxx Level (%)', 'Count of scaffolds (for LGxx Level)']) df_scaffold_LGxx=pd.DataFrame(scaffold_stats.items(), columns=['LGxx Level (%)', 'Count of scaffolds (for LGxx Level)'])
df_scaffold_LGxx=df_scaffold_LGxx.iloc[55:79,0:2] df_scaffold_LGxx=df_scaffold_LGxx.iloc[55:79,0:2]
df_scaffold_LGxx=df_scaffold_LGxx.reset_index() df_scaffold_LGxx=df_scaffold_LGxx.reset_index()
# print(df_scaffold_NGxx) # print(df_scaffold_NGxx)
df_scaffold_L_LG=pd.concat([df_scaffold_Lxx,df_scaffold_LGxx], axis=1) df_scaffold_L_LG=pd.concat([df_scaffold_Lxx,df_scaffold_LGxx], axis=1)
df_scaffold_L_LG=df_scaffold_L_LG.drop(df_scaffold_L_LG.columns[[0,3]], axis = 1) df_scaffold_L_LG=df_scaffold_L_LG.drop(df_scaffold_L_LG.columns[[0,3]], axis = 1)
df_scaffold_L_LG['Count of scaffolds (for Lxx Level)']=df_scaffold_L_LG['Count of scaffolds (for Lxx Level)'].astype('int64').apply('{:,}'.format) df_scaffold_L_LG['Count of scaffolds (for Lxx Level)']=df_scaffold_L_LG['Count of scaffolds (for Lxx Level)'].astype('int64').apply('{:,}'.format)
df_scaffold_L_LG['Count of scaffolds (for LGxx Level)']=df_scaffold_L_LG['Count of scaffolds (for LGxx Level)'].astype('int64').apply('{:,}'.format) df_scaffold_L_LG['Count of scaffolds (for LGxx Level)']=df_scaffold_L_LG['Count of scaffolds (for LGxx Level)'].astype('int64').apply('{:,}'.format)
################################################################################################################ ################################################################################################################
contig_seq=pd.DataFrame(contig_stats.items(), columns=['Assembly:', naming]) contig_seq=pd.DataFrame(contig_stats.items(), columns=['Assembly:', naming])
df_contig_top=contig_seq.iloc[0:7,0:2] df_contig_top=contig_seq.iloc[0:7,0:2]
df_contig_top[naming]=df_contig_top[naming].astype('int64').apply('{:,}'.format) df_contig_top[naming]=df_contig_top[naming].astype('int64').apply('{:,}'.format)
# df_scaffold_top=df_scaffold_top.style.hide_index() # df_scaffold_top=df_scaffold_top.style.hide_index()
# df_scaffold_top[naming].round(decimals=0) # df_scaffold_top[naming].round(decimals=0)
# df_contig_all=pd.DataFrame(data=contig_stats) # df_contig_all=pd.DataFrame(data=contig_stats)
# df_contig_top=df_contig_all.iloc[0:6,0:2] # df_contig_top=df_contig_all.iloc[0:6,0:2]
df_contig_Nxx=pd.DataFrame(contig_stats.items(), columns=['Nxx Level (%)', 'Length of Nxx contig (bp)']) df_contig_Nxx=pd.DataFrame(contig_stats.items(), columns=['Nxx Level (%)', 'Length of Nxx contig (bp)'])
df_contig_Nxx=df_contig_Nxx.iloc[31:55,0:2] df_contig_Nxx=df_contig_Nxx.iloc[31:55,0:2]
df_contig_Nxx=df_contig_Nxx.reset_index() df_contig_Nxx=df_contig_Nxx.reset_index()
# print(df_scaffold_Nxx) # print(df_scaffold_Nxx)
df_contig_NGxx=pd.DataFrame(contig_stats.items(), columns=['NGxx Level (%)', 'Length of NGxx contig (bp)']) df_contig_NGxx=pd.DataFrame(contig_stats.items(), columns=['NGxx Level (%)', 'Length of NGxx contig (bp)'])
df_contig_NGxx=df_contig_NGxx.iloc[79:104,0:2] df_contig_NGxx=df_contig_NGxx.iloc[79:104,0:2]
df_contig_NGxx=df_contig_NGxx.reset_index() df_contig_NGxx=df_contig_NGxx.reset_index()
# print(df_scaffold_NGxx) # print(df_scaffold_NGxx)
df_contig_N_NG=pd.concat([df_contig_Nxx,df_contig_NGxx], axis=1) df_contig_N_NG=pd.concat([df_contig_Nxx,df_contig_NGxx], axis=1)
df_contig_N_NG=df_contig_N_NG.drop(df_contig_N_NG.columns[[0,3]], axis = 1) df_contig_N_NG=df_contig_N_NG.drop(df_contig_N_NG.columns[[0,3]], axis = 1)
df_contig_N_NG['Length of Nxx contig (bp)']=df_contig_N_NG['Length of Nxx contig (bp)'].astype('int64').apply('{:,}'.format) df_contig_N_NG['Length of Nxx contig (bp)']=df_contig_N_NG['Length of Nxx contig (bp)'].astype('int64').apply('{:,}'.format)
df_contig_N_NG['Length of NGxx contig (bp)']=df_contig_N_NG['Length of NGxx contig (bp)'].astype('int64').apply('{:,}'.format) df_contig_N_NG['Length of NGxx contig (bp)']=df_contig_N_NG['Length of NGxx contig (bp)'].astype('int64').apply('{:,}'.format)
# df_scaffold_N_NG=df_scaffold_N_NG.style.hide_index() # df_scaffold_N_NG=df_scaffold_N_NG.style.hide_index()
df_contig_Lxx=pd.DataFrame(contig_stats.items(), columns=['Lxx Level (%)', 'Count of contig (for Lxx Level)']) df_contig_Lxx=pd.DataFrame(contig_stats.items(), columns=['Lxx Level (%)', 'Count of contig (for Lxx Level)'])
df_contig_Lxx=df_contig_Lxx.iloc[7:31,0:2] df_contig_Lxx=df_contig_Lxx.iloc[7:31,0:2]
df_contig_Lxx=df_contig_Lxx.reset_index() df_contig_Lxx=df_contig_Lxx.reset_index()
# print(df_scaffold_Nxx) # print(df_scaffold_Nxx)
df_contig_LGxx=pd.DataFrame(contig_stats.items(), columns=['LGxx Level (%)', 'Count of contig (for LGxx Level)']) df_contig_LGxx=pd.DataFrame(contig_stats.items(), columns=['LGxx Level (%)', 'Count of contig (for LGxx Level)'])
df_contig_LGxx=df_contig_LGxx.iloc[55:79,0:2] df_contig_LGxx=df_contig_LGxx.iloc[55:79,0:2]
df_contig_LGxx=df_contig_LGxx.reset_index() df_contig_LGxx=df_contig_LGxx.reset_index()
# print(df_scaffold_NGxx) # print(df_scaffold_NGxx)
df_contig_L_LG=pd.concat([df_contig_Lxx,df_contig_LGxx], axis=1) df_contig_L_LG=pd.concat([df_contig_Lxx,df_contig_LGxx], axis=1)
df_contig_L_LG=df_contig_L_LG.drop(df_contig_L_LG.columns[[0,3]], axis = 1) df_contig_L_LG=df_contig_L_LG.drop(df_contig_L_LG.columns[[0,3]], axis = 1)
df_contig_L_LG['Count of contig (for Lxx Level)']=df_contig_L_LG['Count of contig (for Lxx Level)'].astype('int64').apply('{:,}'.format) df_contig_L_LG['Count of contig (for Lxx Level)']=df_contig_L_LG['Count of contig (for Lxx Level)'].astype('int64').apply('{:,}'.format)
df_contig_L_LG['Count of contig (for LGxx Level)']=df_contig_L_LG['Count of contig (for LGxx Level)'].astype('int64').apply('{:,}'.format) df_contig_L_LG['Count of contig (for LGxx Level)']=df_contig_L_LG['Count of contig (for LGxx Level)'].astype('int64').apply('{:,}'.format)
# df_scaffold_L_LG=df_scaffold_L_LG.style.hide_index() # df_scaffold_L_LG=df_scaffold_L_LG.style.hide_index()
# print(df_contig_top) # print(df_contig_top)
# print(scaffold_stats) # print(scaffold_stats)
...@@ -286,27 +288,27 @@ if __name__ == "__main__": ...@@ -286,27 +288,27 @@ if __name__ == "__main__":
# contig = csv.writer(open(naming + "_contig_stats.tsv", "w"), delimiter='\t') # contig = csv.writer(open(naming + "_contig_stats.tsv", "w"), delimiter='\t')
# for key, val in contig_stats.items(): # for key, val in contig_stats.items():
# #
with open(sys.argv[5], 'w') as outputfile: with open(sys.argv[5], 'w') as outputfile:
# # print('#' + libraryName, file=outputfile) # # print('#' + libraryName, file=outputfile)
# # print("Total Reads Processed (Paired): " + total_processed + " ( 100 %)", file=outputfile) # # print("Total Reads Processed (Paired): " + total_processed + " ( 100 %)", file=outputfile)
# # print("Discarded reads (Paired): " + discarded + " ( "+str(discarded_perc)+"%)", file=outputfile) # # print("Discarded reads (Paired): " + discarded + " ( "+str(discarded_perc)+"%)", file=outputfile)
# # print("Successfully Processed reads (Paired): " + successfull + " ( "+str(successfull_perc)+"%)", file=outputfile) # # print("Successfully Processed reads (Paired): " + successfull + " ( "+str(successfull_perc)+"%)", file=outputfile)
print(df_scaffold_top.to_string(index=False), file=outputfile) print(df_scaffold_top.to_string(index=False), file=outputfile)
print("", file=outputfile) print("", file=outputfile)
print(df_scaffold_N_NG.to_string(index=False), file=outputfile) print(df_scaffold_N_NG.to_string(index=False), file=outputfile)
print("", file=outputfile) print("", file=outputfile)
print(df_scaffold_L_LG.to_string(index=False), file=outputfile) print(df_scaffold_L_LG.to_string(index=False), file=outputfile)
with open(sys.argv[6], 'w') as outputfile2: with open(sys.argv[6], 'w') as outputfile2:
# # print('#' + libraryName, file=outputfile) # # print('#' + libraryName, file=outputfile)
# # print("Total Reads Processed (Paired): " + total_processed + " ( 100 %)", file=outputfile) # # print("Total Reads Processed (Paired): " + total_processed + " ( 100 %)", file=outputfile)
# # print("Discarded reads (Paired): " + discarded + " ( "+str(discarded_perc)+"%)", file=outputfile) # # print("Discarded reads (Paired): " + discarded + " ( "+str(discarded_perc)+"%)", file=outputfile)
# # print("Successfully Processed reads (Paired): " + successfull + " ( "+str(successfull_perc)+"%)", file=outputfile) # # print("Successfully Processed reads (Paired): " + successfull + " ( "+str(successfull_perc)+"%)", file=outputfile)
print(df_contig_top.to_string(index=False), file=outputfile2) print(df_contig_top.to_string(index=False), file=outputfile2)
print("", file=outputfile2) print("", file=outputfile2)
print(df_contig_N_NG.to_string(index=False), file=outputfile2) print(df_contig_N_NG.to_string(index=False), file=outputfile2)
print("", file=outputfile2) print("", file=outputfile2)
print(df_contig_L_LG.to_string(index=False), file=outputfile2) print(df_contig_L_LG.to_string(index=False), file=outputfile2)
# with open(sys.argv[4], 'w') as outputRst: # with open(sys.argv[4], 'w') as outputRst:
# print(tabulate(df_scaffold_top, headers='keys',tablefmt="rst", showindex=False), file=outputRst) # print(tabulate(df_scaffold_top, headers='keys',tablefmt="rst", showindex=False), file=outputRst)
...@@ -324,21 +326,21 @@ if __name__ == "__main__": ...@@ -324,21 +326,21 @@ if __name__ == "__main__":
# print(tabulate(df_contig_L_LG, headers='keys',tablefmt="rst", showindex=False), file=outputRst2) # print(tabulate(df_contig_L_LG, headers='keys',tablefmt="rst", showindex=False), file=outputRst2)
# print("", file=outputRst2) # print("", file=outputRst2)
with open(sys.argv[4], 'w') as outputRst: with open(sys.argv[4], 'w') as outputRst:
print(tabulate(df_scaffold_top, headers='keys',tablefmt="pipe", showindex=False), file=outputRst) print(tabulate(df_scaffold_top, headers='keys',tablefmt="pipe", showindex=False), file=outputRst)
print("", file=outputRst) print("", file=outputRst)
print(tabulate(df_scaffold_N_NG, headers='keys',tablefmt="pipe", showindex=False), file=outputRst) print(tabulate(df_scaffold_N_NG, headers='keys',tablefmt="pipe", showindex=False), file=outputRst)
print("", file=outputRst) print("", file=outputRst)
print(tabulate(df_scaffold_L_LG, headers='keys',tablefmt="pipe", showindex=False), file=outputRst) print(tabulate(df_scaffold_L_LG, headers='keys',tablefmt="pipe", showindex=False), file=outputRst)
print("", file=outputRst) print("", file=outputRst)
# #
with open(sys.argv[5], 'w') as outputRst2: with open(sys.argv[5], 'w') as outputRst2:
print(tabulate(df_contig_top, headers='keys',tablefmt="pipe", showindex=False), file=outputRst2) print(tabulate(df_contig_top, headers='keys',tablefmt="pipe", showindex=False), file=outputRst2)
print("", file=outputRst2) print("", file=outputRst2)
print(tabulate(df_contig_N_NG, headers='keys',tablefmt="pipe", showindex=False), file=outputRst2) print(tabulate(df_contig_N_NG, headers='keys',tablefmt="pipe", showindex=False), file=outputRst2)
print("", file=outputRst2) print("", file=outputRst2)
print(tabulate(df_contig_L_LG, headers='keys',tablefmt="pipe", showindex=False), file=outputRst2) print(tabulate(df_contig_L_LG, headers='keys',tablefmt="pipe", showindex=False), file=outputRst2)
print("", file=outputRst2) print("", file=outputRst2)
# list_of_dfs=[df_scaffold_top,df_scaffold_N_NG,df_scaffold_L_LG] # list_of_dfs=[df_scaffold_top,df_scaffold_N_NG,df_scaffold_L_LG]
# for df in list_of_dfs: # for df in list_of_dfs:
# with open('all_dfs.tsv','a') as f: # with open('all_dfs.tsv','a') as f:
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment