You can do the same with seqkit grep
https://github.com/shenwei356/seqkit
Remove the sequences whose IDs appear in an ID list file, or extract only those sequences.
fasta_extract.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Use the FASTA record ID as the key and write the matching sequences to standard output
import sys
from Bio import SeqIO
def load_ids(path):
    """Return the set of IDs listed one per line in the file at *path*.

    Trailing whitespace (including the newline) is stripped from every line,
    which is the same normalisation the comparison has always applied.
    The file is read once and closed before returning.
    """
    with open(path, "r") as handle:
        return {line.rstrip() for line in handle}


def extract_records(records, wanted):
    """Yield FASTA-formatted text for each record whose ID is in *wanted*.

    *records* is any iterable of objects exposing ``id``, ``description``
    and ``seq`` attributes (e.g. what ``SeqIO.parse`` produces).
    *wanted* is a container of ID strings; a set gives constant-time lookup.
    Each matching record is yielded exactly once, in input order, even if
    its ID was listed several times in the query file.
    """
    for record in records:
        if record.id.rstrip() in wanted:
            # str() makes the sequence concatenation explicit instead of
            # relying on the sequence object's own "+" support.
            yield '>' + record.description + '\n' + str(record.seq)


def main(argv):
    """Print the records of a FASTA file whose IDs appear in an ID list.

    argv[1] is the FASTA file to filter; argv[2] is a text file holding one
    ID per line. Matching records are written to standard output.
    """
    if len(argv) < 3:
        sys.exit("usage: fasta_extract.py <fasta_file> <id_list_file>")
    fasta_in = argv[1]
    query = argv[2]
    # Load the ID list once rather than re-reading it for every record.
    wanted = load_ids(query)
    for entry in extract_records(SeqIO.parse(fasta_in, 'fasta'), wanted):
        print(entry)


if __name__ == "__main__":
    main(sys.argv)
fasta_remove.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Use the FASTA record ID as the key and write the sequences that did NOT match to standard output
import sys
from Bio import SeqIO
def load_ids(path):
    """Return the set of IDs listed one per line in the file at *path*.

    Trailing whitespace (including the newline) is stripped from every line.
    The file is read once and closed before returning.
    """
    with open(path, "r") as handle:
        return {line.rstrip() for line in handle}


def remove_records(records, unwanted):
    """Yield FASTA-formatted text for each record whose ID is NOT in *unwanted*.

    *records* is any iterable of objects exposing ``id``, ``description``
    and ``seq`` attributes (e.g. what ``SeqIO.parse`` produces).
    *unwanted* is a container of ID strings to drop; matching is exact.
    Surviving records are yielded in input order.
    """
    for record in records:
        if record.id.rstrip() not in unwanted:
            yield '>' + record.description + '\n' + str(record.seq)


def main(argv):
    """Print the records of a FASTA file whose IDs are absent from an ID list.

    argv[1] is the FASTA file to filter; argv[2] is a text file holding one
    ID per line. Non-matching records are written to standard output.
    """
    if len(argv) < 3:
        sys.exit("usage: fasta_remove.py <fasta_file> <id_list_file>")
    fasta_in = argv[1]
    query = argv[2]
    # Load the ID list once; a set lookup replaces the per-record file scan
    # and the manually reset hit counter.
    unwanted = load_ids(query)
    for entry in remove_records(SeqIO.parse(fasta_in, 'fasta'), unwanted):
        print(entry)


if __name__ == "__main__":
    main(sys.argv)
fasta_extract_cont.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Use the FASTA record ID as the key and write the matching sequences to standard output (partial-match version)
import sys
from Bio import SeqIO
def load_patterns(path):
    """Return the non-empty search strings listed one per line in *path*.

    Trailing whitespace (including the newline) is stripped from every line.
    Blank lines are skipped: an empty string is a substring of every ID, so
    keeping it would make every record match.
    """
    with open(path, "r") as handle:
        stripped = (line.rstrip() for line in handle)
        return [pattern for pattern in stripped if pattern]


def extract_matching(records, patterns):
    """Yield FASTA text for each record whose ID contains any of *patterns*.

    *records* is any iterable of objects exposing ``id``, ``description``
    and ``seq`` attributes (e.g. what ``SeqIO.parse`` produces).
    *patterns* is an iterable of substrings, re-iterated per record (so pass
    a list or tuple, not a one-shot iterator).
    A record is yielded once even when several patterns match it.
    """
    for record in records:
        target = record.id.rstrip()
        if any(pattern in target for pattern in patterns):
            yield '>' + record.description + '\n' + str(record.seq)


def main(argv):
    """Print the FASTA records whose ID contains a listed search string.

    argv[1] is the FASTA file to filter; argv[2] is a text file holding one
    search string per line. Matching records are written to standard output.
    """
    if len(argv) < 3:
        sys.exit("usage: fasta_extract_cont.py <fasta_file> <pattern_file>")
    fasta_in = argv[1]
    query = argv[2]
    # Load the pattern list once rather than re-reading it for every record.
    patterns = load_patterns(query)
    for entry in extract_matching(SeqIO.parse(fasta_in, 'fasta'), patterns):
        print(entry)


if __name__ == "__main__":
    main(sys.argv)
fasta_remove_cont.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Use the FASTA record ID as the key and write the sequences that did NOT match to standard output (partial-match version)
import sys
from Bio import SeqIO
def load_patterns(path):
    """Return the non-empty search strings listed one per line in *path*.

    Trailing whitespace (including the newline) is stripped from every line.
    Blank lines are skipped: an empty string is a substring of every ID, so
    keeping it would cause every record to be removed.
    """
    with open(path, "r") as handle:
        stripped = (line.rstrip() for line in handle)
        return [pattern for pattern in stripped if pattern]


def remove_matching(records, patterns):
    """Yield FASTA text for each record whose ID contains none of *patterns*.

    *records* is any iterable of objects exposing ``id``, ``description``
    and ``seq`` attributes (e.g. what ``SeqIO.parse`` produces).
    *patterns* is an iterable of substrings, re-iterated per record (so pass
    a list or tuple, not a one-shot iterator).
    Surviving records are yielded in input order.
    """
    for record in records:
        target = record.id.rstrip()
        if not any(pattern in target for pattern in patterns):
            yield '>' + record.description + '\n' + str(record.seq)


def main(argv):
    """Print the FASTA records whose ID contains no listed search string.

    argv[1] is the FASTA file to filter; argv[2] is a text file holding one
    search string per line. Non-matching records are written to standard
    output.
    """
    if len(argv) < 3:
        sys.exit("usage: fasta_remove_cont.py <fasta_file> <pattern_file>")
    fasta_in = argv[1]
    query = argv[2]
    # Load the pattern list once; any() replaces the manually reset counter.
    patterns = load_patterns(query)
    for entry in remove_matching(SeqIO.parse(fasta_in, 'fasta'), patterns):
        print(entry)


if __name__ == "__main__":
    main(sys.argv)
fasta_remove_V3.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import sys
from Bio import SeqIO
def load_keywords(path):
    """Return the non-empty search keywords listed one per line in *path*.

    Trailing whitespace (including the newline) is stripped from every line.
    Blank lines are skipped: an empty string is a substring of every
    description, so keeping it would cause every record to be removed.
    """
    with open(path, "r") as handle:
        stripped = (line.rstrip() for line in handle)
        return [keyword for keyword in stripped if keyword]


def remove_by_description(records, keywords):
    """Yield FASTA text for records whose description has none of *keywords*.

    Unlike the ID-based variants, the substring search runs against the
    record's ``description`` attribute.
    *records* is any iterable of objects exposing ``description`` and ``seq``
    attributes (e.g. what ``SeqIO.parse`` produces).
    *keywords* is an iterable of substrings, re-iterated per record (so pass
    a list or tuple, not a one-shot iterator).
    """
    for record in records:
        target = record.description.rstrip()
        if not any(keyword in target for keyword in keywords):
            yield '>' + record.description + '\n' + str(record.seq)


def main(argv):
    """Print the FASTA records whose description contains no listed keyword.

    argv[1] is the FASTA file to filter; argv[2] is a text file holding one
    search keyword per line. Non-matching records are written to standard
    output.
    """
    if len(argv) < 3:
        sys.exit("usage: fasta_remove_V3.py <fasta_file> <keyword_file>")
    fasta_in = argv[1]
    query = argv[2]
    # Load the keyword list once; any() replaces the manually reset counter.
    keywords = load_keywords(query)
    for entry in remove_by_description(SeqIO.parse(fasta_in, 'fasta'), keywords):
        print(entry)


if __name__ == "__main__":
    main(sys.argv)
Recommended Posts