You can do the same with seqkit grep
https://github.com/shenwei356/seqkit
Remove the sequences whose IDs are listed in an ID list file, or extract only those sequences.
fasta_extract.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Use the FASTA ID field as the key and write the sequences that hit to standard output
import sys
from Bio import SeqIO
# Extract the FASTA records whose ID exactly matches one of the IDs listed
# in a query file, and write them to standard output in FASTA format.
#
# Usage: fasta_extract.py <input.fasta> <id_list.txt>
fasta_in = sys.argv[1]  # first argument: FASTA file to filter
query = sys.argv[2]     # second argument: text file with one ID per line

# Load the ID list once, instead of re-opening the file for every record.
# The `with` block guarantees the handle is closed, and a set gives a
# constant-time membership test.
with open(query, "r") as query_file:
    wanted_ids = set(line.rstrip() for line in query_file)
# A blank line in the list is not an ID; ignore it so that it cannot
# select a record whose header has no ID.
wanted_ids.discard('')

# SeqIO.parse yields one record at a time, so the FASTA file is streamed.
for record in SeqIO.parse(fasta_in, 'fasta'):
    # Each record is printed at most once, even if its ID appears on
    # several lines of the query file.
    if record.id.rstrip() in wanted_ids:
        # Header is the full description line; the sequence goes on one line.
        print('>' + record.description + '\n' + str(record.seq))
fasta_remove.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Use the FASTA ID field as the key and write the sequences that did not hit to standard output
import sys
from Bio import SeqIO
# Remove the FASTA records whose ID exactly matches one of the IDs listed
# in a query file; every other record is written to standard output in
# FASTA format.
#
# Usage: fasta_remove.py <input.fasta> <id_list.txt>
fasta_in = sys.argv[1]  # first argument: FASTA file to filter
query = sys.argv[2]     # second argument: text file with one ID per line

# Load the ID list once, instead of re-opening the file for every record.
# The `with` block guarantees the handle is closed, and a set gives a
# constant-time membership test.
with open(query, "r") as query_file:
    excluded_ids = set(line.rstrip() for line in query_file)
# A blank line in the list is not an ID; ignore it so that it cannot
# drop a record whose header has no ID.
excluded_ids.discard('')

# SeqIO.parse yields one record at a time, so the FASTA file is streamed.
for record in SeqIO.parse(fasta_in, 'fasta'):
    # Keep only the records that are NOT named in the query file.
    if record.id.rstrip() not in excluded_ids:
        # Header is the full description line; the sequence goes on one line.
        print('>' + record.description + '\n' + str(record.seq))
fasta_extract_cont.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Use the FASTA ID field as the key and write the sequences that hit to standard output (partial-match version)
import sys
from Bio import SeqIO
# Extract the FASTA records whose ID contains one of the keywords listed
# in a query file (substring match), and write them to standard output in
# FASTA format.
#
# Usage: fasta_extract_cont.py <input.fasta> <keyword_list.txt>
fasta_in = sys.argv[1]  # first argument: FASTA file to filter
query = sys.argv[2]     # second argument: text file with one keyword per line

# Load the keywords once, instead of re-opening the file for every record;
# the `with` block guarantees the handle is closed.
with open(query, "r") as query_file:
    stripped_lines = [line.rstrip() for line in query_file]
# Drop blank lines: the empty string is a substring of every ID, so a
# single stray blank line would otherwise select every record.
keywords = [keyword for keyword in stripped_lines if keyword]

# SeqIO.parse yields one record at a time, so the FASTA file is streamed.
for record in SeqIO.parse(fasta_in, 'fasta'):
    record_id = record.id.rstrip()
    # any() stops at the first matching keyword, so a record is printed at
    # most once even when several keywords match its ID.
    if any(keyword in record_id for keyword in keywords):
        # Header is the full description line; the sequence goes on one line.
        print('>' + record.description + '\n' + str(record.seq))
fasta_remove_cont.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Use the FASTA ID field as the key and write the sequences that did not hit to standard output (partial-match version)
import sys
from Bio import SeqIO
# Remove the FASTA records whose ID contains one of the keywords listed in
# a query file (substring match); every other record is written to
# standard output in FASTA format.
#
# Usage: fasta_remove_cont.py <input.fasta> <keyword_list.txt>
fasta_in = sys.argv[1]  # first argument: FASTA file to filter
query = sys.argv[2]     # second argument: text file with one keyword per line

# Load the keywords once, instead of re-opening the file for every record;
# the `with` block guarantees the handle is closed.
with open(query, "r") as query_file:
    stripped_lines = [line.rstrip() for line in query_file]
# Drop blank lines: the empty string is a substring of every ID, so a
# single stray blank line would otherwise remove every record.
keywords = [keyword for keyword in stripped_lines if keyword]

# SeqIO.parse yields one record at a time, so the FASTA file is streamed.
for record in SeqIO.parse(fasta_in, 'fasta'):
    record_id = record.id.rstrip()
    # Keep only the records whose ID contains none of the keywords.
    if not any(keyword in record_id for keyword in keywords):
        # Header is the full description line; the sequence goes on one line.
        print('>' + record.description + '\n' + str(record.seq))
fasta_remove_V3.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import sys
from Bio import SeqIO
# Remove the FASTA records whose description line contains one of the
# search keywords listed in a query file (substring match); every other
# record is written to standard output in FASTA format.
#
# Unlike the ID-based scripts, the keywords are searched for in the whole
# description (record.description), not only in the ID.
#
# Usage: fasta_remove_V3.py <input.fasta> <keyword_list.txt>
fasta_in = sys.argv[1]  # first argument: FASTA file to filter
query = sys.argv[2]     # second argument: text file with one keyword per line

# Load the keywords once, instead of re-opening the file for every record;
# the `with` block guarantees the handle is closed.
with open(query, "r") as query_file:
    stripped_lines = [line.rstrip() for line in query_file]
# Drop blank lines: the empty string is a substring of every description,
# so a single stray blank line would otherwise remove every record.
keywords = [keyword for keyword in stripped_lines if keyword]

# SeqIO.parse yields one record at a time, so the FASTA file is streamed.
for record in SeqIO.parse(fasta_in, 'fasta'):
    description = record.description.rstrip()
    # Keep only the records whose description contains none of the keywords.
    if not any(keyword in description for keyword in keywords):
        # Header is the full description line; the sequence goes on one line.
        print('>' + record.description + '\n' + str(record.seq))
        Recommended Posts