I referred to the following site. The source code is basically used as it is without being rewritten.
[ http://aidiary.hatenablog.com/entry/20120701/1341126474 ] How to use SPTK (1) Installation, waveform drawing, audio playback
[ http://aidiary.hatenablog.com/entry/20150225/1424863972 ] Statistical voice conversion (2) Let's make a voice changer
*** Now recording ... (10 sec)
*** extract pitch ...
*** extract mel cepstrum
*** modify parameters ...
'x2x' �́A�����R�}���h�܂��͊O���R�}���h�A
����\�ȃv���O�����܂��̓o�b�` �t�@�C���Ƃ��ĔF������Ă��܂���B
'x2x' �́A�����R�}���h�܂��͊O���R�}���h�A
����\�ȃv���O�����܂��̓o�b�` �t�@�C���Ƃ��ĔF������Ă��܂���B
'sopr' �́A�����R�}���h�܂��͊O���R�}���h�A
����\�ȃv���O�����܂��̓o�b�` �t�@�C���Ƃ��ĔF������Ă��܂���B
*** play!
'sopr' �́A�����R�}���h�܂��͊O���R�}���h�A
����\�ȃv���O�����܂��̓o�b�` �t�@�C���Ƃ��ĔF������Ă��܂���B
Traceback (most recent call last):
File "C:/Users/joho-2/PycharmProjects/R sound 10/R sound 10.py", line 127, in <module>
play("output.raw")
File "C:/Users/joho-2/PycharmProjects/R sound 10/R sound 10.py", line 78, in play
f = open(raw_file, "rb")
IOError: [Errno 2] No such file or directory: 'output.raw'
#coding: utf-8
import pyaudio
import struct
import subprocess
#Simple voice changer with SPTK
CHANNELS = 1
RATE = 16000
CHUNK = 1024
def record(raw_file, record_seconds=5):
"""Record an audio file
Recording time is fixed. I couldn't end the loop when I pressed the keyboard ..."""
fp = open(raw_file, "wb")
for _ in range(0, int(RATE / CHUNK * record_seconds)):
data = stream.read(CHUNK)
fp.write(struct.pack('s' * CHUNK * 2, *data))
fp.close()
stream.stop_stream()
stream.close()
p.terminate()
def extract_pitch(raw_file, pitch_file):
"""Extraction of pitch parameters"""
cmd = "x2x +sf %s | pitch -a 1 -s 16 -p 80 > %s" % (raw_file, pitch_file)
subprocess.call(cmd, shell=True)
def extract_mcep(raw_file, mcep_file):
"""Extraction of mer cepstrum parameters"""
cmd = "x2x +sf %s | frame -p 80 | window | mcep -m 25 -a 0.42 > %s" % (raw_file, mcep_file)
subprocess.call(cmd, shell=True)
def modify_pitch(m, pitch_file, mcep_file, raw_file):
"""Deform the pitch and resynthesize
m is greater than 1=>Low voice
m is less than 1=>High voice"""
cmd = "sopr -m %f %s | excite -p 80 | mlsadf -m 25 -a 0.42 -p 80 %s | clip -y -32000 32000 | x2x +fs > %s" % (m, pitch_file, mcep_file, raw_file)
subprocess.call(cmd, shell=True)
def modify_speed(frame_shift, pitch_file, mcep_file, raw_file):
"""Deform the speaking speed and resynthesize
frame_shift is small=>Fast talk
frame_shift is large=>slowly"""
cmd = "excite -p %f %s | mlsadf -m 25 -a 0.42 -p %f %s | clip -y -32000 32000 | x2x +fs > %s" % (frame_shift, pitch_file, frame_shift, mcep_file, raw_file)
subprocess.call(cmd, shell=True)
def hoarse_voice(pitch_file, mcep_file, raw_file):
"""Whispering"""
modify_pitch(0, pitch_file, mcep_file, raw_file)
def robot_voice(frame_period, record_seconds, mcep_file, raw_file):
"""Robot voice
frame_period is small=>Low
frame_period is large=>high"""
sequence_length = record_seconds * RATE * frame_period
cmd = "train -p %d -l %d | mlsadf -m 25 -a 0.42 -p 80 %s | clip -y -32000 32000 | x2x +fs > %s" % (frame_period, sequence_length, mcep_file, raw_file)
subprocess.call(cmd, shell=True)
def child_voice(pitch_file, mcep_file, raw_file):
"""Child voice"""
cmd = "sopr -m 0.4 %s | excite -p 80 | mlsadf -m 25 -a 0.1 -p 80 %s | clip -y -32000 32000 | x2x +fs > %s" % (pitch_file, mcep_file, raw_file)
subprocess.call(cmd, shell=True)
def deep_voice(pitch_file, mcep_file, raw_file):
"""Thick voice"""
cmd = "sopr -m 2.0 %s | excite -p 80 | mlsadf -m 25 -a 0.6 -p 80 %s | clip -y -32000 32000 | x2x +fs > %s" % (pitch_file, mcep_file, raw_file)
subprocess.call(cmd, shell=True)
def raw2wav(raw_file, wav_file):
cmd = "sox -e signed-integer -c %d -b 16 -r %d %s %s" % (CHANNELS, RATE, raw_file, wav_file)
subprocess.call(cmd, shell=True)
def play(raw_file):
"""Play raw file"""
p = pyaudio.PyAudio()
stream = p.open(format=p.get_format_from_width(2), channels=CHANNELS, rate=RATE, output=True)
f = open(raw_file, "rb")
data = f.read(CHUNK)
while data != '':
stream.write(data)
data = f.read(CHUNK)
stream.stop_stream()
stream.close()
p.terminate()
if __name__ == "__main__":
#Recording time (fixed)
record_seconds = 10
p = pyaudio.PyAudio()
stream = p.open(format=pyaudio.paInt16,
channels=CHANNELS,
rate=RATE,
input=True,
frames_per_buffer=CHUNK)
pitch_file = "temp.pitch"
mcep_file = "temp.mcep"
raw_file = "temp.raw"
output_file = "output.raw"
#Record the original audio and export it as a raw file
print "*** Now recording ... (%d sec)" % record_seconds
record(raw_file, record_seconds)
#Parameter extraction
print "*** extract pitch ..."
extract_pitch(raw_file, pitch_file)
print "*** extract mel cepstrum"
extract_mcep(raw_file, mcep_file)
#Various parameter transformations
print "*** modify parameters ..."
#Only one can be enabled
modify_pitch(0.3, pitch_file, mcep_file, output_file)
# modify_speed(300, pitch_file, mcep_file, output_file)
# hoarse_voice(pitch_file, mcep_file, output_file)
# robot_voice(100, record_seconds, mcep_file, output_file)
# child_voice(pitch_file, mcep_file, output_file)
deep_voice(pitch_file, mcep_file, output_file)
#Play the converted audio
print "*** play!"
play("output.raw")
I thought it was a problem with the Python version, so I changed from 3 series to 2 series, but it didn't work ... I thought that I couldn't record in the first place, so I reviewed the site many times and studied. The error didn't go away from here and it just didn't work ;;
I'm a beginner who just studied Python recently. I would be really happy if you could give me some advice. Thank you very much.
Recommended Posts