python - 为什么当“转换的字节”除以 2 时,PyAudio 流的音量有时会“减半”,有时会产生不需要的白噪声?
问题描述
我对在 Python 3.7 中使用 PyAudio 模块还是“新手”,但已经用 PyAudio 接口做了一些“有趣”的事情。我已经弄清楚如何生成并播放一些按顺序排列在一起的“自定义音高”:数据先用 n = int.from_bytes(data, 'big') 从字节转换为整数(范围为 -32768 到 +32768),修改数值之后,再用 int(n).to_bytes(2, 'big') 转换回字节并写入流。
虽然我的值是整数,但我可以除以 2 以将“自定义音高”的音量“减半”,但是,当我除以 2 时,如果我使“n”(用于整数值的整数变量)等于'sounds\hello.wav' 文件的转换数据,它不会将音量“减半”,而是会产生不需要的白噪声。如果我不除以 2,我的 'sounds\hello.wav' 文件可以正常播放。
我的评论在“全部大写”中,这就是“问题”所在。“大写注释部分”显示了四个不同的“选项”,可用于“n”的值,然后将 n 转换回字节并写入流。这四个“选项”中的三个有效,但我一直试图弄清楚为什么“第四个选项”会给我带来“问题”。“重现问题”的“四个选项”是我的代码生成两个警告的原因,而不是“程序问题”。我正在做的事情,也许有一天会帮助创造一种全新的声音和音乐技术。这是我的代码...
import math
import time
import wave
import pyaudio
# Wall-clock reference captured once at import; pitch timing is measured
# relative to it.
start = time.time()
# True until the playback loop has reset its per-voice state once.
started = True

# Frame-timing state: get_delta_time() updates oldTime/delta, and
# get_pitches() accumulates delta into run_time.
oldTime = 0
delta = 0
run_time = 0

# Per-voice synthesis state; the add_pitch* functions grow these on demand.
position = []
val = []
lastVal = []
lastVal2 = []

# Number of voices mixed into the current sample, and the number of
# playback-loop iterations completed so far.
pitches = 0
count = 0
def get_pitches():
    """Mix every configured voice into a single averaged sample value.

    Resets the per-sample scratch state (``val`` and ``pitches``), advances
    ``run_time`` by the most recent ``delta``, and then sums the output of
    each configured voice. The voice functions increment ``pitches``
    themselves; when that count is non-zero the sum is divided by it.

    Returns:
        The averaged value, or the unaveraged sum when no voice incremented
        ``pitches``.
    """
    global val, run_time, pitches

    val = []
    pitches = 0
    # Accumulate elapsed time so the voices can derive their current pitch.
    run_time += delta

    # PITCHES GO HERE: one settings entry per voice, mixed in this order.
    voices = (
        dict(offset=0.0, increment=0.0, volume=0.5,
             pitch_stamp=[0.01, 0.015, 0.02, 0.015],
             time_stamp=[.5, .5, .5, .5],
             transition_time_stamp=[10, 10, 10, 10],
             voice="sounds\\ah.wav"),
        dict(offset=0.0, increment=0.0, volume=0.5,
             pitch_stamp=[0.01, 0.02, 0.03, 0.02],
             time_stamp=[1, 1, 1, 1],
             transition_time_stamp=[10, 10, 10, 10],
             voice="sounds\\ah.wav"),
    )
    total = 0
    for settings in voices:
        total += add_pitch_with_time_stamp(**settings)

    # Disabled examples of plain drifting voices:
    #   total += add_pitch(offset = .01, increment = .1, volume = 1)
    #   total += add_pitch(offset = 0.015, increment = -.001, volume = 1)
    #   total += add_pitch(offset = 0.04, increment = 0, volume = 1)

    # Average out the pitches before returning.
    if pitches != 0:
        return total / pitches
    return total
def add_pitch(offset, increment, volume):
    """Produce one sample of a sine voice whose pitch drifts over run time.

    The pitch is ``(run_time - start) * increment + offset``. Only pitches in
    the half-open range [0, 0.3) produce sound; anything else contributes 0
    and is not counted in ``pitches``.

    Args:
        offset: Base pitch added to the time-dependent term.
        increment: Pitch change per unit of ``run_time - start``.
        volume: Scale factor applied to the sine term.

    Returns:
        ``sample * 0x7f + 0x80`` for a sounding voice, otherwise 0.
    """
    global pitches

    slot = pitches
    # Grow each per-voice state list by at most one entry so that index
    # ``slot`` exists when the lists were exactly one short.
    for state in (position, lastVal, lastVal2):
        if slot >= len(state):
            state.append(0)

    # Get the calculated pitch for the wave.
    pitch = ((run_time - start) * increment) + offset
    if not (0.3 > pitch >= 0):
        # Out of range: silent, and not counted towards the average.
        return 0

    # Shift the previous sample down the history before overwriting it.
    if slot < len(lastVal):
        lastVal2[slot] = lastVal[slot]

    # NOTE(review): the phase counter is looked up with len(val) - 1 taken
    # before the append, so the first voice of a sample reads position[-1];
    # confirm whether position[pitches] was intended.
    phase = position[len(val) - 1] * pitch
    sample = (1 + math.sin(phase * math.pi * 2) * 0.5 * volume) - 0.5
    val.append(sample)

    if slot < len(lastVal):
        lastVal[slot] = sample
        result = (sample * 0x7f) + 0x80
    else:
        result = 0

    # Count this voice so get_pitches() can average the mix.
    pitches += 1
    return result
def add_pitch_with_time_stamp(offset, increment, volume, pitch_stamp, time_stamp=None, transition_time_stamp=None, voice=None):
    """Produce one sample of a sine voice that steps through timed pitches.

    The current position inside the repeating schedule is derived from the
    wall clock (``time.time() - start`` modulo the total of ``time_stamp``),
    and ``get_transition_value`` blends between neighbouring entries of
    ``pitch_stamp``. The blended value is added to ``offset`` and to the
    drift term ``(run_time - start) * increment`` to give the pitch. Only
    pitches in the half-open range [0, 0.3) produce sound.

    Args:
        offset: Base pitch added to the scheduled pitch.
        increment: Pitch change per unit of ``run_time - start``.
        volume: Scale factor applied to the sine term.
        pitch_stamp: Sequence of pitch values to cycle through.
        time_stamp: Duration of each step. Missing entries (or an omitted
            argument) are padded with 1 up to the length of ``pitch_stamp``.
        transition_time_stamp: Exponent applied to each step's blend
            fraction; padded with 1 in the same way.
        voice: Accepted for compatibility; it does not currently affect the
            generated sample.

    Returns:
        ``sample * 0x7f + 0x80`` for a sounding voice, otherwise 0.

    Raises:
        ZeroDivisionError: If the padded ``time_stamp`` sums to zero, or if
            ``pitch_stamp`` is empty.
    """
    global pitches

    # Work on private copies: padding must not leak into the caller's lists,
    # and an omitted stamp is treated as empty instead of crashing on None.
    time_stamp = [] if time_stamp is None else list(time_stamp)
    transition_time_stamp = (
        [] if transition_time_stamp is None else list(transition_time_stamp)
    )

    # Match both stamps to the number of pitch steps. The earlier loop for
    # time_stamp iterated over its own length and therefore never padded.
    steps = len(pitch_stamp)
    time_stamp.extend([1] * (steps - len(time_stamp)))
    transition_time_stamp.extend([1] * (steps - len(transition_time_stamp)))

    # Total length of one pass through the schedule, and where we are in it.
    time_modulation = sum(time_stamp)
    time_flow = (time.time() - start) % time_modulation

    # Blended pitch value for the current point in the schedule.
    f = get_transition_value(pitch_stamp, transition_time_stamp, time_stamp, time_flow)

    slot = pitches
    # Grow each per-voice state list by at most one entry so that index
    # ``slot`` exists when the lists were exactly one short.
    for state in (position, lastVal, lastVal2):
        if slot >= len(state):
            state.append(0)

    # Get the calculated pitch for the wave.
    pitch = ((run_time - start) * increment) + (f + offset)
    if not (0.3 > pitch >= 0):
        # Out of range: silent, and not counted towards the average.
        return 0

    # Shift the previous sample down the history before overwriting it.
    if slot < len(lastVal):
        lastVal2[slot] = lastVal[slot]

    # NOTE(review): the phase counter is looked up with len(val) - 1 taken
    # before the append, so the first voice of a sample reads position[-1];
    # confirm whether position[pitches] was intended.
    phase = position[len(val) - 1] * pitch
    sample = (1 + math.sin(phase * math.pi * 2) * 0.5 * volume) - 0.5
    val.append(sample)

    if slot < len(lastVal):
        lastVal[slot] = sample
        result = (sample * 0x7f) + 0x80
    else:
        result = 0

    # Count this voice so get_pitches() can average the mix.
    pitches += 1
    return result
def get_transition_value(value_list, transition_list, t_stamp, t_flow):
    """Blend between scheduled values for the time offset ``t_flow``.

    Walks the step durations in ``t_stamp`` to find the step that contains
    ``t_flow``, computes how far through that step we are, raises that
    fraction to a power taken from ``transition_list`` and uses it to
    interpolate between the step's value and the next one (wrapping around
    ``value_list``).

    Args:
        value_list: Values to interpolate between.
        transition_list: Exponents shaping each blend.
        t_stamp: Duration of each step.
        t_flow: Time offset into the schedule.

    Returns:
        The interpolated value produced by ``transition``.

    Raises:
        IndexError: If ``t_flow`` is at or beyond the sum of ``t_stamp``.
    """
    elapsed = 0
    remaining = t_flow
    segment = 0
    for idx, duration in enumerate(t_stamp):
        elapsed += duration
        if t_flow >= elapsed:
            # This step is fully behind us; move on to the next one.
            remaining -= duration
            segment = idx + 1

    # Fraction of the current step that has already elapsed.
    fraction = remaining / t_stamp[segment]

    value_count = len(value_list)
    current_value = value_list[segment % value_count]
    next_value = value_list[(segment + 1) % value_count]

    # NOTE(review): the exponent is chosen by whole units of t_flow, not by
    # the step index found above; confirm this is intended.
    power = transition_list[int(t_flow) % value_count]

    return transition(current_value, next_value, math.pow(fraction % 1, power))
def transition(down, up, mid):
    """Linearly interpolate between ``down`` and ``up``.

    Args:
        down: Value returned when ``mid`` is 0.
        up: Value returned when ``mid`` is 1.
        mid: Blend weight given to ``up``; ``down`` gets ``1 - mid``.

    Returns:
        ``down * (1 - mid) + up * mid``.
    """
    weight_down = 1 - mid
    low_part = down * weight_down
    high_part = up * mid
    return low_part + high_part
def get_delta_time():
    """Store the seconds elapsed since the previous call in ``delta``.

    The clock is sampled exactly once per call, so the interval written to
    ``delta`` and the timestamp written to ``oldTime`` always agree. Reading
    the clock twice would drop the time between the two reads from every
    interval.

    On the first call ``oldTime`` still holds its initial value, so ``delta``
    is the span between that value and the current time.
    """
    global delta
    global oldTime
    now = time.time()
    delta = now - oldTime
    oldTime = now
# Play "sounds\hello.wav" through a PyAudio output stream, one frame per loop
# iteration, until interrupted with Ctrl+C (KeyboardInterrupt).
#
# Each iteration reads one frame from the file, turns it into an integer,
# assigns `n` four times in a row (the four "options" described below; only
# the last assignment reaches the stream), converts `n` to two bytes and
# writes them to the stream.
#
# NOTE(review): indentation was lost in this copy of the function, so the
# nesting described in these comments is inferred from the statements.
def do_pitches():
global pitches
global position
global started
global lastVal
global lastVal2
global count
global delta
# Create an interface to PortAudio
p = pyaudio.PyAudio()
# NOTE(review): no wf.close() is visible in this function, so the file
# handle appears to stay open until the process exits.
wf = wave.open("sounds\\hello.wav", 'rb')
# Open a .Stream object to write the WAV file to
# 'output = True' indicates that the sound will be played rather than recorded
# The sample format, channel count and rate are all taken from the file.
# NOTE(review): the trailing "8bit" remark is unverified; the width is
# whatever wf.getsampwidth() reports.
stream = p.open(format=p.get_format_from_width(wf.getsampwidth()), # 8bit
channels=wf.getnchannels(),
rate=wf.getframerate(),
output=True)
try:
while True:
# Reset the voice counter for this sample.
pitches = 0
# Start 'n' at 'silent' (0); the assignments below overwrite it.
n: int = 0
# One-time reset of the per-voice state lists.
if started:
position = []
lastVal = []
lastVal2 = []
#n = get_pitches()
# pitches was set to 0 above, so this range is empty here.
for i in range(0, pitches):
position.append(0)
lastVal.append(0)
lastVal2.append(0)
started = False
# Read the voice data 1 frame at a time.
data2 = wf.readframes(1)
# Interpret the frame's bytes as one big-endian integer. Without
# signed=True, int.from_bytes never returns a negative number, so this is
# not a -32768..32767 sample value.
# NOTE(review): WAV PCM data is little-endian and 16-bit samples are signed;
# decoding as big-endian unsigned is the likely reason arithmetic on 'v'
# (such as halving) turns into noise. Confirm against the file's format.
v = int.from_bytes(data2, 'big')
# OPTION 1 (reported to work when used alone, without dividing by 2):
#
# transition(n, x, 1) evaluates to n * 0 + x * 1, so this takes the averaged
# value of all pitches from get_pitches(). The call also updates the
# module-level synthesis state even when the result is overwritten below.
#
n = (transition(n, get_pitches(), 1))
# OPTION 2 (reported to work when used alone, without dividing by 2):
#
# Passes the integer decoded from 'sounds\hello.wav' through unchanged, so
# the bytes written below match the bytes that were read as long as the
# frame is at most two bytes wide.
#
n = v
# OPTION 3 (reported to work when used alone, dividing by 2):
#
# Same as option 1 but with the value halved, which halves the volume of the
# generated pitches.
#
n = (transition(n, get_pitches(), 1)) / 2
# OPTION 4 (***problem***): reported to produce white noise instead of
# halving the volume of 'sounds\hello.wav'.
#
# This halves the integer decoded above, not the sample value the frame
# encodes. As written, this is the assignment that reaches the stream,
# because it runs after the other three.
#
n = int(v / 2)
# Convert the value of 'n' into two big-endian bytes. int.to_bytes raises
# OverflowError if the value is negative or does not fit in two bytes.
data = int(n).to_bytes(2, 'big')
#wf.setpos(int((time.time() - start) % wf.getsampwidth()))
#print (int(time.time() % wf.getsampwidth()))
# Writing data to stream makes the sound.
stream.write(data)
# Write voice to voice stream.
#stream2.write(data2)
# Increment position so that the 'n' result (from getPitches)
# produces a sine-wave.
for i in range (0, len(position)):
position[i] += 1
# Reset each position on every 1000th loop iteration (count is a multiple
# of 1000) to prevent popping.
if count % 1000 == 0:
position[i] = 0
# Refresh 'delta' for the next call to get_pitches(), then count this
# iteration.
get_delta_time()
count += 1
except KeyboardInterrupt:
# Ctrl+C is the intended way out of the loop.
pass
# In the case the while loop breaks.
# NOTE(review): this cleanup is not in a 'finally' clause, so any exception
# other than KeyboardInterrupt skips it.
stream.close()
p.terminate()
# Start playback immediately when this file is executed (or imported).
do_pitches()
解决方案
我发现了问题...我使用了一个名为“top.wav”的 .wav 文件,该文件具有 wav 可以包含的最大可能正值的 1 秒,以及另一个名为“bottom.wav”的 .wav 文件,该文件具有最大可能负值的 1 秒(查看这 2 个 .wav 文件生成的值,以便我可以完全理解“字节系统”的工作原理)。
我没有再用 int.from_bytes() 把“数据”的字节值转换为整数,而是发现了一个名为 struct.unpack() 的函数,它接收字节数据,并以正确的方式将其转换为形如 (<整数数据值>, 0) 的元组。我通过 decoded[0] 取得了真正的数值,
使用代码时...
...
# "<h" is little-endian signed 16-bit, i.e. exactly two bytes per unpack.
fmt = "<" + "h"
if data == b'\x00\x00\x00\x00':
    # Four zero bytes are treated as silence without calling struct.unpack.
    decoded = (0, )
elif data != b'':
    # NOTE(review): struct.unpack requires len(data) to match the format
    # size (2 bytes here); a 4-byte non-zero frame would raise struct.error.
    decoded = struct.unpack(fmt, data)
# For empty data, 'decoded' is deliberately left as it was.
...
然后,我注意到该值被“加扰”到任何“范围”从 0 到 128 的地方都需要转换为 128 -(值 - 1),而范围从 129 到 256 的任何东西都需要转换为( 256 - (value - 128)) - 1 ...所以我不得不编写一个名为“反转值”的函数...
...
def invert_values(n):
    """Mirror a value within the lower (< 128) or upper (128..255) band.

    Values below 128 become ``128 - (n - 1)``; the result is then checked
    again, and anything in 128..255 becomes ``(256 - (n - 128)) - 1``.
    Values of 256 and above are returned unchanged.

    NOTE(review): because the second check runs on the result of the first,
    inputs 0 and 1 are remapped twice (0 -> 254, 1 -> 255). Confirm whether
    the two cases were meant to be mutually exclusive.
    """
    mirrored = 128 - (n - 1) if n < 128 else n
    if 128 <= mirrored < 256:
        return (256 - (mirrored - 128)) - 1
    return mirrored
...
之后做我的算术,
利用...
n = invert_values(n)
在将 n 转换回字节值之前,我的 wav 可以正常播放。当我除以 2 时,我的音量“减半”。
推荐阅读
- php - 以大写形式在 PHP 中输出 SQL 查询
- php - 如果 URL 中没有 index.php,则无法访问 Codeigniter 控制器
- javascript - 将 maxDataPoints 放入 grafana 数据源
- c++ - 如何在 C++ 项目的编译时使用 Visual Studio 宏
- spotfire - Spotfire 折线图平滑
- javascript - 我们可以在多维数组中使用递归来推送新值吗
- node.js - 如何在 node-mongodb-native 3.1 中完全关闭 MongoDB 连接+套接字?
- java - com.fasterxml.jackson.databind.exc.UnrecognizedPropertyException 即使类中存在字段
- typescript - 如何在 VS Code / TypeScript 中禁用多余的自动导入建议?
- c# - 如何在 C# 中具有不同列名的两个不同数据表之间复制数据?