IC Python API:Audio Visualizer
- Main article: RL Python Samples.
Required Modules
Besides the rudimentary Reallusion Python API we'll need the following modules:
- os for reading from the Windows file system.
- numpy for transforming arrays.
- wave for reading wav file data.
- PySide2 for building the user interface.
<syntaxhighlight lang="python">
import RLPy
import os
import numpy as np
import wave
from PySide2 import QtCore, QtGui, QtWidgets, QtUiTools
from PySide2.shiboken2 import wrapInstance
</syntaxhighlight>
User Interface
For the sake of convenience, we'll be reading the interface from a Qt UI file named Audio_Visualizer.ui, which you can download here: File:Audio Visualizer.ui.
<syntaxhighlight lang="python">
# Create a dockable window inside iClone
window = RLPy.RUi.CreateRDockWidget()
window.SetWindowTitle("Audio Visualizer (Mono)")
window.SetAllowedAreas(RLPy.EDockWidgetAreas_BottomDockWidgetArea)

dock = wrapInstance(int(window.GetWindow()), QtWidgets.QDockWidget)
dock.resize(1000, 300)

# Load the UI file that sits next to this script
ui = QtCore.QFile(os.path.dirname(__file__) + "/Audio_Visualizer.ui")
ui.open(QtCore.QFile.ReadOnly)
widget = QtUiTools.QUiLoader().load(ui)
ui.close()

widget.progressBar.setVisible(False)  # Hidden until a file is plotted
dock.setWidget(widget)
</syntaxhighlight>
Audio Bit-Depth
Audio files, just like images, can be saved with different bit-depths depending on the required fidelity to the original sound. And just like images, audio files with higher bit-depths have better "accuracy", i.e. they are more faithful to the recorded source audio.
Without installing and relying on the soundfile module, we can guess the bit-depth from the maximum signal value of an audio file. For example, if the highest signal magnitude (signed) is 32,767, it exceeds the 8-bit range (256 values) but fits within the 16-bit range (65,536 values), so it's safe to assume that the audio is 16-bit.
We can use the following function to tease out the bit-depth:
<syntaxhighlight lang="python">
def guess_bit_depth(value):
    # Signal ranges for 8, 16, 24, and 32-bit audio
    bits = [2**8., 2**16., 2**24., 2**32.]
    # Insert the peak value, sort, and pick the next entry up:
    # the smallest range the signal still fits inside
    bits.append(value)
    bits.sort()
    bit_depth = bits[bits.index(value) + 1]
    return bit_depth
</syntaxhighlight>
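Note that the function actually returns the full signal range (the number of representable values) rather than the bit count itself. For example:

<syntaxhighlight lang="python">
# 32,767 exceeds the 8-bit range but fits in 16 bits, so 2**16 comes back
print(guess_bit_depth(32767))  # 65536.0
</syntaxhighlight>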
Again, this is a crude implementation; for a more robust method, you should use the soundfile module and read the audio's sub-type attribute.
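For reference, here is a minimal sketch of that approach, assuming the third-party soundfile module is installed and using a hypothetical "voice.wav" path:

<syntaxhighlight lang="python">
import soundfile as sf

# "voice.wav" is a placeholder path for this example
info = sf.info("voice.wav")
print(info.subtype)  # e.g. "PCM_16" for 16-bit audio
</syntaxhighlight>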
Plotting the Signals
We'll be using Qt's QGraphicsScene and QGraphicsView to plot out the sound waveform; QGraphicsView is a widget that is embedded inside the .ui file. Keep in mind that the scene's y-axis points downward, which is why the scale factor below is negative: it flips the waveform right side up.
<syntaxhighlight lang="python">
def plot_signal(framerate, signal):

    widget.progressBar.setVisible(True)

    blue_pen = QtGui.QPen(QtGui.QColor(0, 255, 255))
    grey_pen = QtGui.QPen(QtGui.QColor(128, 128, 128))
    scene = QtWidgets.QGraphicsScene()
    bit_depth = guess_bit_depth(max(signal))
    # Scale samples to +/-150 px; negative because the scene's y-axis points down
    factor = 150 / bit_depth * -2
    widget.progressBar.setRange(0, len(signal))
    ticks = 0
    interval = int(framerate * 0.005)  # 5 millisecond interval

    for i in range(1, 11):  # Draw horizontal grid lines for amplitude ratio
        scene.addLine(0, i * 15, len(signal), i * 15, grey_pen)
        scene.addLine(0, -i * 15, len(signal), -i * 15, grey_pen)
        if i % 2 == 0:
            text = scene.addText(str(round(i * 0.1, 1)))
            text.setDefaultTextColor(QtGui.QColor(128, 128, 128))
            text.setPos(0, i * -15)

    for i in range(len(signal)):
        if i == ticks * interval:
            # Label the elapsed time in seconds
            text = scene.addText(str(round(ticks * 0.005, 3)))
            text.setDefaultTextColor(QtGui.QColor(128, 128, 128))
            text.setPos(i, 135)
            # Add vertical grid lines for the time values
            scene.addLine(i, -150, i, 150, grey_pen)
            ticks += 1
        # Draw one vertical line per sample, scaled by the signal amplitude
        scene.addLine(i, 0, i, signal[i] * factor, blue_pen)
        widget.progressBar.setValue(i)

    widget.progressBar.setVisible(False)
    widget.graphicsView.setScene(scene)
</syntaxhighlight>
Reading the Audio File
Next, we'll use Python's built-in wave module to read the WAV file picked from a file dialog, convert the raw frames into a NumPy array, and collapse stereo files down to a single channel before handing the signal to plot_signal().
<syntaxhighlight lang="python">
def open_audio_file():
    dialog = QtWidgets.QFileDialog()
    dialog.setNameFilter("*.wav")
    dialog.exec_()

    if len(dialog.selectedFiles()) > 0:
        widget.path.setText(dialog.selectedFiles()[0])
        wav = wave.open(dialog.selectedFiles()[0], 'rb')
        nchannels, sampwidth, framerate, nframes, comptype, compname = wav.getparams()
        str_data = wav.readframes(nframes)
        # Convert the raw bytes to 16-bit integer samples
        # (copy() because np.frombuffer returns a read-only array)
        wav_data = np.frombuffer(str_data, dtype=np.short).copy()
        if nchannels == 2:
            # Stereo sounds are two mono tracks interleaved, so we need to:
            # Separate the signal into left & right channels (indefinite rows, 2 columns)
            wav_data.shape = -1, 2
            # Transpose into two rows, one per channel: [l, l, l, ...], [r, r, r, ...]
            wav_data = wav_data.T
            # Keep the higher amplitude of the two channels per sample,
            # roughly equivalent to merging stereo down to mono
            for i in range(len(wav_data[0])):
                if wav_data[0][i] < wav_data[1][i]:
                    wav_data[0][i] = wav_data[1][i]
            # Select only 1 channel, now holding the peak amplitudes
            wav_data = wav_data[0]
        plot_signal(framerate, wav_data)
    else:
        print("Valid path for sound file not found!")


widget.button.clicked.connect(open_audio_file)

window.Show()
</syntaxhighlight>
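As an aside, the per-sample merging loop in the stereo branch can be collapsed into a single vectorized NumPy call. A minimal equivalent sketch:

<syntaxhighlight lang="python">
# Element-wise maximum of the left & right channels, same result as the loop
wav_data = np.maximum(wav_data[0], wav_data[1])
</syntaxhighlight>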
Everything Put Together
The complete script, combining all of the sections above:

<syntaxhighlight lang="python">
import RLPy
import os
import numpy as np
import wave
from PySide2 import QtCore, QtGui, QtWidgets, QtUiTools
from PySide2.shiboken2 import wrapInstance

window = RLPy.RUi.CreateRDockWidget()
window.SetWindowTitle("Audio Visualizer (Mono)")
window.SetAllowedAreas(RLPy.EDockWidgetAreas_BottomDockWidgetArea)

dock = wrapInstance(int(window.GetWindow()), QtWidgets.QDockWidget)
dock.resize(1000, 300)

ui = QtCore.QFile(os.path.dirname(__file__) + "/Audio_Visualizer.ui")
ui.open(QtCore.QFile.ReadOnly)
widget = QtUiTools.QUiLoader().load(ui)
ui.close()

widget.progressBar.setVisible(False)
dock.setWidget(widget)


def guess_bit_depth(value):
    bits = [2**8., 2**16., 2**24., 2**32.]
    bits.append(value)
    bits.sort()
    bit_depth = bits[bits.index(value) + 1]
    return bit_depth


def plot_signal(framerate, signal):
    widget.progressBar.setVisible(True)

    blue_pen = QtGui.QPen(QtGui.QColor(0, 255, 255))
    grey_pen = QtGui.QPen(QtGui.QColor(128, 128, 128))
    scene = QtWidgets.QGraphicsScene()
    bit_depth = guess_bit_depth(max(signal))
    factor = 150 / bit_depth * -2
    widget.progressBar.setRange(0, len(signal))
    ticks = 0
    interval = int(framerate * 0.005)  # 5 millisecond interval

    for i in range(1, 11):  # Horizontal grid lines for amplitude ratio
        scene.addLine(0, i * 15, len(signal), i * 15, grey_pen)
        scene.addLine(0, -i * 15, len(signal), -i * 15, grey_pen)
        if i % 2 == 0:
            text = scene.addText(str(round(i * 0.1, 1)))
            text.setDefaultTextColor(QtGui.QColor(128, 128, 128))
            text.setPos(0, i * -15)

    for i in range(len(signal)):
        if i == ticks * interval:
            text = scene.addText(str(round(ticks * 0.005, 3)))
            text.setDefaultTextColor(QtGui.QColor(128, 128, 128))
            text.setPos(i, 135)
            scene.addLine(i, -150, i, 150, grey_pen)
            ticks += 1
        scene.addLine(i, 0, i, signal[i] * factor, blue_pen)
        widget.progressBar.setValue(i)

    widget.progressBar.setVisible(False)
    widget.graphicsView.setScene(scene)


def open_audio_file():
    dialog = QtWidgets.QFileDialog()
    dialog.setNameFilter("*.wav")
    dialog.exec_()

    if len(dialog.selectedFiles()) > 0:
        widget.path.setText(dialog.selectedFiles()[0])
        wav = wave.open(dialog.selectedFiles()[0], 'rb')
        nchannels, sampwidth, framerate, nframes, comptype, compname = wav.getparams()
        str_data = wav.readframes(nframes)
        wav_data = np.frombuffer(str_data, dtype=np.short).copy()
        if nchannels == 2:
            wav_data.shape = -1, 2
            wav_data = wav_data.T
            for i in range(len(wav_data[0])):
                if wav_data[0][i] < wav_data[1][i]:
                    wav_data[0][i] = wav_data[1][i]
            wav_data = wav_data[0]
        plot_signal(framerate, wav_data)
    else:
        print("Valid path for sound file not found!")


widget.button.clicked.connect(open_audio_file)

window.Show()
</syntaxhighlight>