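# Adaptive loudness averaging.
#
# Splits a stereo audio file into blocks, moves the loudness of each block
# towards a target value, crossfades the blocks back together and writes the
# result to a new 24 bit file, peak limited when needed.
#
# Hens Zimmerman, 02-05-2023.
# Python 3.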

import matplotlib.pyplot as plt
import numpy
import os
import pyloudnorm
import regex
import scipy
import soundfile
import sys
import warnings

# Fixed settings for the stereo limiter; these cannot be changed from the command line yet.

attack_coeff = 0.9 # per-sample smoothing factor for the limiter gain
release_coeff = 0.9995 # per-sample decay factor for the peak envelope
delay = 40 # length of the limiter delay line, in samples
block_length = 1024 # samples (not referenced in the visible part of this file)
signal_length = 1 # second (not referenced in the visible part of this file)

# End of fixed limiter settings.

class StereoLimiter:
    """Peak limiter for two-channel audio with ganged gain and a delay line.

    Gain reduction is derived from the incoming samples but applied to
    samples read back from a circular delay line, so the gain has already
    started to move when a peak reaches the output. All state lives on the
    instance and carries over from one limit() call to the next.
    """

    def __init__(self, attack_coeff, release_coeff, delay, threshold):
        """Store the settings and reset the running state.

        attack_coeff  -- share of the previous gain that is kept on each sample.
        release_coeff -- factor the peak envelope is multiplied by on each sample.
        delay         -- number of slots in each channel's delay line.
        threshold     -- linear amplitude above which gain reduction sets in.
        """
        # Settings.
        self.threshold = threshold
        self.attack_coeff = attack_coeff
        self.release_coeff = release_coeff
        self.delay = delay

        # One circular delay line per channel; both share one position.
        self.delay_line_left = numpy.zeros(delay)
        self.delay_line_right = numpy.zeros(delay)
        self.delay_index = 0

        # Running peak envelopes and gains.
        self.envelope_left = 0
        self.envelope_right = 0
        self.gain_left = 1
        self.gain_right = 1
        self.gain = 1

    def limit(self, signal):
        """Limit signal in place, frame by frame, and return it.

        Every frame of signal is read as frame[0] (left) and frame[1] (right).
        The output written to a frame is the delayed input multiplied by the
        current ganged gain.
        """
        # Weight of the target gain in the one-pole gain smoother.
        target_weight = 1 - self.attack_coeff

        for idx, frame in enumerate(signal):
            left_in = frame[0]
            right_in = frame[1]

            # Store the incoming frame and step to the next slot. After the
            # step the index points at the oldest frame in the delay line.

            self.delay_line_left[self.delay_index] = left_in
            self.delay_line_right[self.delay_index] = right_in
            self.delay_index = (self.delay_index + 1) % self.delay

            # Peak envelope: jump up to a new peak at once, decay otherwise.

            self.envelope_left = max(abs(left_in), self.envelope_left * self.release_coeff)
            self.envelope_right = max(abs(right_in), self.envelope_right * self.release_coeff)

            # Gain that would bring the envelope back down to the threshold.

            wanted_left = self.threshold / self.envelope_left if self.envelope_left > self.threshold else 1.0
            wanted_right = self.threshold / self.envelope_right if self.envelope_right > self.threshold else 1.0

            # Move the per-channel gains towards the wanted gains.

            self.gain_left = self.gain_left * self.attack_coeff + wanted_left * target_weight
            self.gain_right = self.gain_right * self.attack_coeff + wanted_right * target_weight

            # Gang the channels: both get the lower of the two gains.

            self.gain = min(self.gain_left, self.gain_right)

            # Overwrite this frame with the delayed, gain-scaled frame.

            signal[idx][0] = self.delay_line_left[self.delay_index] * self.gain
            signal[idx][1] = self.delay_line_right[self.delay_index] * self.gain

        return signal

# Suppress pyloudnorm warning about clipping.
# Since we compute in float64, we can fix this ourselves.
# Note that this filter silences every Python warning, not only pyloudnorm's.

warnings.simplefilter("ignore")

# What command line args did we get?

arg_count = len(sys.argv)

if arg_count < 2:
    print("python dyn_adapt.py file div:xx loudness:-xx xfade:xx lower:xx max-up:x max-down:x oversample:x limit:-x")
    sys.exit(1)

# Name of input file.

filename = sys.argv[1]

# Does this file exist at all?

if not os.path.isfile(filename):
    print(filename + " doesn't appear to exist\n")
    sys.exit(1)

# Default division of file into blocks.
# When seconds is True, division is a block length in seconds instead of a block count.

division = 10
seconds = False

# Default crossfade ratio into previous block.

xfade = 0.5

# Default target loudness.

final_loudness = -16.0

# Default no block loudness adaptation if loudness for block below a certain value.

lower = 12.0

# Default max upwards gain.

max_upwards = 6.0

# Default max downwards gain.

max_downwards = 6.0

# Oversampling factor.

oversampling = 4

# dBFS for limiter.

limit = -1

# Scan through optional arguments that override defaults
# div:10 div:10s loudness:-16 xfade:90 lower:12 max-up:6 max-down:6 oversample:4 limit:-2

# A non-negative number with an optional decimal part, so that values such as
# limit:-0.5 or loudness:-16.5 are not silently cut off at the decimal point.

number = r"(\d+(?:\.\d+)?)"

for arg in sys.argv[2:]:
    # div:10 asks for ten blocks, div:10s for blocks of ten seconds. The
    # optional suffix is read together with the number, so that the last
    # div argument decides both values.

    match = regex.search(r"div:(\d+)(s?)", arg, regex.IGNORECASE)
    if match:
        division = int(match.group(1))
        seconds = match.group(2) != ""

    match = regex.search(r"loudness:-" + number, arg, regex.IGNORECASE)
    if match:
        final_loudness = -float(match.group(1))

    # xfade is given as a percentage of the block length.

    match = regex.search(r"xfade:(\d+)", arg, regex.IGNORECASE)
    if match:
        xfade = int(match.group(1)) / 100

    match = regex.search(r"lower:" + number, arg, regex.IGNORECASE)
    if match:
        lower = float(match.group(1))

    match = regex.search(r"max-up:" + number, arg, regex.IGNORECASE)
    if match:
        max_upwards = float(match.group(1))

    match = regex.search(r"max-down:" + number, arg, regex.IGNORECASE)
    if match:
        max_downwards = float(match.group(1))

    match = regex.search(r"oversample:(\d+)", arg, regex.IGNORECASE)
    if match:
        oversampling = int(match.group(1))

    match = regex.search(r"limit:-" + number, arg, regex.IGNORECASE)
    if match:
        limit = -float(match.group(1))

# Reject values that would break the block arithmetic further on:
# div:0 leads to a division by zero and a crossfade longer than a block
# leads to negative sample positions.

if division < 1:
    print("div must be at least 1")
    sys.exit(1)

if xfade > 1.0:
    print("xfade must not exceed 100")
    sys.exit(1)

# Blocks that measure below this loudness are left untouched.

lower_threshold = final_loudness - lower

# Read entire file into ndarray.
# With always_2d=True the result is two-dimensional (frames x channels),
# also for a file with a single channel.

audio, samplerate = soundfile.read(filename, frames=-1, dtype='float64', always_2d=True)

# Basic stats about file we got from soundfile.

samples = audio.shape[0]
channels = audio.shape[1]

# Is it a mono file or a multichannel file?
# The channel count has to be tested directly: because of always_2d the
# number of array dimensions says nothing about it.

if channels > 2:
    print("Only stereo audio is currently supported")
    sys.exit(1)

if channels < 2:
    print("Mono files are not supported")
    sys.exit(1)

if samples == 0:
    print(filename + " doesn't contain any audio")
    sys.exit(1)

if division < 1:
    print("div must be at least 1")
    sys.exit(1)

# Division of file into blocks of size blocksize.
# If user supplied argument in seconds, divide into blocks of that many seconds.

if seconds:
    blocksize = division * samplerate

    # A file shorter than one block is still processed, as a single block.

    division = max(1, samples // blocksize)
else:
    blocksize = samples // division

if blocksize < 1:
    print("File is too short to be divided into " + str(division) + " blocks")
    sys.exit(1)

print(str(blocksize))

# This leads to an integer size for the crossfade.
# The crossfade reaches back into the previous block, so it is never allowed
# to be longer than one block.

fadesize = min(int(blocksize * xfade), blocksize)

# create BS.1770 meter

meter = pyloudnorm.Meter(samplerate)

# Buffers to copy data back into.

new_audio = numpy.empty((0, channels))
sub_audio = numpy.empty((0, channels))
prev_audio = numpy.empty((0, channels))

# The crossfade reaches back into the previous block, so it can never be
# longer than one block.

fadesize = min(fadesize, blocksize)

# Fade-in weights for the crossfade, one row per sample, shared by all channels.
# They are the same for every block, so they are computed once.

if fadesize > 0:
    fade_in = (numpy.arange(fadesize) * (1.0 / fadesize))[:, numpy.newaxis]
    fade_out = 1.0 - fade_in

# Finished blocks are collected here and joined once after the loop. Growing
# new_audio with numpy.append inside the loop would copy everything written
# so far on every pass.

finished_blocks = []

for idx in range(0, division):
    # Create this block. The last block takes whatever is left of the file.
    print("Processing block {0} of {1}".format(idx + 1, division))

    # Every block except the first starts fadesize samples early, so that its
    # head overlaps the tail of the previous block.

    start_idx = max(idx * blocksize - fadesize, 0)
    stop_idx = (idx + 1) * blocksize

    if idx == division - 1:
        sub_audio = audio[start_idx:]
    else:
        sub_audio = audio[start_idx:stop_idx]

    # Loudness adapt this block.

    loudness = meter.integrated_loudness(sub_audio)

    # Do not change "silent" portions of the mix.

    if loudness > lower_threshold:
        # Move towards the final loudness, but by no more than the allowed
        # maximum gain change in either direction.

        if loudness > final_loudness:
            target = max(loudness - max_downwards, final_loudness)
        else:
            target = min(loudness + max_upwards, final_loudness)
        sub_audio = pyloudnorm.normalize.loudness(sub_audio, loudness, target)

    # The gain change may push samples outside [-1.0 .. 1.0]. Nothing is
    # clipped or corrected inside this loop.

    if idx > 0:
        # Crossfade the head of this block into the tail of the previous block.

        if fadesize > 0:
            tail = slice(blocksize - fadesize, blocksize)
            prev_audio[tail] = fade_out * prev_audio[tail] + fade_in * sub_audio[:fadesize]

        # Remove crossfade area at the beginning of this block.

        sub_audio = sub_audio[fadesize:]

        # The previous block is now complete.

        finished_blocks.append(prev_audio)

    # This block becomes previous block for next iteration.

    prev_audio = sub_audio

# Join all finished blocks, followed by the last block that is still pending.

new_audio = numpy.concatenate(finished_blocks + [prev_audio], axis = 0)

def _peak_dbfs(buffer):
    """Return the largest absolute sample value in buffer, expressed in dBFS."""
    return 20.0 * numpy.log10(numpy.max(numpy.abs(buffer)))

# Gain scale final buffer to requested loudness norm.
# A buffer without a finite loudness (presumably -inf for digital silence)
# is left alone: scaling it would fill the output with inf/nan values.

loudness = meter.integrated_loudness(new_audio)

if numpy.isfinite(loudness):
    new_audio = pyloudnorm.normalize.loudness(new_audio, loudness, final_loudness)

peak_dB = _peak_dbfs(new_audio)

print("Sample peak at " + str(peak_dB) + " dBFS")

# The oversampled buffer is only used to measure the peak level;
# the file that gets written stays at the original sample rate.

if oversampling > 1:
    print("Oversampling... hold on to your seats...")

    import scipy.signal

    oversampled_new_audio = scipy.signal.resample(new_audio, new_audio.shape[0] * oversampling)

    peak_dB = _peak_dbfs(oversampled_new_audio)

    print("Oversampled peak at " + str(peak_dB) + " dBFS")

# Replace the extension of the input file, whatever its length, with _new.wav.

new_name = os.path.splitext(filename)[0] + '_new.wav'

if peak_dB > limit:
    threshold = 10**(limit / 20)
    limiter = StereoLimiter(attack_coeff, release_coeff, delay, threshold)

    # The limiter hands every sample back (delay - 1) samples late. Feed it
    # that many extra frames of silence and drop the same number of frames
    # from the start of its output, so that the result keeps its length, its
    # timing and its last samples.

    latency = max(delay - 1, 0)
    padding = numpy.zeros((latency, new_audio.shape[1]))
    limited_new_audio = limiter.limit(numpy.concatenate((new_audio, padding), axis = 0))[latency:]
    soundfile.write(new_name, limited_new_audio, samplerate, 'PCM_24')
else:
    soundfile.write(new_name, new_audio, samplerate, 'PCM_24')