Skip to content

Commit c02d038

Browse files
awsaf49 and yongtang authored
Add ref and amin argument in dbscale (#1779)
* Add `ref` and `amin` argument
  `amin` will avoid -inf for zero inputs and `ref` will provide more flexibility
* Added test for `ref` & `amin` in `dbscale`
* Fix for `black` lint
---------
Co-authored-by: Yong Tang <yong.tang.github@outlook.com>
1 parent 51941dd commit c02d038

File tree

2 files changed

+26
-2
lines changed

2 files changed

+26
-2
lines changed

tensorflow_io/python/ops/audio_ops.py

Lines changed: 9 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -131,20 +131,27 @@ def melscale(input, rate, mels, fmin, fmax, name=None):
131131
return tf.tensordot(input, matrix, 1)
132132

133133

134-
def dbscale(input, top_db, name=None):
134+
def dbscale(input, top_db, ref=1.0, amin=1e-10, name=None):
135135
"""
136136
Turn spectrogram into db scale
137137
138138
Args:
139139
input: A spectrogram Tensor.
140140
top_db: Minimum negative cut-off `max(10 * log10(S)) - top_db`
141+
ref: The power is scaled relative to it `10 * log10(S / ref)`
142+
amin: Minimum value for power and `ref`
141143
name: A name for the operation (optional).
142144
143145
Returns:
144146
A tensor of mel spectrogram with shape [frames, mels].
145147
"""
146148
power = tf.math.square(input)
147-
log_spec = 10.0 * (tf.math.log(power) / tf.math.log(10.0))
149+
log_spec = 10.0 * (tf.math.log(tf.math.maximum(power, amin)) / tf.math.log(10.0))
150+
if callable(ref):
151+
ref_value = ref(power)
152+
else:
153+
ref_value = tf.math.abs(ref)
154+
log_spec -= 10.0 * tf.math.log(tf.math.maximum(ref_value, amin)) / tf.math.log(10.0)
148155
log_spec = tf.math.maximum(log_spec, tf.math.reduce_max(log_spec) - top_db)
149156
return log_spec
150157

tests/test_audio_ops.py

Lines changed: 17 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -876,6 +876,23 @@ def test_spectrogram():
876876
assert dbscale_mel_spectrogram.shape == [29, mels]
877877
assert dbscale_mel_spectrogram.dtype == tf.float32
878878

879+
# Inf check for zero input
880+
dbscale_mel_spectrogram = tfio.audio.dbscale(
881+
tf.zeros_like(mel_spectrogram), top_db=80, amin=1e-10
882+
)
883+
884+
# Check if any inf in output
885+
assert not tf.math.reduce_any(tf.math.is_inf(dbscale_mel_spectrogram))
886+
887+
# Custom ref check
888+
dbscale_mel_spectrogram = tfio.audio.dbscale(
889+
mel_spectrogram, top_db=80, ref=tf.math.reduce_max
890+
)
891+
892+
# Check content after ref is different
893+
assert dbscale_mel_spectrogram.shape == [29, mels]
894+
assert dbscale_mel_spectrogram.dtype == tf.float32
895+
879896
spec = dbscale_mel_spectrogram
880897

881898
# Freq masking

0 commit comments

Comments
 (0)