Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions examples/ECAPA-TDNN/reader.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""data process"""

import os
import pickle

Expand Down
1 change: 1 addition & 0 deletions examples/ECAPA-TDNN/sampler.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""sampler definition"""

from __future__ import division

import math
Expand Down
3 changes: 3 additions & 0 deletions examples/ECAPA-TDNN/speaker_verification_cosine.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
# ECAPA_TDNN in mindspore.
# Adapted from https://github.com/speechbrain/speechbrain/blob/develop/recipes/VoxCeleb/SpeakerRec/speaker_verification_cosine.py
"""
Recipe for training a speaker verification system based on cosine distance.
"""

import datetime
import os
import pickle
Expand Down
3 changes: 3 additions & 0 deletions examples/ECAPA-TDNN/train_speaker_embeddings.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
# Training speaker embeddings in mindspore.
# Adapted from https://github.com/speechbrain/speechbrain/blob/develop/recipes/VoxCeleb/SpeakerRec/train_speaker_embeddings.py
"""
Recipe for training speaker embeddings using the VoxCeleb Dataset.
"""

import math
import os
import pickle
Expand Down
2 changes: 2 additions & 0 deletions examples/ECAPA-TDNN/voxceleb_prepare.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
# Data preparation in mindspore.
# Adapted from https://github.com/speechbrain/speechbrain/blob/develop/recipes/VoxCeleb/SpeakerRec/voxceleb_prepare.py
"""
Data preparation, from mindaudio VoxCeleb recipe.
"""
Expand Down
2 changes: 2 additions & 0 deletions examples/conformer/asr_model.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
# ASR model in mindspore.
# Adapted from https://github.com/wenet-e2e/wenet/blob/main/wenet/transformer/asr_model.py
"""Definition of ASR model."""

import mindspore
Expand Down
3 changes: 2 additions & 1 deletion examples/conformer/predict.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""ASR inference process.
"""
ASR inference process.

python predict.py --config_path <CONFIG_FILE>
"""
Expand Down
3 changes: 2 additions & 1 deletion examples/conformer/train.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""ASR training process.
"""
ASR training process.

python train.py --config_path <CONFIG_FILE>
"""
Expand Down
29 changes: 2 additions & 27 deletions examples/conv_tasnet/data.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
# AudioDataLoader in mindspore.
# Adapted from https://github.com/kaituoxu/Conv-TasNet/blob/master/src/data.py
"""
Logic:
1. AudioDataLoader generate a minibatch from AudioDataset, the size of this
Expand All @@ -16,14 +18,11 @@
Each targets's shape is B x C x T
"""

import argparse
import json
import math
import os

import mindspore.dataset as ds
import numpy as np
from mindspore import context

import mindaudio.data.io as io

Expand Down Expand Up @@ -176,27 +175,3 @@ def sort_and_pad(self, batch):

sources_pad = sources_pad.transpose((0, 2, 1))
return mixtures_pad, ilens, sources_pad


if __name__ == "__main__":
context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", device_id=4)
args = parser.parse_args()
print(args)
tr_dataset = DatasetGenerator(
args.train_dir,
args.batch_size,
sample_rate=args.sample_rate,
segment=args.segment,
)
dataset = ds.GeneratorDataset(
tr_dataset, ["mixture", "lens", "sources"], shuffle=False
)
dataset = dataset.batch(batch_size=5)
iter_per_epoch = dataset.get_dataset_size()
print(iter_per_epoch)
h = 0
for data in dataset.create_dict_iterator():
h += 1
print(data["mixture"])
print(data["lens"])
print(data["sources"])
3 changes: 3 additions & 0 deletions examples/conv_tasnet/eval.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
# Evaluation of Conv-TasNet in mindspore.
# Adapted from https://github.com/kaituoxu/Conv-TasNet/blob/master/src/evaluate.py

import mindspore
import mindspore.dataset as ds
import mindspore.ops as ops
Expand Down
2 changes: 2 additions & 0 deletions examples/conv_tasnet/preprocess.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
# Preprocess of Conv-TasNet in mindspore.
# Adapted from https://github.com/kaituoxu/Conv-TasNet/blob/master/src/preprocess.py
""" Convert the relevant information in the audio wav file to a json file """

import argparse
Expand Down
3 changes: 3 additions & 0 deletions examples/conv_tasnet/train.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
# Train of Conv-TasNet in mindspore.
# Adapted from https://github.com/kaituoxu/Conv-TasNet/blob/master/src/train.py

import os

import mindspore.dataset as ds
Expand Down
2 changes: 2 additions & 0 deletions examples/deepspeech2/eval.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
# Evaluation of deepspeech2 in mindspore.
# Adapted from https://github.com/SeanNaren/deepspeech.pytorch/tree/master/deepspeech_pytorch/validation.py
"""
Eval DeepSpeech2
"""
Expand Down
2 changes: 2 additions & 0 deletions examples/deepspeech2/train.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
# Train of deepspeech2 in mindspore.
# Adapted from https://github.com/SeanNaren/deepspeech.pytorch/tree/master/deepspeech_pytorch/training.py
"""Train DeepSpeech2."""

import os
Expand Down
3 changes: 3 additions & 0 deletions examples/fastspeech2/dataset.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
# LJSpeech dataloader in mindspore.
# Adapted from https://github.com/ming024/FastSpeech2/blob/master/dataset.py

import os
import sys
from multiprocessing import cpu_count
Expand Down
3 changes: 3 additions & 0 deletions examples/fastspeech2/generate.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
# Synthesize in mindspore.
# Adapted from https://github.com/ming024/FastSpeech2/blob/master/synthesize.py

import argparse
import os
import re
Expand Down
3 changes: 3 additions & 0 deletions examples/fastspeech2/ljspeech.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
# LJSpeech dataloader in mindspore.
# Adapted from https://github.com/ming024/FastSpeech2/blob/master/preprocessor/ljspeech.py

import csv
import os

Expand Down
1 change: 1 addition & 0 deletions examples/fastspeech2/preprocess.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# Given the path to ljspeech/wavs,
# this script converts wav files to .npy features used for training.
# Adapted from https://github.com/ming024/FastSpeech2/blob/master/preprocessor/preprocessor.py

import argparse
import os
Expand Down
2 changes: 1 addition & 1 deletion examples/fastspeech2/text/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
""" from https://github.com/keithito/tacotron """
# Copied from https://github.com/keithito/tacotron
import re

from text import cleaners
Expand Down
2 changes: 1 addition & 1 deletion examples/fastspeech2/text/cleaners.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
""" from https://github.com/keithito/tacotron """
# Copied from https://github.com/keithito/tacotron

"""
Cleaners are transformations that run over the input text at both training and eval time.
Expand Down
2 changes: 1 addition & 1 deletion examples/fastspeech2/text/cmudict.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
""" from https://github.com/keithito/tacotron """
# Copied from https://github.com/keithito/tacotron

import re

Expand Down
2 changes: 1 addition & 1 deletion examples/fastspeech2/text/numbers.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
""" from https://github.com/keithito/tacotron """
# Copied from https://github.com/keithito/tacotron

import re

Expand Down
2 changes: 2 additions & 0 deletions examples/fastspeech2/text/pinyin.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
# Copied from https://github.com/ming024/FastSpeech2/blob/master/text/pinyin.py

initials = [
"b",
"c",
Expand Down
2 changes: 2 additions & 0 deletions examples/fastspeech2/text/symbols.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
# Copied from https://github.com/ming024/FastSpeech2/blob/master/text/symbols.py

from text import pinyin

valid_symbols = [
Expand Down
3 changes: 3 additions & 0 deletions examples/fastspeech2/train.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
# Training in mindspore.
# Adapted from https://github.com/ming024/FastSpeech2/blob/master/train.py

import argparse
import ast
import os
Expand Down
4 changes: 3 additions & 1 deletion examples/tasnet/data.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
""" data """
# AudioDataLoader in mindspore.
# Adapted from https://github.com/kaituoxu/TasNet/blob/master/src/data.py

import json
import os

Expand Down
12 changes: 4 additions & 8 deletions examples/tasnet/eval.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
# Evaluation of TasNet in mindspore.
# Adapted from https://github.com/kaituoxu/TasNet/blob/master/src/evaluate.py

import argparse
import json
import os
Expand All @@ -6,14 +9,7 @@
import mindspore.dataset as ds
import mindspore.ops as ops
from data import DatasetGenerator
from mindspore import (
Parameter,
Tensor,
context,
load_checkpoint,
load_param_into_net,
set_seed,
)
from mindspore import context, load_checkpoint, load_param_into_net, set_seed

import mindaudio.data.io as io
from mindaudio.loss.separation_loss import Separation_Loss
Expand Down
2 changes: 2 additions & 0 deletions examples/tasnet/preprocess.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
# Preprocess of TasNet in mindspore.
# Adapted from https://github.com/kaituoxu/TasNet/blob/master/src/preprocess.py
""" Convert the relevant information in the audio wav file to a json file """

import argparse
Expand Down
4 changes: 3 additions & 1 deletion examples/tasnet/train.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
""" Train """
# Training of TasNet in mindspore.
# Adapted from https://github.com/kaituoxu/TasNet/blob/master/src/train.py

import argparse
import json
import os
Expand Down
1 change: 1 addition & 0 deletions examples/tasnet/train_wrapper.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
""" Training Wrapper """

import mindspore.nn as nn
import mindspore.ops as ops
from mindspore import context
Expand Down
3 changes: 3 additions & 0 deletions examples/wavegrad/dataset.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
# AudioDataLoader in mindspore.
# Adapted from https://github.com/lmnt-com/wavegrad/blob/master/src/wavegrad/dataset.py

from multiprocessing import cpu_count

import numpy as np
Expand Down
3 changes: 3 additions & 0 deletions examples/wavegrad/ljspeech.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
# LJSpeech dataloader in mindspore.
# Adapted from https://github.com/ming024/FastSpeech2/blob/master/preprocessor/ljspeech.py

import csv
import os

Expand Down
3 changes: 3 additions & 0 deletions examples/wavegrad/preprocess.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
# Preprocess in mindspore.
# Adapted from https://github.com/lmnt-com/wavegrad/blob/master/src/wavegrad/preprocess.py

import sys
from multiprocessing import Pool, cpu_count

Expand Down
3 changes: 3 additions & 0 deletions mindaudio/data/aishell.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
# AISHELL dataloader in mindspore.
# Adapted from https://github.com/speechbrain/speechbrain/blob/develop/recipes/AISHELL-1/aishell_prepare.py

import argparse
import csv
import glob
Expand Down
3 changes: 3 additions & 0 deletions mindaudio/data/augment.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
# Time-Domain Sequential Data Augmentation Classes
# Adapted from https://github.com/speechbrain/speechbrain/blob/develop/speechbrain/augment/time_domain.py

import random

import mindspore as ms
Expand Down
3 changes: 3 additions & 0 deletions mindaudio/data/features.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
# Feature extraction
# Adapted from https://github.com/librosa/librosa/tree/main/librosa/feature/spectral.py

import mindspore as ms
import mindspore.dataset.audio as msaudio
import numpy as np
Expand Down
5 changes: 5 additions & 0 deletions mindaudio/data/filters.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
# Filter bank construction
# Adapted from https://github.com/librosa/librosa/tree/main/librosa/filters.py
# and
# https://github.com/speechbrain/speechbrain/blob/develop/speechbrain/processing/signal_processing.py

from typing import Optional, Union

try:
Expand Down
3 changes: 3 additions & 0 deletions mindaudio/data/io.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
# Module to read / write wav files using NumPy arrays
# Adapted from https://github.com/scipy/scipy/tree/main/scipy/io/wavfile.py

import collections
import io
import struct
Expand Down
3 changes: 3 additions & 0 deletions mindaudio/data/librispeech.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
# LibriSpeech dataloader in mindspore.
# Adapted from https://github.com/SeanNaren/deepspeech.pytorch/tree/master/data/librispeech.py

import argparse
import json
import os
Expand Down
5 changes: 5 additions & 0 deletions mindaudio/data/processing.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
# Signal processing utilities
# Adapted from https://github.com/librosa/librosa/tree/main/librosa/core/audio.py
# and
# https://github.com/facebookresearch/AugLy/blob/main/augly/audio/functional.py

import math

import mindspore as ms
Expand Down
2 changes: 2 additions & 0 deletions mindaudio/data/spectrum.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
# Adapted from https://github.com/librosa/librosa/tree/main/librosa/core/spectrum.py

import mindspore as ms
import mindspore.dataset.audio as msaudio
import numpy as np
Expand Down
2 changes: 2 additions & 0 deletions mindaudio/data/voxceleb.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
# Voxceleb dataloader in mindspore.
# Adapted from https://github.com/speechbrain/speechbrain/blob/develop/recipes/VoxCeleb/voxceleb_prepare.py
"""
Data preparation, from mindaudio VoxCeleb recipe.
"""
Expand Down
3 changes: 3 additions & 0 deletions mindaudio/loss/AdditiveAngularMargin.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
# AdditiveAngularMargin in mindspore.
# Adapted from https://github.com/speechbrain/speechbrain/blob/develop/speechbrain/nnet/losses.py

import math

import mindspore as ms
Expand Down
2 changes: 2 additions & 0 deletions mindaudio/loss/ctc_loss.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
# CTC in mindspore.
# Adapted from https://github.com/wenet-e2e/wenet/blob/main/wenet/transformer/ctc.py
"""CTC layer."""

import mindspore
Expand Down
2 changes: 2 additions & 0 deletions mindaudio/loss/label_smoothing_loss.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
# Label_smoothing_loss in mindspore.
# Adapted from https://github.com/wenet-e2e/wenet/blob/main/wenet/transformer/label_smoothing_loss.py
"""Label smoothing module."""

import mindspore
Expand Down
3 changes: 3 additions & 0 deletions mindaudio/loss/separation_loss.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
# Separation_loss in mindspore.
# Adapted from https://github.com/kaituoxu/TasNet/blob/master/src/pit_criterion.py
""" Loss """

from itertools import permutations

import mindspore
Expand Down
3 changes: 3 additions & 0 deletions mindaudio/metric/snr.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
# SNR in mindspore.
# Adapted from https://github.com/kaituoxu/Conv-TasNet/blob/master/src/evaluate.py

import numpy as np
from mir_eval.separation import bss_eval_sources

Expand Down
4 changes: 3 additions & 1 deletion mindaudio/models/conformer.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
"""Definition of ASR model."""
# Conformer in mindspore.
# Adapted from https://github.com/wenet-e2e/wenet/blob/main/wenet/transformer
"""Definition of conformer model."""

from typing import Optional, Tuple

Expand Down
3 changes: 3 additions & 0 deletions mindaudio/models/conv_tasnet.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
# Conv-TasNet in mindspore.
# Adapted from https://github.com/kaituoxu/Conv-TasNet/blob/master/src/conv_tasnet.py

import argparse
import math

Expand Down
Loading