diff --git a/examples/ECAPA-TDNN/reader.py b/examples/ECAPA-TDNN/reader.py index 4b66f89..1536b33 100644 --- a/examples/ECAPA-TDNN/reader.py +++ b/examples/ECAPA-TDNN/reader.py @@ -1,4 +1,5 @@ """data process""" + import os import pickle diff --git a/examples/ECAPA-TDNN/sampler.py b/examples/ECAPA-TDNN/sampler.py index f8f5b33..993b0d5 100644 --- a/examples/ECAPA-TDNN/sampler.py +++ b/examples/ECAPA-TDNN/sampler.py @@ -1,4 +1,5 @@ """sampler definition""" + from __future__ import division import math diff --git a/examples/ECAPA-TDNN/speaker_verification_cosine.py b/examples/ECAPA-TDNN/speaker_verification_cosine.py index 431bf7e..ec74227 100644 --- a/examples/ECAPA-TDNN/speaker_verification_cosine.py +++ b/examples/ECAPA-TDNN/speaker_verification_cosine.py @@ -1,6 +1,9 @@ +# ECAPA_TDNN in mindspore. +# Adapted from https://github.com/speechbrain/speechbrain/blob/develop/recipes/VoxCeleb/SpeakerRec/speaker_verification_cosine.py """ Recipe for training a speaker verification system based on cosine distance. """ + import datetime import os import pickle diff --git a/examples/ECAPA-TDNN/train_speaker_embeddings.py b/examples/ECAPA-TDNN/train_speaker_embeddings.py index f685a42..eaa4550 100644 --- a/examples/ECAPA-TDNN/train_speaker_embeddings.py +++ b/examples/ECAPA-TDNN/train_speaker_embeddings.py @@ -1,6 +1,9 @@ +# Training speaker embeddings in mindspore. +# Adapted from https://github.com/speechbrain/speechbrain/blob/develop/recipes/VoxCeleb/SpeakerRec/train_speaker_embeddings.py """ Recipe for training speaker embeddings using the VoxCeleb Dataset. """ + import math import os import pickle diff --git a/examples/ECAPA-TDNN/voxceleb_prepare.py b/examples/ECAPA-TDNN/voxceleb_prepare.py index 7d27571..4a46a70 100644 --- a/examples/ECAPA-TDNN/voxceleb_prepare.py +++ b/examples/ECAPA-TDNN/voxceleb_prepare.py @@ -1,3 +1,5 @@ +# Data preparation in mindspore. 
+# Adapted from https://github.com/speechbrain/speechbrain/blob/develop/recipes/VoxCeleb/SpeakerRec/voxceleb_prepare.py """ Data preparation, from mindaudio VoxCeleb recipe. """ diff --git a/examples/conformer/asr_model.py b/examples/conformer/asr_model.py index a9d7a2d..5436e23 100644 --- a/examples/conformer/asr_model.py +++ b/examples/conformer/asr_model.py @@ -1,3 +1,5 @@ +# ASR model in mindspore. +# Adapted from https://github.com/wenet-e2e/wenet/blob/main/wenet/transformer/asr_model.py """Definition of ASR model.""" import mindspore diff --git a/examples/conformer/predict.py b/examples/conformer/predict.py index e89f0f6..b6616a6 100644 --- a/examples/conformer/predict.py +++ b/examples/conformer/predict.py @@ -1,4 +1,5 @@ -"""ASR inference process. +""" +ASR inference process. python predict.py --config_path """ diff --git a/examples/conformer/train.py b/examples/conformer/train.py index 1b20a32..19b4dce 100644 --- a/examples/conformer/train.py +++ b/examples/conformer/train.py @@ -1,4 +1,5 @@ -"""ASR training process. +""" +ASR training process. python train.py --config_path """ diff --git a/examples/conv_tasnet/data.py b/examples/conv_tasnet/data.py index 3c921a2..f076817 100644 --- a/examples/conv_tasnet/data.py +++ b/examples/conv_tasnet/data.py @@ -1,3 +1,5 @@ +# AudioDataLoader in mindspore. +# Adapted from https://github.com/kaituoxu/Conv-TasNet/blob/master/src/data.py """ Logic: 1. 
AudioDataLoader generate a minibatch from AudioDataset, the size of this @@ -16,14 +18,11 @@ Each targets's shape is B x C x T """ -import argparse import json import math import os -import mindspore.dataset as ds import numpy as np -from mindspore import context import mindaudio.data.io as io @@ -176,27 +175,3 @@ def sort_and_pad(self, batch): sources_pad = sources_pad.transpose((0, 2, 1)) return mixtures_pad, ilens, sources_pad - - -if __name__ == "__main__": - context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", device_id=4) - args = parser.parse_args() - print(args) - tr_dataset = DatasetGenerator( - args.train_dir, - args.batch_size, - sample_rate=args.sample_rate, - segment=args.segment, - ) - dataset = ds.GeneratorDataset( - tr_dataset, ["mixture", "lens", "sources"], shuffle=False - ) - dataset = dataset.batch(batch_size=5) - iter_per_epoch = dataset.get_dataset_size() - print(iter_per_epoch) - h = 0 - for data in dataset.create_dict_iterator(): - h += 1 - print(data["mixture"]) - print(data["lens"]) - print(data["sources"]) diff --git a/examples/conv_tasnet/eval.py b/examples/conv_tasnet/eval.py index d21391f..ba913c2 100644 --- a/examples/conv_tasnet/eval.py +++ b/examples/conv_tasnet/eval.py @@ -1,3 +1,6 @@ +# Evaluation of Conv-TasNet in mindspore. +# Adapted from https://github.com/kaituoxu/Conv-TasNet/blob/master/src/evaluate.py + import mindspore import mindspore.dataset as ds import mindspore.ops as ops diff --git a/examples/conv_tasnet/preprocess.py b/examples/conv_tasnet/preprocess.py index b6ee267..2401033 100644 --- a/examples/conv_tasnet/preprocess.py +++ b/examples/conv_tasnet/preprocess.py @@ -1,3 +1,5 @@ +# Preprocess of Conv-TasNet in mindspore. 
+# Adapted from https://github.com/kaituoxu/Conv-TasNet/blob/master/src/preprocess.py """ Convert the relevant information in the audio wav file to a json file """ import argparse diff --git a/examples/conv_tasnet/train.py b/examples/conv_tasnet/train.py index 9c021fc..b9377dc 100644 --- a/examples/conv_tasnet/train.py +++ b/examples/conv_tasnet/train.py @@ -1,3 +1,6 @@ +# Train of Conv-TasNet in mindspore. +# Adapted from https://github.com/kaituoxu/Conv-TasNet/blob/master/src/train.py + import os import mindspore.dataset as ds diff --git a/examples/deepspeech2/eval.py b/examples/deepspeech2/eval.py index 70cc725..0412118 100644 --- a/examples/deepspeech2/eval.py +++ b/examples/deepspeech2/eval.py @@ -1,3 +1,5 @@ +# Evaluation of deepspeech2 in mindspore. +# Adapted from https://github.com/SeanNaren/deepspeech.pytorch/tree/master/deepspeech_pytorch/validation.py """ Eval DeepSpeech2 """ diff --git a/examples/deepspeech2/train.py b/examples/deepspeech2/train.py index 4c01086..a4f364b 100644 --- a/examples/deepspeech2/train.py +++ b/examples/deepspeech2/train.py @@ -1,3 +1,5 @@ +# Train of deepspeech2 in mindspore. +# Adapted from https://github.com/SeanNaren/deepspeech.pytorch/tree/master/deepspeech_pytorch/training.py """train_criteo.""" import os diff --git a/examples/fastspeech2/dataset.py b/examples/fastspeech2/dataset.py index 35a4e35..7e58d0c 100755 --- a/examples/fastspeech2/dataset.py +++ b/examples/fastspeech2/dataset.py @@ -1,3 +1,6 @@ +# LJSpeech dataloader in mindspore. +# Adapted from https://github.com/ming024/FastSpeech2/blob/master/dataset.py + import os import sys from multiprocessing import cpu_count diff --git a/examples/fastspeech2/generate.py b/examples/fastspeech2/generate.py index 775a2a4..c4555c8 100644 --- a/examples/fastspeech2/generate.py +++ b/examples/fastspeech2/generate.py @@ -1,3 +1,6 @@ +# Synthesize in mindspore. 
+# Adapted from https://github.com/ming024/FastSpeech2/blob/master/synthesize.py + import argparse import os import re diff --git a/examples/fastspeech2/ljspeech.py b/examples/fastspeech2/ljspeech.py index 9f73c88..4d07ec8 100755 --- a/examples/fastspeech2/ljspeech.py +++ b/examples/fastspeech2/ljspeech.py @@ -1,3 +1,6 @@ +# LJSpeech dataloader in mindspore. +# Adapted from https://github.com/ming024/FastSpeech2/blob/master/preprocessor/ljspeech.py + import csv import os diff --git a/examples/fastspeech2/preprocess.py b/examples/fastspeech2/preprocess.py index c2a6d93..137a1cb 100755 --- a/examples/fastspeech2/preprocess.py +++ b/examples/fastspeech2/preprocess.py @@ -1,5 +1,6 @@ # Given the path to ljspeech/wavs, # this script converts wav files to .npy features used for training. +# Adapted from https://github.com/ming024/FastSpeech2/blob/master/preprocessor/preprocessor.py import argparse import os diff --git a/examples/fastspeech2/text/__init__.py b/examples/fastspeech2/text/__init__.py index 25b3330..ca49065 100644 --- a/examples/fastspeech2/text/__init__.py +++ b/examples/fastspeech2/text/__init__.py @@ -1,4 +1,4 @@ -""" from https://github.com/keithito/tacotron """ +# Copied from https://github.com/keithito/tacotron import re from text import cleaners diff --git a/examples/fastspeech2/text/cleaners.py b/examples/fastspeech2/text/cleaners.py index 399b853..515651c 100644 --- a/examples/fastspeech2/text/cleaners.py +++ b/examples/fastspeech2/text/cleaners.py @@ -1,4 +1,4 @@ -""" from https://github.com/keithito/tacotron """ +# Copied from https://github.com/keithito/tacotron """ Cleaners are transformations that run over the input text at both training and eval time. 
diff --git a/examples/fastspeech2/text/cmudict.py b/examples/fastspeech2/text/cmudict.py index 9858827..c566b99 100644 --- a/examples/fastspeech2/text/cmudict.py +++ b/examples/fastspeech2/text/cmudict.py @@ -1,4 +1,4 @@ -""" from https://github.com/keithito/tacotron """ +# Copied from https://github.com/keithito/tacotron import re diff --git a/examples/fastspeech2/text/numbers.py b/examples/fastspeech2/text/numbers.py index d5cf986..ce9641c 100644 --- a/examples/fastspeech2/text/numbers.py +++ b/examples/fastspeech2/text/numbers.py @@ -1,4 +1,4 @@ -""" from https://github.com/keithito/tacotron """ +# Copied from https://github.com/keithito/tacotron import re diff --git a/examples/fastspeech2/text/pinyin.py b/examples/fastspeech2/text/pinyin.py index b590081..71ae5e8 100644 --- a/examples/fastspeech2/text/pinyin.py +++ b/examples/fastspeech2/text/pinyin.py @@ -1,3 +1,5 @@ +# Copied from https://github.com/ming024/FastSpeech2/blob/master/text/pinyin.py + initials = [ "b", "c", diff --git a/examples/fastspeech2/text/symbols.py b/examples/fastspeech2/text/symbols.py index 426d2d6..4fdbe75 100644 --- a/examples/fastspeech2/text/symbols.py +++ b/examples/fastspeech2/text/symbols.py @@ -1,3 +1,5 @@ +# Copied from https://github.com/ming024/FastSpeech2/blob/master/text/symbols.py + from text import pinyin valid_symbols = [ diff --git a/examples/fastspeech2/train.py b/examples/fastspeech2/train.py index fd7b56b..834f276 100755 --- a/examples/fastspeech2/train.py +++ b/examples/fastspeech2/train.py @@ -1,3 +1,6 @@ +# Training in mindspore. +# Adapted from https://github.com/ming024/FastSpeech2/blob/master/train.py + import argparse import ast import os diff --git a/examples/tasnet/data.py b/examples/tasnet/data.py index 87364e8..a5ed0e3 100644 --- a/examples/tasnet/data.py +++ b/examples/tasnet/data.py @@ -1,4 +1,6 @@ -""" data """ +# AudioDataLoader in mindspore. 
+# Adapted from https://github.com/kaituoxu/TasNet/blob/master/src/data.py + import json import os diff --git a/examples/tasnet/eval.py b/examples/tasnet/eval.py index 67afb63..b3130ac 100644 --- a/examples/tasnet/eval.py +++ b/examples/tasnet/eval.py @@ -1,3 +1,6 @@ +# Evaluation of TasNet in mindspore. +# Adapted from https://github.com/kaituoxu/TasNet/blob/master/src/evaluate.py + import argparse import json import os @@ -6,14 +9,7 @@ import mindspore.dataset as ds import mindspore.ops as ops from data import DatasetGenerator -from mindspore import ( - Parameter, - Tensor, - context, - load_checkpoint, - load_param_into_net, - set_seed, -) +from mindspore import context, load_checkpoint, load_param_into_net, set_seed import mindaudio.data.io as io from mindaudio.loss.separation_loss import Separation_Loss diff --git a/examples/tasnet/preprocess.py b/examples/tasnet/preprocess.py index ee29712..2dff787 100755 --- a/examples/tasnet/preprocess.py +++ b/examples/tasnet/preprocess.py @@ -1,3 +1,5 @@ +# Preprocess of TasNet in mindspore. +# Adapted from https://github.com/kaituoxu/TasNet/blob/master/src/preprocess.py """ Convert the relevant information in the audio wav file to a json file """ import argparse diff --git a/examples/tasnet/train.py b/examples/tasnet/train.py index ec6d58d..ecf3522 100644 --- a/examples/tasnet/train.py +++ b/examples/tasnet/train.py @@ -1,4 +1,6 @@ -""" Train """ +# Training of TasNet in mindspore. 
+# Adapted from https://github.com/kaituoxu/TasNet/blob/master/src/train.py + import argparse import json import os diff --git a/examples/tasnet/train_wrapper.py b/examples/tasnet/train_wrapper.py index 9a20996..64587d8 100644 --- a/examples/tasnet/train_wrapper.py +++ b/examples/tasnet/train_wrapper.py @@ -1,4 +1,5 @@ """ Training Wrapper """ + import mindspore.nn as nn import mindspore.ops as ops from mindspore import context diff --git a/examples/wavegrad/dataset.py b/examples/wavegrad/dataset.py index 5caca6c..ec90334 100644 --- a/examples/wavegrad/dataset.py +++ b/examples/wavegrad/dataset.py @@ -1,3 +1,6 @@ +# AudioDataLoader in mindspore. +# Adapted from https://github.com/lmnt-com/wavegrad/blob/master/src/wavegrad/dataset.py + from multiprocessing import cpu_count import numpy as np diff --git a/examples/wavegrad/ljspeech.py b/examples/wavegrad/ljspeech.py index 9f73c88..4d07ec8 100755 --- a/examples/wavegrad/ljspeech.py +++ b/examples/wavegrad/ljspeech.py @@ -1,3 +1,6 @@ +# LJSpeech dataloader in mindspore. +# Adapted from https://github.com/ming024/FastSpeech2/blob/master/preprocessor/ljspeech.py + import csv import os diff --git a/examples/wavegrad/preprocess.py b/examples/wavegrad/preprocess.py index 17eac81..8c2255d 100755 --- a/examples/wavegrad/preprocess.py +++ b/examples/wavegrad/preprocess.py @@ -1,3 +1,6 @@ +# Preprocess in mindspore. +# Adapted from https://github.com/lmnt-com/wavegrad/blob/master/src/wavegrad/preprocess.py + import sys from multiprocessing import Pool, cpu_count diff --git a/mindaudio/data/aishell.py b/mindaudio/data/aishell.py index c21117c..370b629 100644 --- a/mindaudio/data/aishell.py +++ b/mindaudio/data/aishell.py @@ -1,3 +1,6 @@ +# AISHELL dataloader in mindspore. 
+# Adapted from https://github.com/speechbrain/speechbrain/blob/develop/recipes/AISHELL-1/aishell_prepare.py + import argparse import csv import glob diff --git a/mindaudio/data/augment.py b/mindaudio/data/augment.py index 968af82..58c143d 100644 --- a/mindaudio/data/augment.py +++ b/mindaudio/data/augment.py @@ -1,3 +1,6 @@ +# Time-Domain Sequential Data Augmentation Classes +# Adapted from https://github.com/speechbrain/speechbrain/blob/develop/speechbrain/augment/time_domain.py + import random import mindspore as ms diff --git a/mindaudio/data/features.py b/mindaudio/data/features.py index b06dec4..de091f7 100644 --- a/mindaudio/data/features.py +++ b/mindaudio/data/features.py @@ -1,3 +1,6 @@ +# Feature extraction +# Adapted from https://github.com/librosa/librosa/tree/main/librosa/feature/spectral.py + import mindspore as ms import mindspore.dataset.audio as msaudio import numpy as np diff --git a/mindaudio/data/filters.py b/mindaudio/data/filters.py index 77a9386..62db3fb 100644 --- a/mindaudio/data/filters.py +++ b/mindaudio/data/filters.py @@ -1,3 +1,8 @@ +# Filter bank construction +# Adapted from https://github.com/librosa/librosa/tree/main/librosa/filters.py +# and +# https://github.com/speechbrain/speechbrain/blob/develop/speechbrain/processing/signal_processing.py + from typing import Optional, Union try: diff --git a/mindaudio/data/io.py b/mindaudio/data/io.py index 4b6d6a5..d675a54 100644 --- a/mindaudio/data/io.py +++ b/mindaudio/data/io.py @@ -1,3 +1,6 @@ +# Module to read / write wav files using NumPy arrays +# Adapted from https://github.com/scipy/scipy/tree/main/scipy/io/wavfile.py + import collections import io import struct diff --git a/mindaudio/data/librispeech.py b/mindaudio/data/librispeech.py index 7f433ef..417f93e 100644 --- a/mindaudio/data/librispeech.py +++ b/mindaudio/data/librispeech.py @@ -1,3 +1,6 @@ +# LibriSpeech dataloader in mindspore. 
+# Adapted from https://github.com/SeanNaren/deepspeech.pytorch/tree/master/data/librispeech.py + import argparse import json import os diff --git a/mindaudio/data/processing.py b/mindaudio/data/processing.py index d4a60f3..7f94687 100644 --- a/mindaudio/data/processing.py +++ b/mindaudio/data/processing.py @@ -1,3 +1,8 @@ +# Signal processing utilities +# Adapted from https://github.com/librosa/librosa/tree/main/librosa/core/audio.py +# and +# https://github.com/facebookresearch/AugLy/blob/main/augly/audio/functional.py + import math import mindspore as ms diff --git a/mindaudio/data/spectrum.py b/mindaudio/data/spectrum.py index ec66ef2..82091ed 100644 --- a/mindaudio/data/spectrum.py +++ b/mindaudio/data/spectrum.py @@ -1,3 +1,5 @@ +# Adapted from https://github.com/librosa/librosa/tree/main/librosa/core/spectrum.py + import mindspore as ms import mindspore.dataset.audio as msaudio import numpy as np diff --git a/mindaudio/data/voxceleb.py b/mindaudio/data/voxceleb.py index cad01a7..280faa7 100644 --- a/mindaudio/data/voxceleb.py +++ b/mindaudio/data/voxceleb.py @@ -1,3 +1,5 @@ +# Voxceleb dataloader in mindspore. +# Adapted from https://github.com/speechbrain/speechbrain/blob/develop/recipes/VoxCeleb/voxceleb_prepare.py """ Data preparation, from mindaudio VoxCeleb recipe. """ diff --git a/mindaudio/loss/AdditiveAngularMargin.py b/mindaudio/loss/AdditiveAngularMargin.py index ea2e405..533c465 100644 --- a/mindaudio/loss/AdditiveAngularMargin.py +++ b/mindaudio/loss/AdditiveAngularMargin.py @@ -1,3 +1,6 @@ +# AdditiveAngularMargin in mindspore. +# Adapted from https://github.com/speechbrain/speechbrain/blob/develop/speechbrain/nnet/losses.py + import math import mindspore as ms diff --git a/mindaudio/loss/ctc_loss.py b/mindaudio/loss/ctc_loss.py index f513951..cb94c9c 100644 --- a/mindaudio/loss/ctc_loss.py +++ b/mindaudio/loss/ctc_loss.py @@ -1,3 +1,5 @@ +# CTC in mindspore. 
+# Adapted from https://github.com/wenet-e2e/wenet/blob/main/wenet/transformer/ctc.py """CTC layer.""" import mindspore diff --git a/mindaudio/loss/label_smoothing_loss.py b/mindaudio/loss/label_smoothing_loss.py index 8e1542a..edd17c7 100644 --- a/mindaudio/loss/label_smoothing_loss.py +++ b/mindaudio/loss/label_smoothing_loss.py @@ -1,3 +1,5 @@ +# Label_smoothing_loss in mindspore. +# Adapted from https://github.com/wenet-e2e/wenet/blob/main/wenet/transformer/label_smoothing_loss.py """Label smoothing module.""" import mindspore diff --git a/mindaudio/loss/separation_loss.py b/mindaudio/loss/separation_loss.py index 34bc0af..484b0fd 100644 --- a/mindaudio/loss/separation_loss.py +++ b/mindaudio/loss/separation_loss.py @@ -1,4 +1,7 @@ +# Separation_loss in mindspore. +# Adapted from https://github.com/kaituoxu/TasNet/blob/master/src/pit_criterion.py """ Loss """ + from itertools import permutations import mindspore diff --git a/mindaudio/metric/snr.py b/mindaudio/metric/snr.py index 55a19c8..48b6818 100644 --- a/mindaudio/metric/snr.py +++ b/mindaudio/metric/snr.py @@ -1,3 +1,6 @@ +# SNR in mindspore. +# Adapted from https://github.com/kaituoxu/Conv-TasNet/blob/master/src/evaluate.py + import numpy as np from mir_eval.separation import bss_eval_sources diff --git a/mindaudio/models/conformer.py b/mindaudio/models/conformer.py index 5b41801..3714c07 100644 --- a/mindaudio/models/conformer.py +++ b/mindaudio/models/conformer.py @@ -1,4 +1,6 @@ -"""Definition of ASR model.""" +# Conformer in mindspore. +# Adapted from https://github.com/wenet-e2e/wenet/blob/main/wenet/transformer +"""Definition of conformer model.""" from typing import Optional, Tuple diff --git a/mindaudio/models/conv_tasnet.py b/mindaudio/models/conv_tasnet.py index 22fc4cc..af2a9d5 100644 --- a/mindaudio/models/conv_tasnet.py +++ b/mindaudio/models/conv_tasnet.py @@ -1,3 +1,6 @@ +# Conv-TasNet in mindspore. 
+# Adapted from https://github.com/kaituoxu/Conv-TasNet/blob/master/src/conv_tasnet.py + import argparse import math diff --git a/mindaudio/models/decoders/decoder_factory.py b/mindaudio/models/decoders/decoder_factory.py index 4d112f0..117d82a 100644 --- a/mindaudio/models/decoders/decoder_factory.py +++ b/mindaudio/models/decoders/decoder_factory.py @@ -1,3 +1,5 @@ +# Search in mindspore. +# Adapted from https://github.com/wenet-e2e/wenet """Prediction net for ASR inference.""" import mindspore diff --git a/mindaudio/models/decoders/greedydecoder.py b/mindaudio/models/decoders/greedydecoder.py index a11e478..8062803 100644 --- a/mindaudio/models/decoders/greedydecoder.py +++ b/mindaudio/models/decoders/greedydecoder.py @@ -1,3 +1,6 @@ +# Greedydecoder of deepspeech2 in mindspore. +# Adapted from https://github.com/SeanNaren/deepspeech.pytorch/tree/master/src/deepspeech_pytorch/decoder.py + import Levenshtein as Lev import numpy as np from six.moves import xrange diff --git a/mindaudio/models/deepspeech2.py b/mindaudio/models/deepspeech2.py index 84e377e..89d59b2 100644 --- a/mindaudio/models/deepspeech2.py +++ b/mindaudio/models/deepspeech2.py @@ -1,6 +1,5 @@ -""" -DeepSpeech2 model -""" +# DeepSpeech2 model in mindspore. +# Adapted from https://github.com/SeanNaren/deepspeech.pytorch/tree/master/src/deepspeech_pytorch/model.py import math diff --git a/mindaudio/models/ecapatdnn.py b/mindaudio/models/ecapatdnn.py index c69113d..fc5b37b 100644 --- a/mindaudio/models/ecapatdnn.py +++ b/mindaudio/models/ecapatdnn.py @@ -1,3 +1,6 @@ +# ECAPA_TDNN in mindspore. 
+# Adapted from https://github.com/speechbrain/speechbrain/blob/develop/speechbrain/lobes/models/ECAPA_TDNN.py + import mindspore as ms import mindspore.nn as nn import mindspore.ops as ops diff --git a/mindaudio/models/fastspeech2/fastspeech2_v190.py b/mindaudio/models/fastspeech2/fastspeech2_v190.py index 30bcc3b..9ace938 100755 --- a/mindaudio/models/fastspeech2/fastspeech2_v190.py +++ b/mindaudio/models/fastspeech2/fastspeech2_v190.py @@ -1,3 +1,6 @@ +# FastSpeech2 in mindspore. +# Adapted from https://github.com/ming024/FastSpeech2/blob/master/model/fastspeech2.py + import mindspore as ms import mindspore.nn as nn import numpy as np diff --git a/mindaudio/models/fastspeech2/loss.py b/mindaudio/models/fastspeech2/loss.py index 5a11470..0b49c0f 100644 --- a/mindaudio/models/fastspeech2/loss.py +++ b/mindaudio/models/fastspeech2/loss.py @@ -1,3 +1,6 @@ +# FastSpeech2Loss in mindspore. +# Adapted from https://github.com/ming024/FastSpeech2/blob/master/model/loss.py + import mindspore as ms import mindspore.nn as nn import mindspore.ops as ops diff --git a/mindaudio/models/fastspeech2/variance_adapter.py b/mindaudio/models/fastspeech2/variance_adapter.py index ba8ecc8..e0f81eb 100644 --- a/mindaudio/models/fastspeech2/variance_adapter.py +++ b/mindaudio/models/fastspeech2/variance_adapter.py @@ -1,3 +1,6 @@ +# VarianceAdaptor with mindspore. +# Adapted from https://github.com/ming024/FastSpeech2/blob/master/model/modules.py + import mindspore as ms import mindspore.nn as nn import numpy as np diff --git a/mindaudio/models/layers/attention.py b/mindaudio/models/layers/attention.py index c8d20e2..34c4540 100644 --- a/mindaudio/models/layers/attention.py +++ b/mindaudio/models/layers/attention.py @@ -1,3 +1,5 @@ +# Attention in mindspore. 
+# Adapted from https://github.com/wenet-e2e/wenet/blob/main/wenet/transformer/attention.py """Multi-Head Attention layer definition.""" import math diff --git a/mindaudio/models/layers/cmvn.py b/mindaudio/models/layers/cmvn.py index 9f12c55..4977cbb 100644 --- a/mindaudio/models/layers/cmvn.py +++ b/mindaudio/models/layers/cmvn.py @@ -1,3 +1,5 @@ +# CMVN in mindspore. +# Adapted from https://github.com/wenet-e2e/wenet/blob/main/wenet/transformer/cmvn.py """cepstral mean and variance normalization definition.""" import mindspore diff --git a/mindaudio/models/layers/convolution.py b/mindaudio/models/layers/convolution.py index ddec673..ff08d8b 100644 --- a/mindaudio/models/layers/convolution.py +++ b/mindaudio/models/layers/convolution.py @@ -1,3 +1,5 @@ +# Convolution in mindspore. +# Adapted from https://github.com/wenet-e2e/wenet/blob/main/wenet/transformer/convolution.py """ConvolutionModule definition.""" from typing import Tuple diff --git a/mindaudio/models/layers/embedding.py b/mindaudio/models/layers/embedding.py index eba552f..84717ab 100644 --- a/mindaudio/models/layers/embedding.py +++ b/mindaudio/models/layers/embedding.py @@ -1,3 +1,5 @@ +# Embedding in mindspore. +# Adapted from https://github.com/wenet-e2e/wenet/blob/main/wenet/transformer/embedding.py """Positonal Encoding Module.""" import math diff --git a/mindaudio/models/layers/positionwise_feed_forward.py b/mindaudio/models/layers/positionwise_feed_forward.py index 4eb664b..332908f 100644 --- a/mindaudio/models/layers/positionwise_feed_forward.py +++ b/mindaudio/models/layers/positionwise_feed_forward.py @@ -1,4 +1,7 @@ +# Positionwise feed forward in mindspore. 
+# Adapted from https://github.com/wenet-e2e/wenet/blob/main/wenet/transformer/positionwise_feed_forward.py """Positionwise feed forward layer definition.""" + import mindspore import mindspore.common.dtype as mstype import mindspore.nn as nn diff --git a/mindaudio/models/layers/subsampling.py b/mindaudio/models/layers/subsampling.py index 63b041f..31abebd 100644 --- a/mindaudio/models/layers/subsampling.py +++ b/mindaudio/models/layers/subsampling.py @@ -1,3 +1,5 @@ +# Subsampling in mindspore. +# Adapted from https://github.com/wenet-e2e/wenet/blob/main/wenet/transformer/subsampling.py """Subsampling layer definition.""" import mindspore diff --git a/mindaudio/models/layers/swish.py b/mindaudio/models/layers/swish.py index 4501c08..717166f 100644 --- a/mindaudio/models/layers/swish.py +++ b/mindaudio/models/layers/swish.py @@ -1,3 +1,5 @@ +# Swish in mindspore. +# Adapted from https://github.com/wenet-e2e/wenet/blob/main/wenet/transformer/swish.py """Swish() activation function for Conformer.""" import mindspore diff --git a/mindaudio/models/tasnet.py b/mindaudio/models/tasnet.py index 168c29d..c3740d5 100644 --- a/mindaudio/models/tasnet.py +++ b/mindaudio/models/tasnet.py @@ -1,4 +1,6 @@ -""" TasNet """ +# TasNet in mindspore. +# Adapted from https://github.com/kaituoxu/TasNet/blob/master/src/tasnet.py + import mindspore from mindspore import nn, ops diff --git a/mindaudio/models/transformer/constants.py b/mindaudio/models/transformer/constants.py index 757d116..23d0edb 100644 --- a/mindaudio/models/transformer/constants.py +++ b/mindaudio/models/transformer/constants.py @@ -1,4 +1,7 @@ +# Constants in mindspore. 
+# Copied from https://github.com/ming024/FastSpeech2/blob/master/transformer/Constants.py """Constants and tokens.""" + PAD = 0 UNK = 1 BOS = 2 diff --git a/mindaudio/models/transformer/layers.py b/mindaudio/models/transformer/layers.py index 9accb3f..f00bf57 100644 --- a/mindaudio/models/transformer/layers.py +++ b/mindaudio/models/transformer/layers.py @@ -1,3 +1,6 @@ +# Layers in mindspore. +# Adapted from https://github.com/ming024/FastSpeech2/blob/master/transformer/Layers.py + from mindspore import nn from mindaudio.models.transformer.sublayers import ( diff --git a/mindaudio/models/transformer/models.py b/mindaudio/models/transformer/models.py index d425907..ff4068c 100644 --- a/mindaudio/models/transformer/models.py +++ b/mindaudio/models/transformer/models.py @@ -1,3 +1,6 @@ +# Models in mindspore. +# Adapted from https://github.com/ming024/FastSpeech2/blob/master/transformer/Models.py + from mindspore import Tensor from mindspore import dtype as mstype from mindspore import nn, ops diff --git a/mindaudio/models/transformer/score_function.py b/mindaudio/models/transformer/score_function.py index 0d6a37a..04f6469 100644 --- a/mindaudio/models/transformer/score_function.py +++ b/mindaudio/models/transformer/score_function.py @@ -1,3 +1,6 @@ +# ScaledDotProductAttention in mindspore. +# Adapted from https://github.com/ming024/FastSpeech2/blob/master/transformer/Modules.py + import mindspore.numpy as msnp from mindspore import nn, ops diff --git a/mindaudio/models/transformer/sublayers.py b/mindaudio/models/transformer/sublayers.py index f8b881f..c4ff353 100644 --- a/mindaudio/models/transformer/sublayers.py +++ b/mindaudio/models/transformer/sublayers.py @@ -1,3 +1,6 @@ +# Sublayers in mindspore. 
+# Adapted from https://github.com/ming024/FastSpeech2/blob/master/transformer/SubLayers.py + import numpy as np from mindspore import dtype as mstype from mindspore import nn, ops diff --git a/mindaudio/models/wavegrad/wavegrad_v190.py b/mindaudio/models/wavegrad/wavegrad_v190.py index 01d19cb..d502a98 100644 --- a/mindaudio/models/wavegrad/wavegrad_v190.py +++ b/mindaudio/models/wavegrad/wavegrad_v190.py @@ -1,6 +1,5 @@ -""" -THIS FILE IS FOR MindSpore 1.9 -""" +# WaveGrad in mindspore. +# Adapted from https://github.com/lmnt-com/wavegrad/blob/master/src/wavegrad/model.py from math import log as ln diff --git a/mindaudio/utils/initializer.py b/mindaudio/utils/initializer.py index 4a3fde2..181d068 100644 --- a/mindaudio/utils/initializer.py +++ b/mindaudio/utils/initializer.py @@ -1,4 +1,16 @@ -"""initializer function""" +# Licensed under the Apache License, Version 2.0 (the "License"); +# Adapted from https://github.com/mlcommons/training_results_v0.7 +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================= import math diff --git a/mindaudio/utils/load_files.py b/mindaudio/utils/load_files.py index 61944d6..b358de3 100644 --- a/mindaudio/utils/load_files.py +++ b/mindaudio/utils/load_files.py @@ -1,3 +1,17 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. 
+# +# Adapted from https://github.com/PaddleSpeech/paddlespeech +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """Define cmvn-related methods.""" import json diff --git a/mindaudio/utils/mask.py b/mindaudio/utils/mask.py index 390570e..080fc86 100644 --- a/mindaudio/utils/mask.py +++ b/mindaudio/utils/mask.py @@ -1,3 +1,17 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Adapted from https://github.com/PaddleSpeech/paddlespeech +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """mask function""" from typing import List diff --git a/mindaudio/utils/recognize.py b/mindaudio/utils/recognize.py index 175947b..b73d94e 100644 --- a/mindaudio/utils/recognize.py +++ b/mindaudio/utils/recognize.py @@ -1,4 +1,18 @@ -"""Apply beam search on attention decoder.""" +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. 
+# +# Adapted from https://github.com/PaddleSpeech/paddlespeech +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from collections import defaultdict diff --git a/mindaudio/utils/text2token.py b/mindaudio/utils/text2token.py index 054aac4..f270e9c 100755 --- a/mindaudio/utils/text2token.py +++ b/mindaudio/utils/text2token.py @@ -1,4 +1,7 @@ -"""Cut text into single words.""" +# Copyright 2017 Johns Hopkins University (Shinji Watanabe) +# Adapted from https://github.com/espnet/espnet/tree/master/utils/text2token.py +# Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) + from __future__ import print_function, unicode_literals