mindspore-lab · LiTingyu1997 · Jul 10, 2025 · Jul 11, 2025
diff --git a/examples/ECAPA-TDNN/reader.py b/examples/ECAPA-TDNN/reader.py
@@ -1,4 +1,5 @@
 """data process"""
+
 import os
 import pickle
 

diff --git a/examples/ECAPA-TDNN/sampler.py b/examples/ECAPA-TDNN/sampler.py
@@ -1,4 +1,5 @@
 """sampler definition"""
+
 from __future__ import division
 
 import math

diff --git a/examples/ECAPA-TDNN/speaker_verification_cosine.py b/examples/ECAPA-TDNN/speaker_verification_cosine.py
@@ -1,6 +1,9 @@
+# ECAPA_TDNN in mindspore.
+# Adapted from https://github.com/speechbrain/speechbrain/blob/develop/recipes/VoxCeleb/SpeakerRec/speaker_verification_cosine.py
 """
 Recipe for training a speaker verification system based on cosine distance.
 """
+
 import datetime
 import os
 import pickle

diff --git a/examples/ECAPA-TDNN/train_speaker_embeddings.py b/examples/ECAPA-TDNN/train_speaker_embeddings.py
@@ -1,6 +1,9 @@
+# Training speaker embeddings in mindspore.
+# Adapted from https://github.com/speechbrain/speechbrain/blob/develop/recipes/VoxCeleb/SpeakerRec/train_speaker_embeddings.py
 """
 Recipe for training speaker embeddings using the VoxCeleb Dataset.
 """
+
 import math
 import os
 import pickle

diff --git a/examples/ECAPA-TDNN/voxceleb_prepare.py b/examples/ECAPA-TDNN/voxceleb_prepare.py
@@ -1,3 +1,5 @@
+# Data preparation in mindspore.
+# Adapted from https://github.com/speechbrain/speechbrain/blob/develop/recipes/VoxCeleb/SpeakerRec/voxceleb_prepare.py
 """
 Data preparation, from mindaudio VoxCeleb recipe.
 """

diff --git a/examples/conformer/asr_model.py b/examples/conformer/asr_model.py
@@ -1,3 +1,5 @@
+# ASR model in mindspore.
+# Adapted from https://github.com/wenet-e2e/wenet/blob/main/wenet/transformer/asr_model.py
 """Definition of ASR model."""
 
 import mindspore

diff --git a/examples/conformer/predict.py b/examples/conformer/predict.py
@@ -1,4 +1,5 @@
-"""ASR inference process.
+"""
+ASR inference process.
 
 python predict.py --config_path <CONFIG_FILE>
 """

diff --git a/examples/conformer/train.py b/examples/conformer/train.py
@@ -1,4 +1,5 @@
-"""ASR training process.
+"""
+ASR training process.
 
 python train.py --config_path <CONFIG_FILE>
 """

diff --git a/examples/conv_tasnet/data.py b/examples/conv_tasnet/data.py
@@ -1,3 +1,5 @@
+# AudioDataLoader in mindspore.
+# Adapted from https://github.com/kaituoxu/Conv-TasNet/blob/master/src/data.py
 """
 Logic:
 1. AudioDataLoader generate a minibatch from AudioDataset, the size of this
@@ -16,14 +18,11 @@
     Each targets's shape is B x C x T
 """
 
-import argparse
 import json
 import math
 import os
 
-import mindspore.dataset as ds
 import numpy as np
-from mindspore import context
 
 import mindaudio.data.io as io
 
@@ -176,27 +175,3 @@ def sort_and_pad(self, batch):
 
         sources_pad = sources_pad.transpose((0, 2, 1))
         return mixtures_pad, ilens, sources_pad
-
-
-if __name__ == "__main__":
-    context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", device_id=4)
-    args = parser.parse_args()
-    print(args)
-    tr_dataset = DatasetGenerator(
-        args.train_dir,
-        args.batch_size,
-        sample_rate=args.sample_rate,
-        segment=args.segment,
-    )
-    dataset = ds.GeneratorDataset(
-        tr_dataset, ["mixture", "lens", "sources"], shuffle=False
-    )
-    dataset = dataset.batch(batch_size=5)
-    iter_per_epoch = dataset.get_dataset_size()
-    print(iter_per_epoch)
-    h = 0
-    for data in dataset.create_dict_iterator():
-        h += 1
-        print(data["mixture"])
-        print(data["lens"])
-        print(data["sources"])
diff --git a/examples/conv_tasnet/eval.py b/examples/conv_tasnet/eval.py
@@ -1,3 +1,6 @@
+# Evaluation of Conv-TasNet in mindspore.
+# Adapted from https://github.com/kaituoxu/Conv-TasNet/blob/master/src/evaluate.py
+
 import mindspore
 import mindspore.dataset as ds
 import mindspore.ops as ops

diff --git a/examples/conv_tasnet/preprocess.py b/examples/conv_tasnet/preprocess.py
@@ -1,3 +1,5 @@
+# Preprocess of Conv-TasNet in mindspore.
+# Adapted from https://github.com/kaituoxu/Conv-TasNet/blob/master/src/preprocess.py
 """ Convert the relevant information in the audio wav file to a json file """
 
 import argparse

diff --git a/examples/conv_tasnet/train.py b/examples/conv_tasnet/train.py
@@ -1,3 +1,6 @@
+# Train of Conv-TasNet in mindspore.
+# Adapted from https://github.com/kaituoxu/Conv-TasNet/blob/master/src/train.py
+
 import os
 
 import mindspore.dataset as ds

diff --git a/examples/deepspeech2/eval.py b/examples/deepspeech2/eval.py
@@ -1,3 +1,5 @@
+# Evaluation of deepspeech2 in mindspore.
+# Adapted from https://github.com/SeanNaren/deepspeech.pytorch/tree/master/deepspeech_pytorch/validation.py
 """
 Eval DeepSpeech2
 """

diff --git a/examples/deepspeech2/train.py b/examples/deepspeech2/train.py
@@ -1,3 +1,5 @@
+# Train of deepspeech2 in mindspore.
+# Adapted from https://github.com/SeanNaren/deepspeech.pytorch/tree/master/deepspeech_pytorch/training.py
 """train_criteo."""
 
 import os

diff --git a/examples/fastspeech2/dataset.py b/examples/fastspeech2/dataset.py
@@ -1,3 +1,6 @@
+# LJSpeech dataloader in mindspore.
+# Adapted from https://github.com/ming024/FastSpeech2/blob/master/dataset.py
+
 import os
 import sys
 from multiprocessing import cpu_count

diff --git a/examples/fastspeech2/generate.py b/examples/fastspeech2/generate.py
@@ -1,3 +1,6 @@
+# Synthesize in mindspore.
+# Adapted from https://github.com/ming024/FastSpeech2/blob/master/synthesize.py
+
 import argparse
 import os
 import re

diff --git a/examples/fastspeech2/ljspeech.py b/examples/fastspeech2/ljspeech.py
@@ -1,3 +1,6 @@
+# LJSpeech dataloader in mindspore.
+# Adapted from https://github.com/ming024/FastSpeech2/blob/master/preprocessor/ljspeech.py
+
 import csv
 import os
 

diff --git a/examples/fastspeech2/preprocess.py b/examples/fastspeech2/preprocess.py
@@ -1,5 +1,6 @@
 # Given the path to ljspeech/wavs,
 # this script converts wav files to .npy features used for training.
+# Adapted from https://github.com/ming024/FastSpeech2/blob/master/preprocessor/preprocessor.py
 
 import argparse
 import os

diff --git a/examples/fastspeech2/text/__init__.py b/examples/fastspeech2/text/__init__.py
@@ -1,4 +1,4 @@
-""" from https://github.com/keithito/tacotron """
+# Copied from https://github.com/keithito/tacotron
 import re
 
 from text import cleaners

diff --git a/examples/fastspeech2/text/cleaners.py b/examples/fastspeech2/text/cleaners.py
@@ -1,4 +1,4 @@
-""" from https://github.com/keithito/tacotron """
+# Copied from https://github.com/keithito/tacotron
 
 """
 Cleaners are transformations that run over the input text at both training and eval time.

diff --git a/examples/fastspeech2/text/cmudict.py b/examples/fastspeech2/text/cmudict.py
@@ -1,4 +1,4 @@
-""" from https://github.com/keithito/tacotron """
+# Copied from https://github.com/keithito/tacotron
 
 import re
 

diff --git a/examples/fastspeech2/text/numbers.py b/examples/fastspeech2/text/numbers.py
@@ -1,4 +1,4 @@
-""" from https://github.com/keithito/tacotron """
+# Copied from https://github.com/keithito/tacotron
 
 import re
 

diff --git a/examples/fastspeech2/text/pinyin.py b/examples/fastspeech2/text/pinyin.py
@@ -1,3 +1,5 @@
+# Copied from https://github.com/ming024/FastSpeech2/blob/master/text/pinyin.py
+
 initials = [
     "b",
     "c",

diff --git a/examples/fastspeech2/text/symbols.py b/examples/fastspeech2/text/symbols.py
@@ -1,3 +1,5 @@
+# Copied from https://github.com/ming024/FastSpeech2/blob/master/text/symbols.py
+
 from text import pinyin
 
 valid_symbols = [

diff --git a/examples/fastspeech2/train.py b/examples/fastspeech2/train.py
@@ -1,3 +1,6 @@
+# Training in mindspore.
+# Adapted from https://github.com/ming024/FastSpeech2/blob/master/train.py
+
 import argparse
 import ast
 import os

diff --git a/examples/tasnet/data.py b/examples/tasnet/data.py
@@ -1,4 +1,6 @@
-""" data """
+# AudioDataLoader in mindspore.
+# Adapted from https://github.com/kaituoxu/TasNet/blob/master/src/data.py
+
 import json
 import os
 

diff --git a/examples/tasnet/eval.py b/examples/tasnet/eval.py
@@ -1,3 +1,6 @@
+# Evaluation of TasNet in mindspore.
+# Adapted from https://github.com/kaituoxu/TasNet/blob/master/src/evaluate.py
+
 import argparse
 import json
 import os
@@ -6,14 +9,7 @@
 import mindspore.dataset as ds
 import mindspore.ops as ops
 from data import DatasetGenerator
-from mindspore import (
-    Parameter,
-    Tensor,
-    context,
-    load_checkpoint,
-    load_param_into_net,
-    set_seed,
-)
+from mindspore import context, load_checkpoint, load_param_into_net, set_seed
 
 import mindaudio.data.io as io
 from mindaudio.loss.separation_loss import Separation_Loss

diff --git a/examples/tasnet/preprocess.py b/examples/tasnet/preprocess.py
@@ -1,3 +1,5 @@
+# Preprocess of TasNet in mindspore.
+# Adapted from https://github.com/kaituoxu/TasNet/blob/master/src/preprocess.py
 """ Convert the relevant information in the audio wav file to a json file """
 
 import argparse

diff --git a/examples/tasnet/train.py b/examples/tasnet/train.py
@@ -1,4 +1,6 @@
-""" Train """
+# Training of TasNet in mindspore.
+# Adapted from https://github.com/kaituoxu/TasNet/blob/master/src/train.py
+
 import argparse
 import json
 import os

diff --git a/examples/tasnet/train_wrapper.py b/examples/tasnet/train_wrapper.py
@@ -1,4 +1,5 @@
 """ Training Wrapper """
+
 import mindspore.nn as nn
 import mindspore.ops as ops
 from mindspore import context

diff --git a/examples/wavegrad/dataset.py b/examples/wavegrad/dataset.py
@@ -1,3 +1,6 @@
+# AudioDataLoader in mindspore.
+# Adapted from https://github.com/lmnt-com/wavegrad/blob/master/src/wavegrad/dataset.py
+
 from multiprocessing import cpu_count
 
 import numpy as np

diff --git a/examples/wavegrad/ljspeech.py b/examples/wavegrad/ljspeech.py
@@ -1,3 +1,6 @@
+# LJSpeech dataloader in mindspore.
+# Adapted from https://github.com/ming024/FastSpeech2/blob/master/preprocessor/ljspeech.py
+
 import csv
 import os
 

diff --git a/examples/wavegrad/preprocess.py b/examples/wavegrad/preprocess.py
@@ -1,3 +1,6 @@
+# Preprocess in mindspore.
+# Adapted from https://github.com/lmnt-com/wavegrad/blob/master/src/wavegrad/preprocess.py
+
 import sys
 from multiprocessing import Pool, cpu_count
 

diff --git a/mindaudio/data/aishell.py b/mindaudio/data/aishell.py
@@ -1,3 +1,6 @@
+# AISHELL dataloader in mindspore.
+# Adapted from https://github.com/speechbrain/speechbrain/blob/develop/recipes/AISHELL-1/aishell_prepare.py
+
 import argparse
 import csv
 import glob

diff --git a/mindaudio/data/augment.py b/mindaudio/data/augment.py
@@ -1,3 +1,6 @@
+# Time-Domain Sequential Data Augmentation Classes
+# Adapted from https://github.com/speechbrain/speechbrain/blob/develop/speechbrain/augment/time_domain.py
+
 import random
 
 import mindspore as ms

diff --git a/mindaudio/data/features.py b/mindaudio/data/features.py
@@ -1,3 +1,6 @@
+# Feature extraction
+# Adapted from https://github.com/librosa/librosa/tree/main/librosa/feature/spectral.py
+
 import mindspore as ms
 import mindspore.dataset.audio as msaudio
 import numpy as np

diff --git a/mindaudio/data/filters.py b/mindaudio/data/filters.py
@@ -1,3 +1,8 @@
+# Filter bank construction
+# Adapted from https://github.com/librosa/librosa/tree/main/librosa/filters.py
+# and
+# https://github.com/speechbrain/speechbrain/blob/develop/speechbrain/processing/signal_processing.py
+
 from typing import Optional, Union
 
 try:

diff --git a/mindaudio/data/io.py b/mindaudio/data/io.py
@@ -1,3 +1,6 @@
+# Module to read / write wav files using NumPy arrays
+# Adapted from https://github.com/scipy/scipy/tree/main/scipy/io/wavfile.py
+
 import collections
 import io
 import struct

diff --git a/mindaudio/data/librispeech.py b/mindaudio/data/librispeech.py
@@ -1,3 +1,6 @@
+# LibriSpeech dataloader in mindspore.
+# Adapted from https://github.com/SeanNaren/deepspeech.pytorch/tree/master/data/librispeech.py
+
 import argparse
 import json
 import os

diff --git a/mindaudio/data/processing.py b/mindaudio/data/processing.py
@@ -1,3 +1,8 @@
+# Signal processing utilities
+# Adapted from https://github.com/librosa/librosa/tree/main/librosa/core/audio.py
+# and
+# https://github.com/facebookresearch/AugLy/blob/main/augly/audio/functional.py
+
 import math
 
 import mindspore as ms

diff --git a/mindaudio/data/spectrum.py b/mindaudio/data/spectrum.py
@@ -1,3 +1,5 @@
+# Adapted from https://github.com/librosa/librosa/tree/main/librosa/core/spectrum.py
+
 import mindspore as ms
 import mindspore.dataset.audio as msaudio
 import numpy as np

diff --git a/mindaudio/data/voxceleb.py b/mindaudio/data/voxceleb.py
@@ -1,3 +1,5 @@
+# Voxceleb dataloader in mindspore.
+# Adapted from https://github.com/speechbrain/speechbrain/blob/develop/recipes/VoxCeleb/voxceleb_prepare.py
 """
 Data preparation, from mindaudio VoxCeleb recipe.
 """

diff --git a/mindaudio/loss/AdditiveAngularMargin.py b/mindaudio/loss/AdditiveAngularMargin.py
@@ -1,3 +1,6 @@
+# AdditiveAngularMargin in mindspore.
+# Adapted from https://github.com/speechbrain/speechbrain/blob/develop/speechbrain/nnet/losses.py
+
 import math
 
 import mindspore as ms

diff --git a/mindaudio/loss/ctc_loss.py b/mindaudio/loss/ctc_loss.py
@@ -1,3 +1,5 @@
+# CTC in mindspore.
+# Adapted from https://github.com/wenet-e2e/wenet/blob/main/wenet/transformer/ctc.py
 """CTC layer."""
 
 import mindspore

diff --git a/mindaudio/loss/label_smoothing_loss.py b/mindaudio/loss/label_smoothing_loss.py
@@ -1,3 +1,5 @@
+# Label_smoothing_loss in mindspore.
+# Adapted from https://github.com/wenet-e2e/wenet/blob/main/wenet/transformer/label_smoothing_loss.py
 """Label smoothing module."""
 
 import mindspore

diff --git a/mindaudio/loss/separation_loss.py b/mindaudio/loss/separation_loss.py
@@ -1,4 +1,7 @@
+# Separation_loss in mindspore.
+# Adapted from https://github.com/kaituoxu/TasNet/blob/master/src/pit_criterion.py
 """ Loss """
+
 from itertools import permutations
 
 import mindspore

diff --git a/mindaudio/metric/snr.py b/mindaudio/metric/snr.py
@@ -1,3 +1,6 @@
+# SNR in mindspore.
+# Adapted from https://github.com/kaituoxu/Conv-TasNet/blob/master/src/evaluate.py
+
 import numpy as np
 from mir_eval.separation import bss_eval_sources
 

diff --git a/mindaudio/models/conformer.py b/mindaudio/models/conformer.py
@@ -1,4 +1,6 @@
-"""Definition of ASR model."""
+# Conformer in mindspore.
+# Adapted from https://github.com/wenet-e2e/wenet/blob/main/wenet/transformer
+"""Definition of conformer model."""
 
 from typing import Optional, Tuple
 

diff --git a/mindaudio/models/conv_tasnet.py b/mindaudio/models/conv_tasnet.py
@@ -1,3 +1,6 @@
+# Conv-TasNet in mindspore.
+# Adapted from https://github.com/kaituoxu/Conv-TasNet/blob/master/src/conv_tasnet.py
+
 import argparse
 import math