Skip to content

Commit d4e7524

Browse files
authored
Remove references to transcription code from audio and video (#9)
* Remove references to transcription code from audio and video
* Add ARM build
1 parent a2342f7 commit d4e7524

File tree

3 files changed

+3
-42
lines changed

3 files changed

+3
-42
lines changed

.github/workflows/build.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ jobs:
7676
if: (github.ref == 'refs/heads/main' || startsWith(github.ref, 'refs/tags')) && github.actor != 'dependabot[bot]'
7777
with:
7878
context: .
79-
platforms: linux/amd64
79+
platforms: linux/amd64, linux/arm64
8080
push: true
8181
tags: ${{ steps.meta.outputs.tags }}
8282
labels: ${{ steps.meta.outputs.labels }}

ingestors/media/audio.py

Lines changed: 1 addition & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,15 @@
11
import logging
2-
from datetime import datetime
32
from followthemoney import model
43
from pymediainfo import MediaInfo
5-
from normality import stringify
64

75
from ingestors.ingestor import Ingestor
86
from ingestors.support.timestamp import TimestampSupport
97
from ingestors.exc import ProcessingException
10-
from ingestors.support.transcription import TranscriptionSupport
118

129
log = logging.getLogger(__name__)
1310

1411

15-
class AudioIngestor(Ingestor, TimestampSupport, TranscriptionSupport):
12+
class AudioIngestor(Ingestor, TimestampSupport):
1613
MIME_TYPES = [
1714
"audio/mpeg",
1815
"audio/mp3",
@@ -59,22 +56,6 @@ def ingest(self, file_path, entity):
5956
entity.add("duration", track.duration)
6057
except Exception as ex:
6158
raise ProcessingException(f"Could not read audio: {ex}") from ex
62-
try:
63-
start = datetime.now()
64-
log.info(f"Attempting to transcribe {file_path}")
65-
self.transcribe(file_path, entity)
66-
elapsed_time = datetime.now() - start
67-
# caution! this can't store an elapsed time larger than 24h
68-
# datetime.seconds capped at [0,86400)
69-
elapsed_time = divmod(elapsed_time.total_seconds(), 60)[0]
70-
log.info(
71-
f"Transcription duration: {elapsed_time} minutes (audio duration: {entity.get('duration')})"
72-
)
73-
except Exception as ex:
74-
# If the transcription fails, the file processing should still count as a success.
75-
# The existence of a transcription is not mandatory, for now.
76-
entity.set("processingError", stringify(ex))
77-
log.error(ex)
7859

7960
@classmethod
8061
def match(cls, file_path, entity):

ingestors/media/video.py

Lines changed: 1 addition & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,15 @@
11
import logging
2-
from datetime import datetime
32
from followthemoney import model
43
from pymediainfo import MediaInfo
5-
from normality import stringify
64

75
from ingestors.ingestor import Ingestor
86
from ingestors.support.timestamp import TimestampSupport
97
from ingestors.exc import ProcessingException
10-
from ingestors.support.transcription import TranscriptionSupport
118

129
log = logging.getLogger(__name__)
1310

1411

15-
class VideoIngestor(Ingestor, TimestampSupport, TranscriptionSupport):
12+
class VideoIngestor(Ingestor, TimestampSupport):
1613
MIME_TYPES = [
1714
"application/x-shockwave-flash",
1815
"video/quicktime",
@@ -47,23 +44,6 @@ def ingest(self, file_path, entity):
4744
entity.add("duration", track.duration)
4845
except Exception as ex:
4946
raise ProcessingException("Could not read video: %r", ex) from ex
50-
try:
51-
start = datetime.now()
52-
log.info(f"Attempting to transcribe {file_path}")
53-
audio_only_file = self.extract_audio(file_path)
54-
self.transcribe(audio_only_file, entity)
55-
elapsed_time = datetime.now() - start
56-
# caution! this can't store an elapsed time larger than 24h
57-
# datetime.seconds capped at [0,86400)
58-
elapsed_time = divmod(elapsed_time.total_seconds(), 60)[0]
59-
log.info(
60-
f"Transcription duration: {elapsed_time} minutes (audio duration: {entity.get('duration')})"
61-
)
62-
except Exception as ex:
63-
# If the transcription fails, the file processing should still count as a success.
64-
# The existence of a transcription is not mandatory, for now.
65-
entity.set("processingError", stringify(ex))
66-
log.error(ex)
6747

6848
@classmethod
6949
def match(cls, file_path, entity):

0 commit comments

Comments
 (0)