Info Visualisierung

Created on March 25|Last edited on March 25
Comment
﻿
import re
import subprocess

import ffmpeg
import wandb
﻿
# Configuration parameters for the W&B run and the local transcription job
PROJECT_NAME="gradient-dissent-transcription"  # W&B project passed to wandb.init
JOB_DATA_UPLOAD="data-upload"  # not referenced in the visible part of this script
JOB_TRANSCRIPTION="transcription"  # job_type passed to wandb.init
ARTIFACT_VERSION="latest"  # not referenced in the visible part of this script
# Candidate model sizes: "tiny", "base", "small", "medium", "large"
AUDIOFORMAT = "mp3"  # logged with the run as 'audio_format'
MODELSIZE = "large"  # passed to transcribe.sh and logged as 'modelsize'
path_to_audio_file = "/Users/hans/memory/collections/M22-0010/q4-22/01-whisper/kierkegaard.mp3"
# Number of CPU cores handed to transcribe.sh.
# NOTE(review): the original note read "1 < integer < 8", yet the value used is 8 —
# presumably the intended range is 1 <= cores <= 8; confirm against transcribe.sh.
cores = 8
﻿
# Read the audio length from the file's container metadata. The value is kept
# exactly as probe() returns it (presumably a string of seconds, as ffprobe
# reports it — confirm) and is converted with float() where it is used.
duration = ffmpeg.probe(path_to_audio_file)['format']['duration']
﻿
﻿
def get_transcription_time(bash_output):
  """Extract the total transcription time, in seconds, from the script output.

  Scans `bash_output` line by line for the first line that contains the word
  "total" together with a decimal number, and converts that number from
  milliseconds to seconds.

  Args:
    bash_output: Decoded output text of the transcription script.

  Returns:
    The total time in seconds as a float, or None when no line contains both
    "total" and a decimal number.
  """
  for line in bash_output.splitlines():
    if "total" not in line:
      continue
    match = re.search(r"\d+\.\d+", line)
    if match is None:
      # The line mentions "total" but carries no decimal number; keep scanning
      # instead of failing on it.
      continue
    # The total time is reported in milliseconds; normalize it to seconds so it
    # is comparable with the audio duration.
    return float(match.group()) / 1000
  return None
﻿
def get_transcription_factor(duration, transcription_time):
  """Return the ratio of audio duration to transcription time.

  Both arguments are converted with float(), so numbers and numeric strings
  are accepted. A result above 1 means the transcription took less time than
  the audio lasts.

  Args:
    duration: Length of the audio.
    transcription_time: Time the transcription took, in the same unit.

  Returns:
    duration divided by transcription_time, as a float.
  """
  audio_length = float(duration)
  processing_time = float(transcription_time)
  return audio_length / processing_time
﻿
# Run the experiment locally with `cores` CPUs and log the results to W&B
with wandb.init(project=PROJECT_NAME, job_type=JOB_TRANSCRIPTION) as run:
  # subprocess.run waits for the script to exit and closes its pipe, so no
  # child process is left unreaped. stderr is merged into stdout so the timing
  # line is captured whichever stream the script prints it on.
  completed = subprocess.run(
      ['/Users/hans/memory/collections/M22-0010/q4-22/01-whisper/transcribe.sh', path_to_audio_file, MODELSIZE, str(cores)],
      stdout=subprocess.PIPE,
      stderr=subprocess.STDOUT,
  )
  bash_output = completed.stdout.decode('UTF-8')

  transcription_time = get_transcription_time(bash_output)
  if transcription_time is None:
    # Without a timing line the factor cannot be computed; fail with the
    # script output rather than with an opaque float(None) TypeError.
    raise RuntimeError(
        "No total transcription time found in the transcribe.sh output "
        f"(exit code {completed.returncode}):\n{bash_output}"
    )
  transcription_factor = get_transcription_factor(duration, transcription_time)

  # The transcript file is read back and its lines are joined into one string.
  with open('/Users/hans/code/whisper.cpp/transcriptions/temp-transcript.txt', 'r', encoding='utf-8') as file:
    transcript = file.read().replace('\n', '')

  run.log({
        'audio_path': path_to_audio_file,
        'transcript': transcript,
        'audio_format': AUDIOFORMAT,
        'modelsize': MODELSIZE,
        'transcription_time': transcription_time,
        'audio_duration': duration,
        'transcription_factor': transcription_factor
        })
Section 1﻿
Run set374
﻿
﻿
Add a comment