descriptinc · anjoola · Feb 1, 2024 · Jan 31, 2024 · Jan 31, 2024 · Jan 31, 2024
diff --git a/audiotools/core/ffmpeg.py b/audiotools/core/ffmpeg.py
@@ -61,20 +61,26 @@ def r128stats(filepath: str, quiet: bool):
     return stats_dict
 
 
-def ffprobe_offset(path):
+def ffprobe_offset_and_codec(path: str) -> Tuple[float, str]:
+    """Given a path to a file, returns the start time offset and codec of
+    the first audio stream.
+    """
     ff = ffmpy.FFprobe(
         inputs={path: None},
-        global_options="-show_entries format=start_time:stream=duration,start_time,codec_type,start_pts,time_base -of json -v quiet",
+        global_options="-show_entries format=start_time:stream=duration,start_time,codec_type,codec_name,start_pts,time_base -of json -v quiet",
     )
     streams = json.loads(ff.run(stdout=subprocess.PIPE)[0])["streams"]
     seconds_offset = 0.0
-    # Get the offset of the first audio stream we find
+    codec = None
+
+    # Get the offset and codec of the first audio stream we find
     # and return its start time, if it has one.
     for stream in streams:
         if stream["codec_type"] == "audio":
             seconds_offset = stream.get("start_time", 0.0)
+            codec = stream.get("codec_name")
             break
-    return float(seconds_offset)
+    return float(seconds_offset), codec
 
 
 class FFMPEGMixin:
@@ -174,17 +180,19 @@ def load_from_file_with_ffmpeg(cls, audio_path: str, quiet: bool = True, **kwarg
             )
             ff.run()
 
-            # We pad the file using the start time offset
-            # in case it's an audio stream starting at some
-            # offset in a video container.
-            pad = ffprobe_offset(audio_path)
-            # Don't pad files with discrepancies less than
-            # 0.027s - it's likely due to codec latency.
-            # The amount of latency introduced by mp3 is
-            # 1152, which is 0.0261 44khz. So we
-            # set the threshold here slightly above that.
+            # We pad the file using the start time offset in case it's an audio 
+            # stream starting at some offset in a video container.
+            pad, codec = ffprobe_offset_and_codec(audio_path)
+
+            # For mp3s, don't pad files with discrepancies less than 0.027s -
+            # it's likely due to codec latency. The amount of latency introduced 
+            # by mp3 is 1152, which is 0.0261 44khz. So we set the threshold 
+            # here slightly above that.
             # Source: https://lame.sourceforge.io/tech-FAQ.txt.
-            if pad < 0.027:
+            #
+            # The client should match this logic.
+            # See https://linear.app/descript/issue/MEDIA-1034/mp3-files-decoding-incorrectly-during-exportweb-was-echo-during
+            if codec == 'mp3' and pad < 0.027:
                 pad = 0.0
             ff = ffmpy.FFmpeg(
                 inputs={wav_file: None},