From e8d06c616f1570a21410ad9eec860c9f503dec9c Mon Sep 17 00:00:00 2001
From: yrriban
Date: Sat, 14 Jun 2025 02:55:43 -0400
Subject: [PATCH] Generally streamline and break the concat routine into
 functions.

Ensure the whole thing is PEP 8 compliant. Also output a text file
summary for chapter markers.
---
 dcc/concat.py | 292 +++++++++++++++++++++++++++++++-------------------
 1 file changed, 182 insertions(+), 110 deletions(-)

diff --git a/dcc/concat.py b/dcc/concat.py
index 764f977..1f28199 100644
--- a/dcc/concat.py
+++ b/dcc/concat.py
@@ -8,6 +8,7 @@ import math
 import numpy as np
 import wand.image
 
+
 class Concat(dcc.doom_base.Wad):
     def get_parser(self, prog_name):
         parser = super().get_parser(prog_name)
@@ -20,120 +21,191 @@ class Concat(dcc.doom_base.Wad):
         logging.basicConfig()
         av.logging.set_level(av.logging.VERBOSE)
         av.logging.restore_default_callback()
-        videos = self.fabricate.joinpath(parsed_args.wad).glob(f"{parsed_args.wad}_map*.mp4")
-        output = av.open(self.fabricate.joinpath(parsed_args.wad).joinpath(f"{parsed_args.wad}_maps{parsed_args.start_map}to{parsed_args.end_map}.mp4"), "w")
-        offset = 0
+        videos = (
+            self.fabricate.joinpath(parsed_args.wad)
+            .glob(f"{parsed_args.wad}_map*.mp4")
+        )
+        fn_base = (
+            f"{parsed_args.wad}_maps{parsed_args.start_map}"
+            + f"to{parsed_args.end_map}"
+        )
+        output = av.open(
+            self.fabricate.joinpath(parsed_args.wad).joinpath(
+                f"{fn_base}.mp4"), "w"
+        )
+        summary_file = open(
+            self.fabricate.joinpath(parsed_args.wad).joinpath(
+                f"{fn_base}.txt"), "w"
+        )
+
+        self._offset = 0
+        summary = []
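+        # Each summary entry becomes a "<map title> MM:SS" line in the text
+        # file (e.g. "Entryway 04:10"), ready to use as chapter markers.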
         # We'd like to use the concat filter here and connect everything into a
         # single filter graph... but it produces a "Resource temporarily
-        # unavailable" error when switching to inputs after the first. Presumably
-        # fixable, but it's easier to just make one graph per video and mux
-        # everything together at the end.
+        # unavailable" error when switching to inputs after the first.
+        # Presumably fixable, but it's easier to just make one graph per video
+        # and mux everything together at the end.
         for v in sorted(videos):
             # TODO: Support UDoom in literally any way.
-            if not (v.name >= f"{parsed_args.wad}_map{parsed_args.start_map}.mp4" and
-                    v.name <= f"{parsed_args.wad}_map{parsed_args.end_map}.mp4"):
+            if not (
+                v.name >= f"{parsed_args.wad}_map{parsed_args.start_map}.mp4"
+                and v.name <= f"{parsed_args.wad}_map{parsed_args.end_map}.mp4"
+            ):
                 continue
-
-            chunk = av.open(v)
-            if not (len(chunk.streams.video) == 1 and len(chunk.streams.audio) == 1):
-                raise Exception(f"irregular chunk {v}: streams {chunk.streams} (expected 1 video & 1 audio)")
-
-            ograph = av.filter.Graph()
-            sink = ograph.add("buffersink")
-            asink = ograph.add("abuffersink")
-
-            if not parsed_args.nooverlay:
-                img = wand.image.Image(height=chunk.streams[0].height,width=chunk.streams[0].width)
-                mapstring = v.name[-6:-4]
-                text = self._config["map_names"][f"map{mapstring}"]
-                dcc.text.draw_text(img, f"MAP{mapstring}: {text}", font_size=120)
-                img.trim(reset_coords=True)
-                img.border("graya(25%, 25%)", 10, 10)
-                img.border(dcc.config.TEXT_STROKE_COLOR, 16, 16)
-                # for this to work... the image needs to have a width that's a multiple
-                # of 8. dude whyyyyyyy
-                padfactor=8
-                img.border("transparent", padfactor, 0)
-                img.crop(width=img.width-img.width%padfactor, height=img.height)
-
-            if len(output.streams.get()) == 0:
-                # We can't use the input stream as a template here; it doesn't
-                # have everything needed to do encoding and will fail
-                # mysteriously later.
-                vs = chunk.streams.video[0]
-                output.add_stream("h264", rate=int(vs.time_base.denominator/vs.time_base.numerator))
-                output.streams[0].extradata = copy.deepcopy(vs.extradata)
-                output.streams[0].height=vs.height
-                output.streams[0].width=vs.width
-                output.streams[0].qmax = vs.qmax
-                output.streams[0].qmin = vs.qmin
-                output.streams[0].codec_context.bit_rate = vs.codec_context.bit_rate
-                output.streams[0].codec_context.framerate = vs.base_rate
-                output.streams[0].codec_context.pix_fmt = vs.codec_context.pix_fmt
-                # The following are only used for encoding and have no equivalent on the input stream.
-                output.streams[0].profile="High"
-                output.streams[0].codec_context.gop_size=30
-                output.streams[0].codec_context.max_b_frames=2
-
-                astr = chunk.streams.audio[0]
-                output.add_stream("aac", rate=astr.rate)
-                output.streams[1].extradata = copy.deepcopy(astr.extradata)
-                output.streams[1].bit_rate=astr.bit_rate
-
-            src = ograph.add_buffer(template=chunk.streams.video[0], time_base=chunk.streams.video[0].time_base)
-            asrc = ograph.add_abuffer(template=chunk.streams.audio[0], time_base=chunk.streams.audio[0].time_base)
-            # TODO: video fades are absolute relative to the input video; audio
-            # fades need to have their timestamps offset by the position in the
-            # final video. Clarify if this is really necessary.
-            frame_rate = chunk.streams.video[0].base_rate
-            sample_rate = chunk.streams.audio[0].rate
-            ifade = ograph.add("fade", args="in:0:{}".format(frame_rate))
-            ofade = ograph.add("fade", args="out:{}:{}".format((chunk.duration*frame_rate/1000000)-frame_rate, frame_rate))
-            iafade = ograph.add("afade", args="in:{}:{}".format(offset*sample_rate/1000000, sample_rate))
-            oafade = ograph.add("afade", args="out:{}:{}".format(((offset+chunk.duration)*sample_rate/1000000)-sample_rate, sample_rate))
-            if not parsed_args.nooverlay:
-                overlay = ograph.add_buffer(width=img.width, height=img.height, format="rgba", time_base=chunk.streams[0].time_base)
-                overlay_fo = ograph.add("fade", args="out:{}:{}".format(4*frame_rate, frame_rate))
-                overlay.link_to(overlay_fo, 0, 0)
-                composite = ograph.add("overlay", args="x=4:y=4")
-                src.link_to(composite, 0, 0)
-                overlay_fo.link_to(composite, 0, 1)
-                composite.link_to(ifade, 0, 0)
-            else:
-                src.link_to(ifade, 0, 0)
-
-            asrc.link_to(iafade, 0, 0)
-            ifade.link_to(ofade, 0, 0)
-            iafade.link_to(oafade, 0, 0)
-            ofade.link_to(sink, 0, 0)
-            oafade.link_to(asink, 0, 0)
-            ograph.configure()
-
-            for packet in chunk.demux():
-                if packet.dts is None:
-                    continue
-                packet.dts += (offset * packet.time_base.denominator) / (packet.time_base.numerator * 1000000)
-                packet.pts += (offset * packet.time_base.denominator) / (packet.time_base.numerator * 1000000)
-                if packet.stream == chunk.streams.video[0]:
-                    for ifr in packet.decode():
-                        if not parsed_args.nooverlay:
-                            text_frame = av.video.frame.VideoFrame(img.width, img.height, format="rgba")
-                            text_frame.planes[0].update(img.make_blob(format="rgba"))
-                            text_frame.pts = ifr.pts
-                            text_frame.dts = ifr.dts
-                            text_frame.time_base = ifr.time_base
-                            overlay.push(text_frame)
-                        src.push(ifr)
-                        ofr = sink.pull()
-                        for p in output.streams[packet.stream_index].encode(ofr):
-                            output.mux(p)
-                else:
-                    for ifr in packet.decode():
-                        asrc.push(ifr)
-                        ofr = asink.pull()
-                        for p in output.streams[packet.stream_index].encode(ofr):
-                            output.mux(p)
-            offset += chunk.duration
-            chunk.close()
+            start_time = self._offset / 1000000
+            text = self._add_chunk(v, output, not parsed_args.nooverlay)
+            summary.append(
+                f"{text} {math.floor(start_time / 60):02}:"
+                + f"{math.floor(start_time % 60):02}"
+            )
         output.close()
+        for line in summary:
+            summary_file.write(f"{line}\n")
+        summary_file.close()
+
+    def _add_chunk(self, v, output, overlay):
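+        # Decode one per-map video, overlay its title card, fade the audio
+        # and video in and out, and re-encode it onto `output`. Returns the
+        # map title (empty when the overlay is disabled) for the summary.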
+        chunk = av.open(v)
+        if not (len(chunk.streams.video) == 1
+                and len(chunk.streams.audio) == 1):
+            raise Exception(
+                f"irregular chunk {v}: streams {chunk.streams} "
+                + "(expected 1 video & 1 audio)"
+            )
+
+        ograph = av.filter.Graph()
+        sink = ograph.add("buffersink")
+        asink = ograph.add("abuffersink")
+
+        text = ""
+        if overlay:
+            img = wand.image.Image(
+                height=chunk.streams[0].height,
+                width=chunk.streams[0].width
+            )
+            mapstring = v.name[-6:-4]
+            text = self._config["map_names"][f"map{mapstring}"]
+            dcc.text.draw_text(img, f"MAP{mapstring}: {text}", font_size=120)
+            img.trim(reset_coords=True)
+            img.border("graya(25%, 25%)", 10, 10)
+            img.border(dcc.config.TEXT_STROKE_COLOR, 16, 16)
+            # for this to work... the image needs to have a width that's a
+            # multiple of 8. dude whyyyyyyy
+            padfactor = 8
+            img.border("transparent", padfactor, 0)
+            img.crop(
+                width=img.width - img.width % padfactor,
+                height=img.height
+            )
+
+        if len(output.streams.get()) == 0:
+            # We can't use the input stream as a template here; it doesn't
+            # have everything needed to do encoding and will fail
+            # mysteriously later.
+            vs = chunk.streams.video[0]
+            vr = int(vs.time_base.denominator / vs.time_base.numerator)
+            ovs = output.add_stream("h264", rate=vr)
+            ovs.extradata = copy.deepcopy(vs.extradata)
+            ovs.height = vs.height
+            ovs.width = vs.width
+            ovs.qmax = vs.qmax
+            ovs.qmin = vs.qmin
+            ovs.codec_context.bit_rate = vs.codec_context.bit_rate
+            ovs.codec_context.framerate = vs.base_rate
+            ovs.codec_context.pix_fmt = vs.codec_context.pix_fmt
+            # The following are only used for encoding and have no equivalent
+            # on the input stream.
+            ovs.profile = "High"
+            ovs.codec_context.gop_size = 30
+            ovs.codec_context.max_b_frames = 2
+
+            astr = chunk.streams.audio[0]
+            oas = output.add_stream("aac", rate=astr.rate)
+            oas.extradata = copy.deepcopy(astr.extradata)
+            oas.bit_rate = astr.bit_rate
+
+        src = ograph.add_buffer(
+            template=chunk.streams.video[0],
+            time_base=chunk.streams.video[0].time_base
+        )
+        asrc = ograph.add_abuffer(
+            template=chunk.streams.audio[0],
+            time_base=chunk.streams.audio[0].time_base
+        )
+        # TODO: video fades are absolute relative to the input video; audio
+        # fades need to have their timestamps offset by the position in the
+        # final video. Clarify if this is really necessary.
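+        # fade takes positional type:start_frame:nb_frames and afade takes
+        # type:start_sample:nb_samples, so the start positions below are
+        # converted from microsecond offsets via the frame and sample rates.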
+        frame_rate = chunk.streams.video[0].base_rate
+        sample_rate = chunk.streams.audio[0].rate
+        ifade = ograph.add("fade", args=f"in:0:{frame_rate}")
+        ofade_start = (chunk.duration * frame_rate / 1000000) - frame_rate
+        ofade = ograph.add("fade", args=f"out:{ofade_start}:{frame_rate}")
+        iafade_start = self._offset * sample_rate / 1000000
+        iafade = ograph.add("afade", args=f"in:{iafade_start}:{sample_rate}")
+        oafade_start = (
+            (self._offset + chunk.duration) * sample_rate / 1000000
+            - sample_rate
+        )
+        oafade = ograph.add("afade", args=f"out:{oafade_start}:{sample_rate}")
+
+        if overlay:
+            overlay_src = ograph.add_buffer(
+                width=img.width, height=img.height,
+                format="rgba", time_base=chunk.streams.video[0].time_base
+            )
+            overlay_fo = ograph.add(
+                "fade", args=f"out:{4 * frame_rate}:{frame_rate}"
+            )
+            overlay_src.link_to(overlay_fo, 0, 0)
+            composite = ograph.add("overlay", args="x=4:y=4")
+            src.link_to(composite, 0, 0)
+            overlay_fo.link_to(composite, 0, 1)
+            composite.link_to(ifade, 0, 0)
+        else:
+            src.link_to(ifade, 0, 0)
+
+        asrc.link_to(iafade, 0, 0)
+        ifade.link_to(ofade, 0, 0)
+        iafade.link_to(oafade, 0, 0)
+        ofade.link_to(sink, 0, 0)
+        oafade.link_to(asink, 0, 0)
+        ograph.configure()
+
+        for packet in chunk.demux():
+            if packet.dts is None:
+                continue
+            # self._offset is in microseconds (AV_TIME_BASE); rescale it to
+            # this packet's time_base before shifting its timestamps.
+            pof = (
+                (self._offset * packet.time_base.denominator)
+                / (packet.time_base.numerator * 1000000)
+            )
+            packet.dts += pof
+            packet.pts += pof
+            if packet.stream == chunk.streams.video[0]:
+                for ifr in packet.decode():
+                    if overlay:
+                        overlay_src.push(self._make_text_frame(img, ifr))
+                    src.push(ifr)
+                    ofr = sink.pull()
+                    for p in output.streams[packet.stream_index].encode(ofr):
+                        output.mux(p)
+            else:
+                for ifr in packet.decode():
+                    asrc.push(ifr)
+                    ofr = asink.pull()
+                    for p in output.streams[packet.stream_index].encode(ofr):
+                        output.mux(p)
+        self._offset += chunk.duration
+        chunk.close()
+
+        return text
+
+    def _make_text_frame(self, img, ifr):
+        # Each frame pushed into the graph needs its own copy of the image
+        # data that it can own.
+        text_frame = av.video.frame.VideoFrame(
+            img.width, img.height, format="rgba"
+        )
+        text_frame.planes[0].update(img.make_blob(format="rgba"))
+        text_frame.pts = ifr.pts
+        text_frame.dts = ifr.dts
+        text_frame.time_base = ifr.time_base
+        return text_frame