Generally streamline and break the concat routine into functions.

Ensure the whole thing is PEP 8 compliant.  Also output a text file
summary for chapter markers.
yrriban 2025-06-14 02:55:43 -04:00
parent 6baacd9b89
commit e8d06c616f


@@ -8,6 +8,7 @@ import math
 import numpy as np
 import wand.image
+

 class Concat(dcc.doom_base.Wad):
     def get_parser(self, prog_name):
         parser = super().get_parser(prog_name)
@@ -20,80 +21,141 @@ class Concat(dcc.doom_base.Wad):
         logging.basicConfig()
         av.logging.set_level(av.logging.VERBOSE)
         av.logging.restore_default_callback()
-        videos = self.fabricate.joinpath(parsed_args.wad).glob(f"{parsed_args.wad}_map*.mp4")
-        output = av.open(self.fabricate.joinpath(parsed_args.wad).joinpath(f"{parsed_args.wad}_maps{parsed_args.start_map}to{parsed_args.end_map}.mp4"), "w")
-        offset = 0
+        videos = (
+            self.fabricate.joinpath(parsed_args.wad)
+            .glob(f"{parsed_args.wad}_map*.mp4")
+        )
+        fn_base = (
+            f"{parsed_args.wad}_maps{parsed_args.start_map}"
+            + f"to{parsed_args.end_map}"
+        )
+        output = av.open(
+            self.fabricate.joinpath(parsed_args.wad).joinpath(
+                f"{fn_base}.mp4"), "w"
+        )
+        summary_file = open(
+            self.fabricate.joinpath(parsed_args.wad).joinpath(
+                f"{fn_base}.txt"), "w"
+        )
+        self._offset = 0
+        summary = []
         # We'd like to use the concat filter here and connect everything into a
         # single filter graph... but it produces a "Resource temporarily
-        # unavailable" error when switching to inputs after the first. Presumably
-        # fixable, but it's easier to just make one graph per video and mux
-        # everything together at the end.
+        # unavailable" error when switching to inputs after the first.
+        # Presumably fixable, but it's easier to just make one graph per video
+        # and mux everything together at the end.
         for v in sorted(videos):
             # TODO: Support UDoom in literally any way.
-            if not (v.name >= f"{parsed_args.wad}_map{parsed_args.start_map}.mp4" and
-                    v.name <= f"{parsed_args.wad}_map{parsed_args.end_map}.mp4"):
+            if not (
+                v.name >= f"{parsed_args.wad}_map{parsed_args.start_map}.mp4"
+                and v.name <= f"{parsed_args.wad}_map{parsed_args.end_map}.mp4"
+            ):
                 continue
+            start_time = self._offset / 1000000
+            text = self._add_chunk(v, output, not parsed_args.nooverlay)
+            summary.append(
+                f"{text} {math.floor(start_time / 60):02}:"
+                + f"{math.floor(start_time % 60):02}"
+            )
+        output.close()
+        for line in summary:
+            summary_file.write(f"{line}\n")
+        summary_file.close()
+
+    def _add_chunk(self, v, output, overlay):
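+        # Mux one per-map video into `output`: fade A/V in and out, optionally
+        # composite the map-title overlay, and return the map name used for
+        # the chapter summary (empty when the overlay is disabled).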
         chunk = av.open(v)
-        if not (len(chunk.streams.video) == 1 and len(chunk.streams.audio) == 1):
-            raise Exception(f"irregular chunk {v}: streams {chunk.streams} (expected 1 video & 1 audio)")
+        if not (len(chunk.streams.video) == 1
+                and len(chunk.streams.audio) == 1):
+            raise Exception(
+                f"irregular chunk {v}: streams {chunk.streams} "
+                + f"(expected 1 video & 1 audio)"
+            )
         ograph = av.filter.Graph()
         sink = ograph.add("buffersink")
         asink = ograph.add("abuffersink")
-        if not parsed_args.nooverlay:
-            img = wand.image.Image(height=chunk.streams[0].height,width=chunk.streams[0].width)
+        text = ""
+        if overlay:
+            img = wand.image.Image(
+                height=chunk.streams[0].height,
+                width=chunk.streams[0].width
+            )
             mapstring = v.name[-6:-4]
             text = self._config["map_names"][f"map{mapstring}"]
             dcc.text.draw_text(img, f"MAP{mapstring}: {text}", font_size=120)
             img.trim(reset_coords=True)
             img.border("graya(25%, 25%)", 10, 10)
             img.border(dcc.config.TEXT_STROKE_COLOR, 16, 16)
-            # for this to work... the image needs to have a width that's a multiple
-            # of 8. dude whyyyyyyy
-            padfactor=8
+            # for this to work... the image needs to have a width that's a
+            # multiple of 8. dude whyyyyyyy
+            padfactor = 8
             img.border("transparent", padfactor, 0)
-            img.crop(width=img.width-img.width%padfactor, height=img.height)
+            img.crop(
+                width=img.width - img.width % padfactor,
+                height=img.height
+            )
         if len(output.streams.get()) == 0:
             # We can't use the input stream as a template here; it doesn't
             # have everything needed to do encoding and will fail
             # mysteriously later.
             vs = chunk.streams.video[0]
-            output.add_stream("h264", rate=int(vs.time_base.denominator/vs.time_base.numerator))
-            output.streams[0].extradata = copy.deepcopy(vs.extradata)
-            output.streams[0].height=vs.height
-            output.streams[0].width=vs.width
-            output.streams[0].qmax = vs.qmax
-            output.streams[0].qmin = vs.qmin
-            output.streams[0].codec_context.bit_rate = vs.codec_context.bit_rate
-            output.streams[0].codec_context.framerate = vs.base_rate
-            output.streams[0].codec_context.pix_fmt = vs.codec_context.pix_fmt
-            # The following are only used for encoding and have no equivalent on the input stream.
-            output.streams[0].profile="High"
-            output.streams[0].codec_context.gop_size=30
-            output.streams[0].codec_context.max_b_frames=2
+            vr = int(vs.time_base.denominator/vs.time_base.numerator)
+            ovs = output.add_stream("h264", rate=vr)
+            ovs.extradata = copy.deepcopy(vs.extradata)
+            ovs.height = vs.height
+            ovs.width = vs.width
+            ovs.qmax = vs.qmax
+            ovs.qmin = vs.qmin
+            ovs.codec_context.bit_rate = vs.codec_context.bit_rate
+            ovs.codec_context.framerate = vs.base_rate
+            ovs.codec_context.pix_fmt = vs.codec_context.pix_fmt
+            # The following are only used for encoding and have no equivalent
+            # on the input stream.
+            ovs.profile = "High"
+            ovs.codec_context.gop_size = 30
+            ovs.codec_context.max_b_frames = 2
         astr = chunk.streams.audio[0]
-            output.add_stream("aac", rate=astr.rate)
-            output.streams[1].extradata = copy.deepcopy(astr.extradata)
-            output.streams[1].bit_rate=astr.bit_rate
+            oas = output.add_stream("aac", rate=astr.rate)
+            oas.extradata = copy.deepcopy(astr.extradata)
+            oas.bit_rate = astr.bit_rate
-        src = ograph.add_buffer(template=chunk.streams.video[0], time_base=chunk.streams.video[0].time_base)
-        asrc = ograph.add_abuffer(template=chunk.streams.audio[0], time_base=chunk.streams.audio[0].time_base)
+        src = ograph.add_buffer(
+            template=chunk.streams.video[0],
+            time_base=chunk.streams.video[0].time_base
+        )
+        asrc = ograph.add_abuffer(
+            template=chunk.streams.audio[0],
+            time_base=chunk.streams.audio[0].time_base
+        )
         # TODO: video fades are absolute relative to the input video; audio
         # fades need to have their timestamps offset by the position in the
         # final video. Clarify if this is really necessary.
         frame_rate = chunk.streams.video[0].base_rate
         sample_rate = chunk.streams.audio[0].rate
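+        # The fade positions below are frame/sample counts: chunk.duration
+        # and self._offset are in microseconds (PyAV's av.time_base), so
+        # value * rate / 1000000 converts them to frames or samples.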
         ifade = ograph.add("fade", args="in:0:{}".format(frame_rate))
-        ofade = ograph.add("fade", args="out:{}:{}".format((chunk.duration*frame_rate/1000000)-frame_rate, frame_rate))
-        iafade = ograph.add("afade", args="in:{}:{}".format(offset*sample_rate/1000000, sample_rate))
-        oafade = ograph.add("afade", args="out:{}:{}".format(((offset+chunk.duration)*sample_rate/1000000)-sample_rate, sample_rate))
-        if not parsed_args.nooverlay:
-            overlay = ograph.add_buffer(width=img.width, height=img.height, format="rgba", time_base=chunk.streams[0].time_base)
-            overlay_fo = ograph.add("fade", args="out:{}:{}".format(4*frame_rate, frame_rate))
+        ofade_start = (chunk.duration * frame_rate / 1000000) - frame_rate
+        ofade = ograph.add("fade", args=f"out:{ofade_start}:{frame_rate}")
+        iafade_start = self._offset * sample_rate / 1000000
+        iafade = ograph.add("afade", args=f"in:{iafade_start}:{sample_rate}")
+        oafade_start = (
+            (self._offset + chunk.duration) * sample_rate / 1000000
+            - sample_rate
+        )
+        oafade = ograph.add("afade", args=f"out:{oafade_start}:{sample_rate}")
+        if overlay:
+            overlay = ograph.add_buffer(
+                width=img.width, height=img.height,
+                format="rgba", time_base=chunk.streams.video[0].time_base
+            )
+            overlay_fo = ograph.add(
+                "fade", args=f"out:{4 * frame_rate}:{frame_rate}"
+            )
             overlay.link_to(overlay_fo, 0, 0)
             composite = ograph.add("overlay", args="x=4:y=4")
             src.link_to(composite, 0, 0)
@@ -112,17 +174,16 @@ class Concat(dcc.doom_base.Wad):
         for packet in chunk.demux():
             if packet.dts is None:
                 continue
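+            # Rebase the packet from chunk-local time onto the output
+            # timeline: self._offset is microseconds, so scale by
+            # denominator / (numerator * 1000000) to get time_base ticks.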
-            packet.dts += (offset * packet.time_base.denominator) / (packet.time_base.numerator * 1000000)
-            packet.pts += (offset * packet.time_base.denominator) / (packet.time_base.numerator * 1000000)
+            pof = (
+                (self._offset * packet.time_base.denominator)
+                / (packet.time_base.numerator * 1000000)
+            )
+            packet.dts += pof
+            packet.pts += pof
             if packet.stream == chunk.streams.video[0]:
                 for ifr in packet.decode():
-                    if not parsed_args.nooverlay:
-                        text_frame = av.video.frame.VideoFrame(img.width, img.height, format="rgba")
-                        text_frame.planes[0].update(img.make_blob(format="rgba"))
-                        text_frame.pts = ifr.pts
-                        text_frame.dts = ifr.dts
-                        text_frame.time_base = ifr.time_base
-                        overlay.push(text_frame)
+                    if overlay:
+                        overlay.push(self._make_text_frame(img, ifr))
                     src.push(ifr)
                     ofr = sink.pull()
                     for p in output.streams[packet.stream_index].encode(ofr):
@@ -133,7 +194,18 @@ class Concat(dcc.doom_base.Wad):
                 ofr = asink.pull()
                 for p in output.streams[packet.stream_index].encode(ofr):
                     output.mux(p)
-            offset += chunk.duration
+        self._offset += chunk.duration
         chunk.close()
-        output.close()
+        return text
+
+    def _make_text_frame(self, img, ifr):
+        # We need to give each frame its own memory it can own.
+        text_frame = av.video.frame.VideoFrame(
+            img.width, img.height, format="rgba"
+        )
+        text_frame.planes[0].update(img.make_blob(format="rgba"))
+        text_frame.pts = ifr.pts
+        text_frame.dts = ifr.dts
+        text_frame.time_base = ifr.time_base
+        return text_frame
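
The summary file written next to the concatenated MP4 holds the chapter
markers, one "<map name> MM:SS" line per map, timestamped at the map's start
offset in the final video. With hypothetical map_names config entries it
would look like:

    Entryway 00:00
    Underhalls 04:12
    The Gantlet 09:48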