doomcc/dcc/concat.py


import av
import copy
import enum
import logging
import math

import dcc.doom_base
import wand.image


class State(enum.Enum):
    NOT_STARTED = 1
    STARTED = 2
    DONE = 3


class Concat(dcc.doom_base.Wad):
    def get_parser(self, prog_name):
        parser = super().get_parser(prog_name)
        parser.add_argument("start_map")
        parser.add_argument("end_map")
        parser.add_argument("-n", "--nooverlay", action="store_true")
        return parser
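
    # start_map and end_map are matched as strings against the zero-padded
    # Doom II map list built in take_action, so pass them in that form:
    # "01", "07", "31", and so on.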

    def take_action(self, parsed_args):
        logging.basicConfig()
        av.logging.set_level(av.logging.VERBOSE)
        av.logging.restore_default_callback()
        videodir = self.fabricate.joinpath(parsed_args.wad)
        fn_base = (
            f"{parsed_args.wad}_maps{parsed_args.start_map}"
            f"to{parsed_args.end_map}"
        )
        output = av.open(videodir.joinpath(f"{fn_base}.mp4"), "w")
        summary_file = open(videodir.joinpath(f"{fn_base}.txt"), "w")
        # Running position of the next chunk in the output, in av.time_base
        # units (microseconds).
        self._offset = 0
        summary = []
        # We'd like to use the concat filter here and connect everything
        # into a single filter graph... but it produces a "Resource
        # temporarily unavailable" error when switching to any input after
        # the first. Presumably fixable, but it's easier to just build one
        # graph per video and mux everything together at the end.
        # TODO: Support UDoom in literally any way.
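        # Doom II's progression is MAP01-MAP15, then the secret maps
        # MAP31/MAP32 (reached via MAP15's secret exit), then MAP16-MAP30,
        # so 31 and 32 are spliced in after 15 here.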
        d2maps = (
            [str(x).zfill(2) for x in range(1, 16)]
            + ["31", "32"]
            + [str(x) for x in range(16, 31)]
        )
        state = State.NOT_STARTED
        for idx in d2maps:
            if idx == parsed_args.start_map:
                state = State.STARTED
            if idx == parsed_args.end_map:
                state = State.DONE
            if state == State.NOT_STARTED:
                continue
            start_time = self._offset / 1000000
            text = self._add_chunk(
                videodir.joinpath(f"{parsed_args.wad}_map{idx}.mp4"),
                output, not parsed_args.nooverlay
            )
            summary.append(
                f"{text} {math.floor(start_time / 60):02}:"
                f"{math.floor(start_time % 60):02}"
            )
            if state == State.DONE:
                break
        # Flush any frames still buffered in the encoders, then close.
        for stream in output.streams:
            for p in stream.encode():
                output.mux(p)
        output.close()
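        # Each summary line is a map title plus its start timestamp in the
        # concatenated video, e.g. "Underhalls 04:23" for a map that
        # begins 263 seconds in.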
        for line in summary:
            summary_file.write(f"{line}\n")
        summary_file.close()

    def _add_chunk(self, v, output, overlay):
        chunk = av.open(v)
        if not (len(chunk.streams.video) == 1
                and len(chunk.streams.audio) == 1):
            raise Exception(
                f"irregular chunk {v}: streams {chunk.streams} "
                "(expected 1 video & 1 audio)"
            )
        ograph = av.filter.Graph()
        sink = ograph.add("buffersink")
        asink = ograph.add("abuffersink")
        text = ""
        if overlay:
            img = wand.image.Image(
                height=chunk.streams.video[0].height,
                width=chunk.streams.video[0].width
            )
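            # v.name is "<wad>_mapNN.mp4" (see take_action), so [-6:-4]
            # slices out the two-digit map number, e.g.
            # "somewad_map31.mp4" -> "31".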
            mapstring = v.name[-6:-4]
            text = self._config["map_names"][f"map{mapstring}"]
            self.draw_text(
                img,
                f"MAP{mapstring}: {text}",
                font_size=120
            )
            img.trim(reset_coords=True)
            img.border("graya(25%, 25%)", 10, 10)
            img.border(self.thumbnail_text_stroke, 16, 16)
            # For this to work... the image needs to have a width that's a
            # multiple of 8. dude whyyyyyyy
            padfactor = 8
            img.border("transparent", padfactor, 0)
            img.crop(
                width=img.width - img.width % padfactor,
                height=img.height
            )
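            # A guess at the why (unverified): frame planes are allocated
            # with aligned strides, and 8 rgba pixels is 32 bytes, so
            # rounding the width down keeps the packed blob produced in
            # _make_text_frame the same size as the plane it fills. E.g. a
            # 503-px-wide trimmed image is padded to 519 and cropped back
            # to 512 (519 - 519 % 8).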
        if len(output.streams.get()) == 0:
            # We can't use the input stream as a template here; it doesn't
            # have everything needed to do encoding and will fail
            # mysteriously later.
            vs = chunk.streams.video[0]
            vr = int(vs.time_base.denominator / vs.time_base.numerator)
            ovs = output.add_stream("h264", rate=vr)
            ovs.extradata = copy.deepcopy(vs.extradata)
            ovs.height = vs.height
            ovs.width = vs.width
            ovs.qmax = vs.qmax
            ovs.qmin = vs.qmin
            ovs.codec_context.bit_rate = vs.codec_context.bit_rate
            ovs.codec_context.framerate = vs.base_rate
            ovs.codec_context.pix_fmt = vs.codec_context.pix_fmt
            # The following are only used for encoding and have no
            # equivalent on the input stream.
            ovs.profile = "High"
            ovs.codec_context.gop_size = 30
            ovs.codec_context.max_b_frames = 2
            astr = chunk.streams.audio[0]
            oas = output.add_stream("aac", rate=astr.rate)
            oas.extradata = copy.deepcopy(astr.extradata)
            oas.bit_rate = astr.bit_rate
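            # extradata is the codec's out-of-band header data (for h264,
            # the SPS/PPS); it's deep-copied above, presumably so the
            # output streams don't share buffers with the input.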
        src = ograph.add_buffer(
            template=chunk.streams.video[0],
            time_base=chunk.streams.video[0].time_base
        )
        asrc = ograph.add_abuffer(
            template=chunk.streams.audio[0],
            time_base=chunk.streams.audio[0].time_base
        )
        # TODO: video fade frame indices are relative to the input video,
        # but audio fade start samples are offset by this chunk's position
        # in the final video. Clarify whether that asymmetry is really
        # necessary.
        frame_rate = chunk.streams.video[0].base_rate
        sample_rate = chunk.streams.audio[0].rate
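        # Positional filter args are fade=type:start_frame:nb_frames and
        # afade=type:start_sample:nb_samples, while chunk.duration is in
        # av.time_base units (microseconds). E.g. a 90-second chunk at
        # Doom's 35 fps gives ofade_start = 90000000 * 35 / 1000000 - 35
        # = 3115, so the fade-out spans the final second.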
        ifade = ograph.add("fade", args=f"in:0:{frame_rate}")
        ofade_start = (chunk.duration * frame_rate / 1000000) - frame_rate
        ofade = ograph.add("fade", args=f"out:{ofade_start}:{frame_rate}")
        iafade_start = self._offset * sample_rate / 1000000
        iafade = ograph.add("afade", args=f"in:{iafade_start}:{sample_rate}")
        oafade_start = (
            (self._offset + chunk.duration) * sample_rate / 1000000
            - sample_rate
        )
        oafade = ograph.add("afade", args=f"out:{oafade_start}:{sample_rate}")
        if overlay:
            # Use a distinct name for the buffer node so it doesn't shadow
            # the overlay flag.
            overlay_src = ograph.add_buffer(
                width=img.width, height=img.height,
                format="rgba", time_base=chunk.streams.video[0].time_base
            )
            overlay_fo = ograph.add(
                "fade", args=f"out:{4 * frame_rate}:{frame_rate}"
            )
            overlay_src.link_to(overlay_fo, 0, 0)
            composite = ograph.add("overlay", args="x=4:y=4")
            src.link_to(composite, 0, 0)
            overlay_fo.link_to(composite, 0, 1)
            composite.link_to(ifade, 0, 0)
        else:
            src.link_to(ifade, 0, 0)
        asrc.link_to(iafade, 0, 0)
        ifade.link_to(ofade, 0, 0)
        iafade.link_to(oafade, 0, 0)
        ofade.link_to(sink, 0, 0)
        oafade.link_to(asink, 0, 0)
        ograph.configure()
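        # Demux the chunk, shift each packet by the running offset
        # (rescaled from microseconds to the stream's time_base ticks),
        # run the decoded frames through the graph, and re-encode. E.g.
        # with a 1/1000 time_base, a 90-second offset becomes
        # 90000000 * 1000 / 1000000 = 90000 ticks.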
        for packet in chunk.demux():
            if packet.dts is None:
                continue
            # Timestamps are integer ticks, so truncate the rescaled
            # offset.
            pof = int(
                (self._offset * packet.time_base.denominator)
                / (packet.time_base.numerator * 1000000)
            )
            packet.dts += pof
            packet.pts += pof
            if packet.stream == chunk.streams.video[0]:
                for ifr in packet.decode():
                    if overlay:
                        overlay_src.push(self._make_text_frame(img, ifr))
                    src.push(ifr)
                    ofr = sink.pull()
                    for p in output.streams[packet.stream_index].encode(ofr):
                        output.mux(p)
            else:
                for ifr in packet.decode():
                    asrc.push(ifr)
                    ofr = asink.pull()
                    for p in output.streams[packet.stream_index].encode(ofr):
                        output.mux(p)
        self._offset += chunk.duration
        chunk.close()
        return text

    def _make_text_frame(self, img, ifr):
        # Each frame needs its own copy of the image bytes so it owns its
        # memory.
        text_frame = av.video.frame.VideoFrame(
            img.width, img.height, format="rgba"
        )
        text_frame.planes[0].update(img.make_blob(format="rgba"))
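        # Copying the source frame's timing lets the overlay filter pair
        # this text frame with the video frame it's composited onto.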
        text_frame.pts = ifr.pts
        text_frame.dts = ifr.dts
        text_frame.time_base = ifr.time_base
        return text_frame