From e8d06c616f1570a21410ad9eec860c9f503dec9c Mon Sep 17 00:00:00 2001
From: yrriban
Date: Sat, 14 Jun 2025 02:55:43 -0400
Subject: [PATCH] Generally streamline and break the concat routine into
 functions.

Ensure the whole thing is PEP 8 compliant. Also output a text file
summary for chapter markers.
---
 dcc/concat.py | 292 +++++++++++++++++++++++++++++++-------------------
 1 file changed, 182 insertions(+), 110 deletions(-)

diff --git a/dcc/concat.py b/dcc/concat.py
index 764f977..1f28199 100644
--- a/dcc/concat.py
+++ b/dcc/concat.py
@@ -8,6 +8,7 @@ import math
 import numpy as np
 import wand.image
 
+
 class Concat(dcc.doom_base.Wad):
     def get_parser(self, prog_name):
         parser = super().get_parser(prog_name)
@@ -20,120 +21,191 @@ class Concat(dcc.doom_base.Wad):
         logging.basicConfig()
         av.logging.set_level(av.logging.VERBOSE)
         av.logging.restore_default_callback()
-        videos = self.fabricate.joinpath(parsed_args.wad).glob(f"{parsed_args.wad}_map*.mp4")
-        output = av.open(self.fabricate.joinpath(parsed_args.wad).joinpath(f"{parsed_args.wad}_maps{parsed_args.start_map}to{parsed_args.end_map}.mp4"), "w")
-        offset = 0
+        videos = (
+            self.fabricate.joinpath(parsed_args.wad)
+            .glob(f"{parsed_args.wad}_map*.mp4")
+        )
+        fn_base = (
+            f"{parsed_args.wad}_maps{parsed_args.start_map}"
+            + f"to{parsed_args.end_map}"
+        )
+        output = av.open(
+            self.fabricate.joinpath(parsed_args.wad).joinpath(
+                f"{fn_base}.mp4"), "w"
+        )
+        summary_file = open(
+            self.fabricate.joinpath(parsed_args.wad).joinpath(
+                f"{fn_base}.txt"), "w"
+        )
+
+        self._offset = 0
+        summary = []
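+        # Each summary entry becomes a "<map title> MM:SS" line in the text
+        # file (e.g. "Entryway 04:10"), ready to use as chapter markers.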
         # We'd like to use the concat filter here and connect everything into a
         # single filter graph... but it produces a "Resource temporarily
-        # unavailable" error when switching to inputs after the first. Presumably
-        # fixable, but it's easier to just make one graph per video and mux
-        # everything together at the end.
+        # unavailable" error when switching to inputs after the first.
+        # Presumably fixable, but it's easier to just make one graph per video
+        # and mux everything together at the end.
         for v in sorted(videos):
             # TODO: Support UDoom in literally any way.
-            if not (v.name >= f"{parsed_args.wad}_map{parsed_args.start_map}.mp4" and
-                    v.name <= f"{parsed_args.wad}_map{parsed_args.end_map}.mp4"):
+            if not (
+                v.name >= f"{parsed_args.wad}_map{parsed_args.start_map}.mp4"
+                and v.name <= f"{parsed_args.wad}_map{parsed_args.end_map}.mp4"
+            ):
                 continue
-
-            chunk = av.open(v)
-            if not (len(chunk.streams.video) == 1 and len(chunk.streams.audio) == 1):
-                raise Exception(f"irregular chunk {v}: streams {chunk.streams} (expected 1 video & 1 audio)")
-
-            ograph = av.filter.Graph()
-            sink = ograph.add("buffersink")
-            asink = ograph.add("abuffersink")
-
-            if not parsed_args.nooverlay:
-                img = wand.image.Image(height=chunk.streams[0].height,width=chunk.streams[0].width)
-                mapstring = v.name[-6:-4]
-                text = self._config["map_names"][f"map{mapstring}"]
-                dcc.text.draw_text(img, f"MAP{mapstring}: {text}", font_size=120)
-                img.trim(reset_coords=True)
-                img.border("graya(25%, 25%)", 10, 10)
-                img.border(dcc.config.TEXT_STROKE_COLOR, 16, 16)
-                # for this to work... the image needs to have a width that's a multiple
-                # of 8. dude whyyyyyyy
-                padfactor=8
-                img.border("transparent", padfactor, 0)
-                img.crop(width=img.width-img.width%padfactor, height=img.height)
-
-            if len(output.streams.get()) == 0:
-                # We can't use the input stream as a template here; it doesn't
-                # have everything needed to do encoding and will fail
-                # mysteriously later.
-                vs = chunk.streams.video[0]
-                output.add_stream("h264", rate=int(vs.time_base.denominator/vs.time_base.numerator))
-                output.streams[0].extradata = copy.deepcopy(vs.extradata)
-                output.streams[0].height=vs.height
-                output.streams[0].width=vs.width
-                output.streams[0].qmax = vs.qmax
-                output.streams[0].qmin = vs.qmin
-                output.streams[0].codec_context.bit_rate = vs.codec_context.bit_rate
-                output.streams[0].codec_context.framerate = vs.base_rate
-                output.streams[0].codec_context.pix_fmt = vs.codec_context.pix_fmt
-                # The following are only used for encoding and have no equivalent on the input stream.
-                output.streams[0].profile="High"
-                output.streams[0].codec_context.gop_size=30
-                output.streams[0].codec_context.max_b_frames=2
-
-                astr = chunk.streams.audio[0]
-                output.add_stream("aac", rate=astr.rate)
-                output.streams[1].extradata = copy.deepcopy(astr.extradata)
-                output.streams[1].bit_rate=astr.bit_rate
-
-            src = ograph.add_buffer(template=chunk.streams.video[0], time_base=chunk.streams.video[0].time_base)
-            asrc = ograph.add_abuffer(template=chunk.streams.audio[0], time_base=chunk.streams.audio[0].time_base)
-            # TODO: video fades are absolute relative to the input video; audio
-            # fades need to have their timestamps offset by the position in the
-            # final video. Clarify if this is really necessary.
-            frame_rate = chunk.streams.video[0].base_rate
-            sample_rate = chunk.streams.audio[0].rate
-            ifade = ograph.add("fade", args="in:0:{}".format(frame_rate))
-            ofade = ograph.add("fade", args="out:{}:{}".format((chunk.duration*frame_rate/1000000)-frame_rate, frame_rate))
-            iafade = ograph.add("afade", args="in:{}:{}".format(offset*sample_rate/1000000, sample_rate))
-            oafade = ograph.add("afade", args="out:{}:{}".format(((offset+chunk.duration)*sample_rate/1000000)-sample_rate, sample_rate))
-            if not parsed_args.nooverlay:
-                overlay = ograph.add_buffer(width=img.width, height=img.height, format="rgba", time_base=chunk.streams[0].time_base)
-                overlay_fo = ograph.add("fade", args="out:{}:{}".format(4*frame_rate, frame_rate))
-                overlay.link_to(overlay_fo, 0, 0)
-                composite = ograph.add("overlay", args="x=4:y=4")
-                src.link_to(composite, 0, 0)
-                overlay_fo.link_to(composite, 0, 1)
-                composite.link_to(ifade, 0, 0)
-            else:
-                src.link_to(ifade, 0, 0)
-
-            asrc.link_to(iafade, 0, 0)
-            ifade.link_to(ofade, 0, 0)
-            iafade.link_to(oafade, 0, 0)
-            ofade.link_to(sink, 0, 0)
-            oafade.link_to(asink, 0, 0)
-            ograph.configure()
-
-            for packet in chunk.demux():
-                if packet.dts is None:
-                    continue
-                packet.dts += (offset * packet.time_base.denominator) / (packet.time_base.numerator * 1000000)
-                packet.pts += (offset * packet.time_base.denominator) / (packet.time_base.numerator * 1000000)
-                if packet.stream == chunk.streams.video[0]:
-                    for ifr in packet.decode():
-                        if not parsed_args.nooverlay:
-                            text_frame = av.video.frame.VideoFrame(img.width, img.height, format="rgba")
-                            text_frame.planes[0].update(img.make_blob(format="rgba"))
-                            text_frame.pts = ifr.pts
-                            text_frame.dts = ifr.dts
-                            text_frame.time_base = ifr.time_base
-                            overlay.push(text_frame)
-                        src.push(ifr)
-                        ofr = sink.pull()
-                        for p in output.streams[packet.stream_index].encode(ofr):
-                            output.mux(p)
-                else:
-                    for ifr in packet.decode():
-                        asrc.push(ifr)
-                        ofr = asink.pull()
-                        for p in output.streams[packet.stream_index].encode(ofr):
-                            output.mux(p)
-            offset += chunk.duration
-            chunk.close()
+            start_time = self._offset / 1000000
+            text = self._add_chunk(v, output, not parsed_args.nooverlay)
+            summary.append(
+                f"{text} {math.floor(start_time / 60):02}:"
+                + f"{math.floor(start_time % 60):02}"
+            )
         output.close()
+        for line in summary:
+            summary_file.write(f"{line}\n")
+        summary_file.close()
+
+    def _add_chunk(self, v, output, overlay):
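+        # Decode one per-map video, overlay its title card, fade the audio
+        # and video in and out, and re-encode it onto `output`. Returns the
+        # map title (empty when the overlay is disabled) for the summary.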
+        chunk = av.open(v)
+        if not (len(chunk.streams.video) == 1
+                and len(chunk.streams.audio) == 1):
+            raise Exception(
+                f"irregular chunk {v}: streams {chunk.streams} "
+                + "(expected 1 video & 1 audio)"
+            )
+
+        ograph = av.filter.Graph()
+        sink = ograph.add("buffersink")
+        asink = ograph.add("abuffersink")
+
+        text = ""
+        if overlay:
+            img = wand.image.Image(
+                height=chunk.streams[0].height,
+                width=chunk.streams[0].width
+            )
+            mapstring = v.name[-6:-4]
+            text = self._config["map_names"][f"map{mapstring}"]
+            dcc.text.draw_text(img, f"MAP{mapstring}: {text}", font_size=120)
+            img.trim(reset_coords=True)
+            img.border("graya(25%, 25%)", 10, 10)
+            img.border(dcc.config.TEXT_STROKE_COLOR, 16, 16)
+            # for this to work... the image needs to have a width that's a
+            # multiple of 8. dude whyyyyyyy
+            padfactor = 8
+            img.border("transparent", padfactor, 0)
+            img.crop(
+                width=img.width - img.width % padfactor,
+                height=img.height
+            )
+
+        if len(output.streams.get()) == 0:
+            # We can't use the input stream as a template here; it doesn't
+            # have everything needed to do encoding and will fail
+            # mysteriously later.
+            vs = chunk.streams.video[0]
+            vr = int(vs.time_base.denominator / vs.time_base.numerator)
+            ovs = output.add_stream("h264", rate=vr)
+            ovs.extradata = copy.deepcopy(vs.extradata)
+            ovs.height = vs.height
+            ovs.width = vs.width
+            ovs.qmax = vs.qmax
+            ovs.qmin = vs.qmin
+            ovs.codec_context.bit_rate = vs.codec_context.bit_rate
+            ovs.codec_context.framerate = vs.base_rate
+            ovs.codec_context.pix_fmt = vs.codec_context.pix_fmt
+            # The following are only used for encoding and have no equivalent
+            # on the input stream.
+            ovs.profile = "High"
+            ovs.codec_context.gop_size = 30
+            ovs.codec_context.max_b_frames = 2
+
+            astr = chunk.streams.audio[0]
+            oas = output.add_stream("aac", rate=astr.rate)
+            oas.extradata = copy.deepcopy(astr.extradata)
+            oas.bit_rate = astr.bit_rate
+
+        src = ograph.add_buffer(
+            template=chunk.streams.video[0],
+            time_base=chunk.streams.video[0].time_base
+        )
+        asrc = ograph.add_abuffer(
+            template=chunk.streams.audio[0],
+            time_base=chunk.streams.audio[0].time_base
+        )
+        # TODO: video fades are absolute relative to the input video; audio
+        # fades need to have their timestamps offset by the position in the
+        # final video. Clarify if this is really necessary.
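+        # fade takes positional type:start_frame:nb_frames and afade takes
+        # type:start_sample:nb_samples, so the start positions below are
+        # converted from microsecond offsets via the frame and sample rates.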
+        frame_rate = chunk.streams.video[0].base_rate
+        sample_rate = chunk.streams.audio[0].rate
+        ifade = ograph.add("fade", args=f"in:0:{frame_rate}")
+        ofade_start = (chunk.duration * frame_rate / 1000000) - frame_rate
+        ofade = ograph.add("fade", args=f"out:{ofade_start}:{frame_rate}")
+        iafade_start = self._offset * sample_rate / 1000000
+        iafade = ograph.add("afade", args=f"in:{iafade_start}:{sample_rate}")
+        oafade_start = (
+            (self._offset + chunk.duration) * sample_rate / 1000000
+            - sample_rate
+        )
+        oafade = ograph.add("afade", args=f"out:{oafade_start}:{sample_rate}")
+
+        if overlay:
+            overlay_src = ograph.add_buffer(
+                width=img.width, height=img.height,
+                format="rgba", time_base=chunk.streams.video[0].time_base
+            )
+            overlay_fo = ograph.add(
+                "fade", args=f"out:{4 * frame_rate}:{frame_rate}"
+            )
+            overlay_src.link_to(overlay_fo, 0, 0)
+            composite = ograph.add("overlay", args="x=4:y=4")
+            src.link_to(composite, 0, 0)
+            overlay_fo.link_to(composite, 0, 1)
+            composite.link_to(ifade, 0, 0)
+        else:
+            src.link_to(ifade, 0, 0)
+
+        asrc.link_to(iafade, 0, 0)
+        ifade.link_to(ofade, 0, 0)
+        iafade.link_to(oafade, 0, 0)
+        ofade.link_to(sink, 0, 0)
+        oafade.link_to(asink, 0, 0)
+        ograph.configure()
+
+        for packet in chunk.demux():
+            if packet.dts is None:
+                continue
+            # self._offset is in microseconds (AV_TIME_BASE); rescale it to
+            # this packet's time_base before shifting its timestamps.
+            pof = (
+                (self._offset * packet.time_base.denominator)
+                / (packet.time_base.numerator * 1000000)
+            )
+            packet.dts += pof
+            packet.pts += pof
+            if packet.stream == chunk.streams.video[0]:
+                for ifr in packet.decode():
+                    if overlay:
+                        overlay_src.push(self._make_text_frame(img, ifr))
+                    src.push(ifr)
+                    ofr = sink.pull()
+                    for p in output.streams[packet.stream_index].encode(ofr):
+                        output.mux(p)
+            else:
+                for ifr in packet.decode():
+                    asrc.push(ifr)
+                    ofr = asink.pull()
+                    for p in output.streams[packet.stream_index].encode(ofr):
+                        output.mux(p)
+        self._offset += chunk.duration
+        chunk.close()
+
+        return text
+
+    def _make_text_frame(self, img, ifr):
+        # Each frame pushed into the graph needs its own copy of the image
+        # data that it can own.
+        text_frame = av.video.frame.VideoFrame(
+            img.width, img.height, format="rgba"
+        )
+        text_frame.planes[0].update(img.make_blob(format="rgba"))
+        text_frame.pts = ifr.pts
+        text_frame.dts = ifr.dts
+        text_frame.time_base = ifr.time_base
+        return text_frame