"""Concatenate rendered per-map videos into one output file (dcc/concat.py)."""

import av
import copy
import dcc.doom_base
import fractions
import io
import logging
import math
import numpy as np
import wand.image

# NOTE(review): dcc.text and dcc.config are referenced below but are not
# imported by this module; presumably they get loaded as a side effect of
# importing dcc.doom_base -- confirm.


class Concat(dcc.doom_base.Wad):
    """Command that joins a range of a WAD's per-map videos into one file.

    Every input video gets a title card overlaid in its top-left corner and a
    fade-in/fade-out on both video and audio before being re-encoded into the
    combined output.
    """

    def get_parser(self, prog_name):
        """Extend the base parser with the ``start_map``/``end_map`` bounds."""
        parser = super().get_parser(prog_name)
        parser.add_argument("start_map")
        parser.add_argument("end_map")
        return parser

    def _render_title_card(self, width, height, mapstring):
        """Render the "MAPxx: name" title card for one map.

        Args:
            width: Width of the scratch canvas the text is drawn on.
            height: Height of the scratch canvas the text is drawn on.
            mapstring: Two-character map number taken from the file name.

        Returns:
            A ``(card_width, card_height, rgba_bytes)`` tuple describing the
            trimmed, bordered card as raw RGBA pixel data.
        """
        with wand.image.Image(height=height, width=width) as img:
            text = self._config["map_names"][f"map{mapstring}"]
            dcc.text.draw_text(img, f"MAP{mapstring}: {text}", font_size=120)
            img.trim(reset_coords=True)
            img.border("graya(25%, 25%)", 10, 10)
            img.border(dcc.config.TEXT_STROKE_COLOR, 16, 16)
            # The frame buffer update below only works when the image width
            # is a multiple of 8, so pad with transparent pixels and then crop
            # back down to the nearest multiple.
            padfactor = 8
            img.border("transparent", padfactor, 0)
            img.crop(width=img.width - img.width % padfactor, height=img.height)
            return img.width, img.height, img.make_blob(format="rgba")

    def take_action(self, parsed_args):
        """Build ``<wad>_maps<start>to<end>.mp4`` from the per-map videos.

        Args:
            parsed_args: Parsed command line; reads ``wad``, ``start_map`` and
                ``end_map``.
        """
        logging.basicConfig()
        av.logging.set_level(av.logging.VERBOSE)
        av.logging.restore_default_callback()

        wad_dir = self.fabricate.joinpath(parsed_args.wad)
        videos = wad_dir.glob(f"{parsed_args.wad}_map*.mp4")
        output_path = wad_dir.joinpath(
            f"{parsed_args.wad}_maps{parsed_args.start_map}to{parsed_args.end_map}.mp4"
        )
        first_name = f"{parsed_args.wad}_map{parsed_args.start_map}.mp4"
        last_name = f"{parsed_args.wad}_map{parsed_args.end_map}.mp4"

        # Running start position of the current chunk within the output, in
        # the same units as ``chunk.duration`` (the arithmetic below treats it
        # as microseconds).
        offset = 0

        # The container is a context manager so the file is closed even when
        # decoding, filtering or encoding raises part-way through.
        with av.open(output_path, "w") as output:
            # We'd like to use the concat filter here and connect everything
            # into a single filter graph... but it produces a "Resource
            # temporarily unavailable" error when switching to inputs after
            # the first. Presumably fixable, but it's easier to just make one
            # graph per video and mux everything together at the end.
            for v in sorted(videos):
                # TODO: Support UDoom in literally any way.
                # Plain string comparison on the file name selects the range.
                if not (first_name <= v.name <= last_name):
                    continue

                with av.open(v) as chunk:
                    ograph = av.filter.Graph()
                    sink = ograph.add("buffersink")
                    asink = ograph.add("abuffersink")

                    # The two characters before ".mp4" are the map number.
                    mapstring = v.name[-6:-4]
                    # Rendered once per chunk; the pixels are identical for
                    # every frame, so there is no need to redo it per frame.
                    card_width, card_height, card_pixels = self._render_title_card(
                        chunk.streams[0].width, chunk.streams[0].height, mapstring
                    )

                    if len(output.streams.get()) == 0:
                        # The output streams are created lazily from the first
                        # chunk so they can copy its encoder parameters.
                        # TODO: less hardcoding.
                        output.add_stream("h264", rate=61440)
                        output.streams[0].extradata = copy.deepcopy(chunk.streams[0].extradata)
                        output.streams[0].height = 1440
                        output.streams[0].width = 2560
                        output.streams[0].profile = "High"
                        output.streams[0].qmax = chunk.streams[0].qmax
                        output.streams[0].qmin = chunk.streams[0].qmin
                        output.streams[0].codec_context.gop_size = 30
                        output.streams[0].codec_context.max_b_frames = 2
                        output.streams[0].codec_context.framerate = fractions.Fraction(60, 1)
                        output.streams[0].codec_context.pix_fmt = "yuv420p"
                        output.streams[0].codec_context.bit_rate = chunk.streams[0].codec_context.bit_rate
                        output.add_stream("aac", rate=48000)
                        # NOTE(review): this copies the output stream's own
                        # extradata onto itself; the video stream above copies
                        # from ``chunk`` instead -- confirm which is intended.
                        output.streams[1].extradata = copy.deepcopy(output.streams[1].extradata)
                        output.streams[1].rate = 48000
                        output.streams[1].bit_rate = chunk.streams[1].bit_rate

                    # Video path: source + title card -> overlay -> fade in ->
                    # fade out -> sink. Audio path: source -> fade in ->
                    # fade out -> sink.
                    src = ograph.add_buffer(template=chunk.streams[0], time_base=chunk.streams[0].time_base)
                    asrc = ograph.add_abuffer(template=chunk.streams[1], time_base=chunk.streams[1].time_base)
                    overlay = ograph.add_buffer(
                        width=card_width,
                        height=card_height,
                        format="rgba",
                        time_base=chunk.streams[0].time_base,
                    )
                    overlay_fo = ograph.add("fade", args="out:240:60")
                    overlay.link_to(overlay_fo, 0, 0)
                    composite = ograph.add("overlay", args="x=4:y=4")
                    src.link_to(composite, 0, 0)
                    overlay_fo.link_to(composite, 0, 1)
                    ifade = ograph.add("fade", args="in:0:60")
                    iafade = ograph.add("afade", args="in:{}:48000".format(offset*48000/1000000))
                    ofade = ograph.add("fade", args="out:{}:60".format((chunk.duration*60/1000000)-60))
                    oafade = ograph.add("afade", args="out:{}:48000".format(((offset+chunk.duration)*48000/1000000)-48000))
                    composite.link_to(ifade, 0, 0)
                    asrc.link_to(iafade, 0, 0)
                    ifade.link_to(ofade, 0, 0)
                    iafade.link_to(oafade, 0, 0)
                    ofade.link_to(sink, 0, 0)
                    oafade.link_to(asink, 0, 0)

                    ograph.configure()
                    for packet in chunk.demux():
                        if packet.dts is None:
                            continue
                        # Shift the packet to its position in the combined
                        # output. Integer division keeps pts/dts integral
                        # instead of turning them into floats.
                        shift = (offset * packet.time_base.denominator) // (
                            packet.time_base.numerator * 1000000
                        )
                        packet.dts += shift
                        packet.pts += shift
                        if packet.stream_index == 0:  # TODO: robustness
                            for ifr in packet.decode():
                                # Each video frame needs a matching title-card
                                # frame carrying the same timestamps.
                                text_frame = av.video.frame.VideoFrame(card_width, card_height, format="rgba")
                                text_frame.planes[0].update(card_pixels)
                                text_frame.pts = ifr.pts
                                text_frame.dts = ifr.dts
                                text_frame.time_base = ifr.time_base
                                overlay.push(text_frame)
                                src.push(ifr)
                                ofr = sink.pull()
                                for p in output.streams[packet.stream_index].encode(ofr):
                                    output.mux(p)
                        else:
                            for ifr in packet.decode():
                                asrc.push(ifr)
                                ofr = asink.pull()
                                for p in output.streams[packet.stream_index].encode(ofr):
                                    output.mux(p)
                    offset += chunk.duration

            # Drain whatever the encoders are still buffering; without this
            # the tail of the last chunk never reaches the file.
            for stream in output.streams:
                for p in stream.encode(None):
                    output.mux(p)