import av
import copy
import dcc.config
import dcc.doom_base
import fractions
import io
import logging
import math
import numpy as np
import wand.image


class Concat(dcc.doom_base.Wad):
    def get_parser(self, prog_name):
        parser = super().get_parser(prog_name)
        parser.add_argument("start_map")
        parser.add_argument("end_map")
        parser.add_argument("-n", "--nooverlay", action="store_true")
        return parser

    def take_action(self, parsed_args):
        logging.basicConfig()
        av.logging.set_level(av.logging.VERBOSE)
        av.logging.restore_default_callback()
        videos = (
            self.fabricate.joinpath(parsed_args.wad)
            .glob(f"{parsed_args.wad}_map*.mp4")
        )
        fn_base = (
            f"{parsed_args.wad}_maps{parsed_args.start_map}"
            + f"to{parsed_args.end_map}"
        )
        output = av.open(
            self.fabricate.joinpath(parsed_args.wad).joinpath(
                f"{fn_base}.mp4"),
            "w"
        )
        summary_file = open(
            self.fabricate.joinpath(parsed_args.wad).joinpath(
                f"{fn_base}.txt"),
            "w"
        )
        self._offset = 0
        summary = []
        # We'd like to use the concat filter here and connect everything into
        # a single filter graph... but it produces a "Resource temporarily
        # unavailable" error when switching to inputs after the first.
        # Presumably fixable, but it's easier to just make one graph per video
        # and mux everything together at the end.
        for v in sorted(videos):
            # TODO: Support UDoom in literally any way.
            if not (
                v.name >= f"{parsed_args.wad}_map{parsed_args.start_map}.mp4"
                and v.name <= f"{parsed_args.wad}_map{parsed_args.end_map}.mp4"
            ):
                continue
            start_time = self._offset / 1000000
            text = self._add_chunk(v, output, not parsed_args.nooverlay)
            summary.append(
                f"{text} {math.floor(start_time / 60):02}:"
                + f"{math.floor(start_time % 60):02}"
            )
        # Flush any frames still buffered in the encoders before closing.
        for stream in output.streams:
            for p in stream.encode():
                output.mux(p)
        output.close()
        for line in summary:
            summary_file.write(f"{line}\n")
        summary_file.close()

    def _add_chunk(self, v, output, overlay):
        chunk = av.open(v)
        if not (len(chunk.streams.video) == 1
                and len(chunk.streams.audio) == 1):
            raise Exception(
                f"irregular chunk {v}: streams {chunk.streams} "
                "(expected 1 video & 1 audio)"
            )
        ograph = av.filter.Graph()
        sink = ograph.add("buffersink")
        asink = ograph.add("abuffersink")
        text = ""
        if overlay:
            img = wand.image.Image(
                height=chunk.streams.video[0].height,
                width=chunk.streams.video[0].width
            )
            mapstring = v.name[-6:-4]
            text = self._config["map_names"][f"map{mapstring}"]
            self.draw_text(
                img, f"MAP{mapstring}: {text}", font_size=120
            )
            img.trim(reset_coords=True)
            img.border("graya(25%, 25%)", 10, 10)
            img.border(dcc.config.TEXT_STROKE_COLOR, 16, 16)
            # For this to work... the image needs to have a width that's a
            # multiple of 8. dude whyyyyyyy
            padfactor = 8
            img.border("transparent", padfactor, 0)
            img.crop(
                width=img.width - img.width % padfactor, height=img.height
            )
        if len(output.streams.get()) == 0:
            # We can't use the input stream as a template here; it doesn't
            # have everything needed to do encoding and will fail
            # mysteriously later.
            vs = chunk.streams.video[0]
            vr = int(vs.time_base.denominator / vs.time_base.numerator)
            ovs = output.add_stream("h264", rate=vr)
            ovs.extradata = copy.deepcopy(vs.extradata)
            ovs.height = vs.height
            ovs.width = vs.width
            ovs.qmax = vs.qmax
            ovs.qmin = vs.qmin
            ovs.codec_context.bit_rate = vs.codec_context.bit_rate
            ovs.codec_context.framerate = vs.base_rate
            ovs.codec_context.pix_fmt = vs.codec_context.pix_fmt
            # The following are only used for encoding and have no equivalent
            # on the input stream.
            ovs.profile = "High"
            ovs.codec_context.gop_size = 30
            ovs.codec_context.max_b_frames = 2
            astr = chunk.streams.audio[0]
            oas = output.add_stream("aac", rate=astr.rate)
            oas.extradata = copy.deepcopy(astr.extradata)
            oas.bit_rate = astr.bit_rate
        src = ograph.add_buffer(
            template=chunk.streams.video[0],
            time_base=chunk.streams.video[0].time_base
        )
        asrc = ograph.add_abuffer(
            template=chunk.streams.audio[0],
            time_base=chunk.streams.audio[0].time_base
        )
        # TODO: video fades are absolute relative to the input video; audio
        # fades need to have their timestamps offset by the position in the
        # final video. Clarify if this is really necessary.
        frame_rate = chunk.streams.video[0].base_rate
        sample_rate = chunk.streams.audio[0].rate
        ifade = ograph.add("fade", args=f"in:0:{frame_rate}")
        ofade_start = (chunk.duration * frame_rate / 1000000) - frame_rate
        ofade = ograph.add("fade", args=f"out:{ofade_start}:{frame_rate}")
        iafade_start = self._offset * sample_rate / 1000000
        iafade = ograph.add("afade", args=f"in:{iafade_start}:{sample_rate}")
        oafade_start = (
            (self._offset + chunk.duration) * sample_rate / 1000000
            - sample_rate
        )
        oafade = ograph.add("afade", args=f"out:{oafade_start}:{sample_rate}")
        if overlay:
            # Feed the rendered map-title image in as a second video source
            # and fade it out after a few seconds.
            overlay_src = ograph.add_buffer(
                width=img.width, height=img.height, format="rgba",
                time_base=chunk.streams.video[0].time_base
            )
            overlay_fo = ograph.add(
                "fade", args=f"out:{4 * frame_rate}:{frame_rate}"
            )
            overlay_src.link_to(overlay_fo, 0, 0)
            composite = ograph.add("overlay", args="x=4:y=4")
            src.link_to(composite, 0, 0)
            overlay_fo.link_to(composite, 0, 1)
            composite.link_to(ifade, 0, 0)
        else:
            src.link_to(ifade, 0, 0)
        asrc.link_to(iafade, 0, 0)
        ifade.link_to(ofade, 0, 0)
        iafade.link_to(oafade, 0, 0)
        ofade.link_to(sink, 0, 0)
        oafade.link_to(asink, 0, 0)
        ograph.configure()
        for packet in chunk.demux():
            if packet.dts is None:
                continue
            # Shift timestamps by the running offset, converting microseconds
            # into this packet's time base; timestamps must stay integral.
            pof = round(
                (self._offset * packet.time_base.denominator)
                / (packet.time_base.numerator * 1000000)
            )
            packet.dts += pof
            packet.pts += pof
            if packet.stream == chunk.streams.video[0]:
                for ifr in packet.decode():
                    if overlay:
                        overlay_src.push(self._make_text_frame(img, ifr))
                    src.push(ifr)
                    ofr = sink.pull()
                    for p in output.streams[packet.stream_index].encode(ofr):
                        output.mux(p)
            else:
                for ifr in packet.decode():
                    asrc.push(ifr)
                    ofr = asink.pull()
                    for p in output.streams[packet.stream_index].encode(ofr):
                        output.mux(p)
        self._offset += chunk.duration
        chunk.close()
        return text

    def _make_text_frame(self, img, ifr):
        # We need to give each frame its own memory it can own.
        text_frame = av.video.frame.VideoFrame(
            img.width, img.height, format="rgba"
        )
        text_frame.planes[0].update(img.make_blob(format="rgba"))
        text_frame.pts = ifr.pts
        text_frame.dts = ifr.dts
        text_frame.time_base = ifr.time_base
        return text_frame