# doomcc/dcc/concat.py

import av
import copy
import dcc.doom_base
import fractions
import io
import logging
import math
import numpy as np
import wand.image

class Concat(dcc.doom_base.Wad):
	"""Command that joins a WAD's per-map videos into a single MP4.

	Every selected ``<wad>_map*.mp4`` is decoded, passed through its own
	filter graph (map-title overlay plus video/audio fades), re-encoded and
	muxed into ``<wad>_maps<start>to<end>.mp4`` in the same directory.
	"""

	# NOTE(review): dcc.text and dcc.config are referenced below, but this file
	# only imports dcc.doom_base; presumably that import makes them reachable
	# -- confirm.

	def get_parser(self, prog_name):
		"""Add the ``start_map`` and ``end_map`` positionals to the inherited parser.

		Both values are spliced into file names unchanged, so they have to be
		written the way they appear in the per-map video file names.
		"""
		parser = super().get_parser(prog_name)
		parser.add_argument("start_map")
		parser.add_argument("end_map")
		return parser

	def take_action(self, parsed_args):
		"""Concatenate the videos for ``start_map`` through ``end_map``.

		Reads ``parsed_args.wad``, ``parsed_args.start_map`` and
		``parsed_args.end_map``.  A video is selected when its file name sorts
		(as a plain string) between ``<wad>_map<start_map>.mp4`` and
		``<wad>_map<end_map>.mp4``, inclusive; selected videos are appended in
		sorted file-name order.
		"""
		logging.basicConfig()
		av.logging.set_level(av.logging.VERBOSE)
		av.logging.restore_default_callback()

		wad = parsed_args.wad
		wad_dir = self.fabricate.joinpath(wad)
		first_name = f"{wad}_map{parsed_args.start_map}.mp4"
		last_name = f"{wad}_map{parsed_args.end_map}.mp4"
		out_path = wad_dir.joinpath(f"{wad}_maps{parsed_args.start_map}to{parsed_args.end_map}.mp4")

		# Where the current chunk starts in the output, in the units of
		# chunk.duration (treated as microseconds throughout this class).
		offset = 0
		# We'd like to use the concat filter here and connect everything into a
		# single filter graph... but it produces a "Resource temporarily
		# unavailable" error when switching to inputs after the first.  Presumably
		# fixable, but it's easier to just make one graph per video and mux
		# everything together at the end.
		#
		# Both containers are context managers so they get closed even when a
		# chunk fails part-way through.
		with av.open(out_path, "w") as output:
			for v in sorted(wad_dir.glob(f"{wad}_map*.mp4")):
				# TODO: Support UDoom in literally any way.
				if not (first_name <= v.name <= last_name):
					continue
				with av.open(v) as chunk:
					# The two characters in front of ".mp4" are the map number.
					self._append_chunk(output, chunk, v.name[-6:-4], offset)
					offset += chunk.duration

			# Drain whatever the encoders are still buffering; without this the
			# last frames never make it into the file.
			for stream in output.streams:
				for p in stream.encode(None):
					output.mux(p)

	def _append_chunk(self, output, chunk, mapstring, offset):
		"""Decode, filter, re-encode and mux one input video into ``output``.

		``mapstring`` is the map number as it appears in the file name and
		``offset`` is where this chunk starts in the output, in microseconds.
		The output streams are created from the first chunk that gets here.
		"""
		card_width, card_height, card_rgba = self._render_title_card(chunk, mapstring)
		if not output.streams.get():
			self._add_output_streams(output, chunk)
		# Hold on to the graph object itself for as long as its contexts are used.
		ograph, src, asrc, overlay, sink, asink = self._build_graph(chunk, card_width, card_height, offset)

		for packet in chunk.demux():
			if packet.dts is None:
				if packet.size:
					# A real packet without timestamps cannot be shifted; skip it.
					continue
				# Otherwise this is one of the empty packets demux() emits at
				# the end: decoding it flushes the frames still held by the
				# decoder, so it must not be skipped.
			else:
				shift = self._offset_in_ticks(offset, packet.time_base)
				packet.dts += shift
				packet.pts += shift

			# Use the stream object rather than packet.stream_index so the
			# flush packets are routed to the right branch as well.
			index = packet.stream.index
			if index == 0:  # TODO: robustness
				for ifr in packet.decode():
					# One overlay frame per video frame, carrying the same
					# timestamps so the overlay filter can pair them up.
					text_frame = av.video.frame.VideoFrame(card_width, card_height, format="rgba")
					text_frame.planes[0].update(card_rgba)
					text_frame.pts = ifr.pts
					text_frame.dts = ifr.dts
					text_frame.time_base = ifr.time_base
					overlay.push(text_frame)
					src.push(ifr)
					ofr = sink.pull()
					for p in output.streams[index].encode(ofr):
						output.mux(p)
			else:
				for ifr in packet.decode():
					asrc.push(ifr)
					ofr = asink.pull()
					for p in output.streams[index].encode(ofr):
						output.mux(p)

	def _render_title_card(self, chunk, mapstring):
		"""Render the "MAPxx: name" card and return ``(width, height, rgba_bytes)``.

		The map name is looked up in ``self._config["map_names"]`` under the
		key ``map<mapstring>``.
		"""
		with wand.image.Image(height=chunk.streams[0].height,width=chunk.streams[0].width) as img:
			text = self._config["map_names"][f"map{mapstring}"]
			dcc.text.draw_text(img, f"MAP{mapstring}: {text}", font_size=120)
			img.trim(reset_coords=True)
			img.border("graya(25%, 25%)", 10, 10)
			img.border(dcc.config.TEXT_STROKE_COLOR, 16, 16)
			# Copying the pixels into a video frame only works when the width is
			# a multiple of 8 (presumably the frame pads its rows to an alignment
			# boundary, so the sizes would not match otherwise -- TODO confirm).
			# Add 8 transparent pixels left and right, then crop the width down
			# to the nearest multiple of 8.
			padfactor=8
			img.border("transparent", padfactor, 0)
			img.crop(width=img.width-img.width%padfactor, height=img.height)
			# The card is identical for every frame, so serialise it once here
			# instead of once per video frame.
			return img.width, img.height, img.make_blob(format="rgba")

	@staticmethod
	def _add_output_streams(output, chunk):
		"""Create the H.264 video and AAC audio output streams.

		Bit rates and quantiser limits are taken from ``chunk``; everything
		else is hard-coded.
		"""
		# TODO: less hardcoding.
		output.add_stream("h264", rate=61440)
		video = output.streams[0]
		video.extradata = copy.deepcopy(chunk.streams[0].extradata)
		video.height=1440
		video.width=2560
		video.profile="High"
		video.qmax = chunk.streams[0].qmax
		video.qmin = chunk.streams[0].qmin
		video.codec_context.gop_size=30
		video.codec_context.max_b_frames=2
		video.codec_context.framerate = fractions.Fraction(60,1)
		video.codec_context.pix_fmt="yuv420p"
		video.codec_context.bit_rate = chunk.streams[0].codec_context.bit_rate
		output.add_stream("aac", rate=48000)
		audio = output.streams[1]
		# NOTE(review): this copies the output stream's own extradata back onto
		# itself; chunk.streams[1].extradata was probably intended -- confirm
		# before changing it.
		audio.extradata = copy.deepcopy(audio.extradata)
		audio.rate=48000
		audio.bit_rate=chunk.streams[1].bit_rate

	@staticmethod
	def _build_graph(chunk, card_width, card_height, offset):
		"""Build and configure the filter graph for one chunk.

		Video path: source -> title-card overlay -> fade in -> fade out -> sink.
		Audio path: source -> fade in -> fade out -> sink.
		Returns ``(graph, video_src, audio_src, overlay_src, video_sink, audio_sink)``.
		"""
		vstream = chunk.streams[0]
		astream = chunk.streams[1]
		ograph = av.filter.Graph()
		sink = ograph.add("buffersink")
		asink = ograph.add("abuffersink")
		src = ograph.add_buffer(template=vstream, time_base=vstream.time_base)
		asrc = ograph.add_abuffer(template=astream, time_base=astream.time_base)
		overlay = ograph.add_buffer(width=card_width, height=card_height, format="rgba", time_base=vstream.time_base)

		# The title card fades out over 60 frames starting at frame 240 and is
		# composited 4 pixels in from the top-left corner.
		overlay_fo = ograph.add("fade", args="out:240:60")
		overlay.link_to(overlay_fo, 0, 0)
		composite = ograph.add("overlay", args="x=4:y=4")
		src.link_to(composite, 0, 0)
		overlay_fo.link_to(composite, 0, 1)

		# Fades last 60 frames / 48000 samples (60 fps and 48 kHz are assumed).
		# The video fade is positioned relative to this chunk, the audio fade
		# relative to the whole output (hence the offset).
		audio_in_start = offset*48000/1000000
		video_out_start = (chunk.duration*60/1000000)-60
		audio_out_start = ((offset+chunk.duration)*48000/1000000)-48000
		ifade = ograph.add("fade", args="in:0:60")
		iafade = ograph.add("afade", args=f"in:{audio_in_start}:48000")
		ofade = ograph.add("fade", args=f"out:{video_out_start}:60")
		oafade = ograph.add("afade", args=f"out:{audio_out_start}:48000")
		composite.link_to(ifade, 0, 0)
		asrc.link_to(iafade, 0, 0)
		ifade.link_to(ofade, 0, 0)
		iafade.link_to(oafade, 0, 0)
		ofade.link_to(sink, 0, 0)
		oafade.link_to(asink, 0, 0)

		ograph.configure()
		return ograph, src, asrc, overlay, sink, asink

	@staticmethod
	def _offset_in_ticks(offset, time_base):
		"""Convert ``offset`` (microseconds) to whole ticks of ``time_base``.

		Floor division keeps the result an ``int``, which is what packet
		timestamps are.
		"""
		return (offset * time_base.denominator) // (time_base.numerator * 1000000)
At long last, support concatenating videos together. (2025-06-02 02:05:57 -04:00)
			`import av`
			`import copy`
			`import dcc.doom_base`
			`import fractions`
			`import io`
			`import logging`
			`import math`
			`import numpy as np`
			`import wand.image`

			`class Concat(dcc.doom_base.Wad):`
			`def get_parser(self, prog_name):`
			`parser = super().get_parser(prog_name)`
			`parser.add_argument("start_map")`
			`parser.add_argument("end_map")`
			`return parser`

			`def take_action(self, parsed_args):`
			`logging.basicConfig()`
			`av.logging.set_level(av.logging.VERBOSE)`
			`av.logging.restore_default_callback()`
			`videos = self.fabricate.joinpath(parsed_args.wad).glob(f"{parsed_args.wad}_map*.mp4")`
			`output = av.open(self.fabricate.joinpath(parsed_args.wad).joinpath(f"{parsed_args.wad}_maps{parsed_args.start_map}to{parsed_args.end_map}.mp4"), "w")`
			`offset = 0`
			`# We'd like to use the concat filter here and connect everything into a`
			`# single filter graph... but it produces a "Resource temporarily`
			`# unavailable" error when switching to inputs after the first. Presumably`
			`# fixable, but it's easier to just make one graph per video and mux`
			`# everything together at the end.`
			`for v in sorted(videos):`
			`# TODO: Support UDoom in literally any way.`
			`if not (v.name >= f"{parsed_args.wad}_map{parsed_args.start_map}.mp4" and`
			`v.name <= f"{parsed_args.wad}_map{parsed_args.end_map}.mp4"):`
			`continue`

			`chunk = av.open(v)`
			`ograph = av.filter.Graph()`
			`sink = ograph.add("buffersink")`
			`asink = ograph.add("abuffersink")`

			`img = wand.image.Image(height=chunk.streams[0].height,width=chunk.streams[0].width)`
			`mapstring = v.name[-6:-4]`
			`text = self._config["map_names"][f"map{mapstring}"]`
			`dcc.text.draw_text(img, f"MAP{mapstring}: {text}", font_size=120)`
			`img.trim(reset_coords=True)`
			`img.border("graya(25%, 25%)", 10, 10)`
			`img.border(dcc.config.TEXT_STROKE_COLOR, 16, 16)`
			`# for this to work... the image needs to have a width that's a multiple`
			`# of 8. dude whyyyyyyy`
			`padfactor=8`
			`img.border("transparent", padfactor, 0)`
			`img.crop(width=img.width-img.width%padfactor, height=img.height)`
			`text_frame = av.video.frame.VideoFrame(img.width, img.height, format="rgba")`

			`if len(output.streams.get()) == 0:`
			`# TODO: less hardcoding.`
			`output.add_stream("h264", rate=61440)`
			`output.streams[0].extradata = copy.deepcopy(chunk.streams[0].extradata)`
			`output.streams[0].height=1440`
			`output.streams[0].width=2560`
			`output.streams[0].profile="High"`
			`output.streams[0].qmax = chunk.streams[0].qmax`
			`output.streams[0].qmin = chunk.streams[0].qmin`
			`output.streams[0].codec_context.gop_size=30`
			`output.streams[0].codec_context.max_b_frames=2`
			`output.streams[0].codec_context.framerate = fractions.Fraction(60,1)`
			`output.streams[0].codec_context.pix_fmt="yuv420p"`
			`output.streams[0].codec_context.bit_rate = chunk.streams[0].codec_context.bit_rate`
			`output.add_stream("aac", rate=48000)`
			`output.streams[1].extradata = copy.deepcopy(output.streams[1].extradata)`
			`output.streams[1].rate=48000`
			`output.streams[1].bit_rate=chunk.streams[1].bit_rate`
			`src = ograph.add_buffer(template=chunk.streams[0], time_base=chunk.streams[0].time_base)`
			`asrc = ograph.add_abuffer(template=chunk.streams[1], time_base=chunk.streams[1].time_base)`
			`overlay = ograph.add_buffer(width=img.width, height=img.height, format="rgba", time_base=chunk.streams[0].time_base)`
			`overlay_fo = ograph.add("fade", args="out:240:60")`
			`overlay.link_to(overlay_fo, 0, 0)`
			`composite = ograph.add("overlay", args="x=4:y=4")`
			`src.link_to(composite, 0, 0)`
			`overlay_fo.link_to(composite, 0, 1)`
			`ifade = ograph.add("fade", args="in:0:60")`
			`iafade = ograph.add("afade", args="in:{}:48000".format(offset*48000/1000000))`
			`ofade = ograph.add("fade", args="out:{}:60".format((chunk.duration*60/1000000)-60))`
			`oafade = ograph.add("afade", args="out:{}:48000".format(((offset+chunk.duration)*48000/1000000)-48000))`
			`composite.link_to(ifade, 0, 0)`
			`asrc.link_to(iafade, 0, 0)`
			`ifade.link_to(ofade, 0, 0)`
			`iafade.link_to(oafade, 0, 0)`
			`ofade.link_to(sink, 0, 0)`
			`oafade.link_to(asink, 0, 0)`

			`ograph.configure()`
			`for packet in chunk.demux():`
			`if packet.dts is None:`
			`continue`
			`packet.dts += (offset * packet.time_base.denominator) / (packet.time_base.numerator * 1000000)`
			`packet.pts += (offset * packet.time_base.denominator) / (packet.time_base.numerator * 1000000)`
			`if packet.stream_index == 0: # TODO: robustness`
			`for ifr in packet.decode():`
			`text_frame = av.video.frame.VideoFrame(img.width, img.height, format="rgba")`
			`text_frame.planes[0].update(img.make_blob(format="rgba"))`
			`text_frame.pts = ifr.pts`
			`text_frame.dts = ifr.dts`
			`text_frame.time_base = ifr.time_base`
			`overlay.push(text_frame)`
			`src.push(ifr)`
			`ofr = sink.pull()`
			`for p in output.streams[packet.stream_index].encode(ofr):`
			`output.mux(p)`
			`else:`
			`for ifr in packet.decode():`
			`asrc.push(ifr)`
			`ofr = asink.pull()`
			`for p in output.streams[packet.stream_index].encode(ofr):`
			`output.mux(p)`
			`offset += chunk.duration`
			`chunk.close()`
			`output.close()`