Hardcode fewer things. Convert tabs to spaces.

2025-06-12 23:47:22 -04:00 · 2025-06-12 23:47:22 -04:00 · 97616b341d
commit 97616b341d
parent 2e9b8b148e
1 changed file with 122 additions and 108 deletions
--- a/dcc/concat.py
+++ b/dcc/concat.py
@@ -9,117 +9,131 @@ import numpy as np
 import wand.image

 class Concat(dcc.doom_base.Wad):
-	def get_parser(self, prog_name):
-		parser = super().get_parser(prog_name)
-		parser.add_argument("start_map")
-		parser.add_argument("end_map")
-		parser.add_argument("-n", "--nooverlay", action="store_true")
-		return parser
+    def get_parser(self, prog_name):
+        parser = super().get_parser(prog_name)
+        parser.add_argument("start_map")
+        parser.add_argument("end_map")
+        parser.add_argument("-n", "--nooverlay", action="store_true")
+        return parser

-	def take_action(self, parsed_args):
-		logging.basicConfig()
-		av.logging.set_level(av.logging.VERBOSE)
-		av.logging.restore_default_callback()
-		videos = self.fabricate.joinpath(parsed_args.wad).glob(f"{parsed_args.wad}_map*.mp4")
-		output = av.open(self.fabricate.joinpath(parsed_args.wad).joinpath(f"{parsed_args.wad}_maps{parsed_args.start_map}to{parsed_args.end_map}.mp4"), "w")
-		offset = 0
-		# We'd like to use the concat filter here and connect everything into a
-		# single filter graph... but it produces a "Resource temporarily
-		# unavailable" error when switching to inputs after the first.  Presumably
-		# fixable, but it's easier to just make one graph per video and mux
-		# everything together at the end.
-		for v in sorted(videos):
-			# TODO: Support UDoom in literally any way.
-			if not (v.name >= f"{parsed_args.wad}_map{parsed_args.start_map}.mp4" and
-				v.name <= f"{parsed_args.wad}_map{parsed_args.end_map}.mp4"):
-				continue
+    def take_action(self, parsed_args):
+        logging.basicConfig()
+        av.logging.set_level(av.logging.VERBOSE)
+        av.logging.restore_default_callback()
+        videos = self.fabricate.joinpath(parsed_args.wad).glob(f"{parsed_args.wad}_map*.mp4")
+        output = av.open(self.fabricate.joinpath(parsed_args.wad).joinpath(f"{parsed_args.wad}_maps{parsed_args.start_map}to{parsed_args.end_map}.mp4"), "w")
+        offset = 0
+        # We'd like to use the concat filter here and connect everything into a
+        # single filter graph... but it produces a "Resource temporarily
+        # unavailable" error when switching to inputs after the first.  Presumably
+        # fixable, but it's easier to just make one graph per video and mux
+        # everything together at the end.
+        for v in sorted(videos):
+            # TODO: Support UDoom in literally any way.
+            if not (v.name >= f"{parsed_args.wad}_map{parsed_args.start_map}.mp4" and
+                v.name <= f"{parsed_args.wad}_map{parsed_args.end_map}.mp4"):
+                continue

-			chunk = av.open(v)
-			ograph = av.filter.Graph()
-			sink = ograph.add("buffersink")
-			asink = ograph.add("abuffersink")
+            chunk = av.open(v)
+            if not (len(chunk.streams.video) == 1 and len(chunk.streams.audio) == 1):
+                raise Exception(f"irregular chunk {v}: streams {chunk.streams} (expected 1 video & 1 audio)")

-			if not parsed_args.nooverlay:
-				img = wand.image.Image(height=chunk.streams[0].height,width=chunk.streams[0].width)
-				mapstring = v.name[-6:-4]
-				text = self._config["map_names"][f"map{mapstring}"]
-				dcc.text.draw_text(img, f"MAP{mapstring}: {text}", font_size=120)
-				img.trim(reset_coords=True)
-				img.border("graya(25%, 25%)", 10, 10)
-				img.border(dcc.config.TEXT_STROKE_COLOR, 16, 16)
-				# for this to work... the image needs to have a width that's a multiple
-				# of 8.  dude whyyyyyyy
-				padfactor=8
-				img.border("transparent", padfactor, 0)
-				img.crop(width=img.width-img.width%padfactor, height=img.height)
-				text_frame = av.video.frame.VideoFrame(img.width, img.height, format="rgba")
+            ograph = av.filter.Graph()
+            sink = ograph.add("buffersink")
+            asink = ograph.add("abuffersink")

-			if len(output.streams.get()) == 0:
-				# TODO: less hardcoding.
-				output.add_stream("h264", rate=61440)
-				output.streams[0].extradata = copy.deepcopy(chunk.streams[0].extradata)
-				output.streams[0].height=1440
-				output.streams[0].width=2560
-				output.streams[0].profile="High"
-				output.streams[0].qmax = chunk.streams[0].qmax
-				output.streams[0].qmin = chunk.streams[0].qmin
-				output.streams[0].codec_context.gop_size=30
-				output.streams[0].codec_context.max_b_frames=2
-				output.streams[0].codec_context.framerate = fractions.Fraction(60,1)
-				output.streams[0].codec_context.pix_fmt="yuv420p"
-				output.streams[0].codec_context.bit_rate = chunk.streams[0].codec_context.bit_rate
-				output.add_stream("aac", rate=48000)
-				output.streams[1].extradata = copy.deepcopy(output.streams[1].extradata)
-				output.streams[1].rate=48000
-				output.streams[1].bit_rate=chunk.streams[1].bit_rate
-			src = ograph.add_buffer(template=chunk.streams[0], time_base=chunk.streams[0].time_base)
-			asrc = ograph.add_abuffer(template=chunk.streams[1], time_base=chunk.streams[1].time_base)
-			ifade = ograph.add("fade", args="in:0:60")
-			iafade = ograph.add("afade", args="in:{}:48000".format(offset*48000/1000000))
-			ofade = ograph.add("fade", args="out:{}:60".format((chunk.duration*60/1000000)-60))
-			oafade = ograph.add("afade", args="out:{}:48000".format(((offset+chunk.duration)*48000/1000000)-48000))
-			if not parsed_args.nooverlay:
-				overlay = ograph.add_buffer(width=img.width, height=img.height, format="rgba", time_base=chunk.streams[0].time_base)
-				overlay_fo = ograph.add("fade", args="out:240:60")
-				overlay.link_to(overlay_fo, 0, 0)
-				composite = ograph.add("overlay", args="x=4:y=4")
-				src.link_to(composite, 0, 0)
-				overlay_fo.link_to(composite, 0, 1)
-				composite.link_to(ifade, 0, 0)
-			else:
-				src.link_to(ifade, 0, 0)
-			asrc.link_to(iafade, 0, 0)
-			ifade.link_to(ofade, 0, 0)
-			iafade.link_to(oafade, 0, 0)
-			ofade.link_to(sink, 0, 0)
-			oafade.link_to(asink, 0, 0)
+            if not parsed_args.nooverlay:
+                img = wand.image.Image(height=chunk.streams[0].height,width=chunk.streams[0].width)
+                mapstring = v.name[-6:-4]
+                text = self._config["map_names"][f"map{mapstring}"]
+                dcc.text.draw_text(img, f"MAP{mapstring}: {text}", font_size=120)
+                img.trim(reset_coords=True)
+                img.border("graya(25%, 25%)", 10, 10)
+                img.border(dcc.config.TEXT_STROKE_COLOR, 16, 16)
+                # for this to work... the image needs to have a width that's a multiple
+                # of 8.  dude whyyyyyyy
+                padfactor=8
+                img.border("transparent", padfactor, 0)
+                img.crop(width=img.width-img.width%padfactor, height=img.height)

-			ograph.configure()
-			for packet in chunk.demux():
-				if packet.dts is None:
-					continue
-				packet.dts += (offset * packet.time_base.denominator) / (packet.time_base.numerator * 1000000)
-				packet.pts += (offset * packet.time_base.denominator) / (packet.time_base.numerator * 1000000)
-				if packet.stream_index == 0:  # TODO: robustness
-					for ifr in packet.decode():
-						if not parsed_args.nooverlay:
-							text_frame = av.video.frame.VideoFrame(img.width, img.height, format="rgba")
-							text_frame.planes[0].update(img.make_blob(format="rgba"))
-							text_frame.pts = ifr.pts
-							text_frame.dts = ifr.dts
-							text_frame.time_base = ifr.time_base
-							overlay.push(text_frame)
-						src.push(ifr)
-						ofr = sink.pull()
-						for p in output.streams[packet.stream_index].encode(ofr):
-							output.mux(p)
-				else:
-					for ifr in packet.decode():
-						asrc.push(ifr)
-						ofr = asink.pull()
-						for p in output.streams[packet.stream_index].encode(ofr):
-							output.mux(p)
-			offset += chunk.duration
-			chunk.close()
-		output.close()
+            if len(output.streams.get()) == 0:
+                # We can't use the input stream as a template here; it doesn't
+                # have everything needed to do encoding and will fail
+                # mysteriously later.
+                vs = chunk.streams.video[0]
+                output.add_stream("h264", rate=int(vs.time_base.denominator/vs.time_base.numerator))
+                output.streams[0].extradata = copy.deepcopy(vs.extradata)
+                output.streams[0].height=vs.height
+                output.streams[0].width=vs.width
+                output.streams[0].qmax = vs.qmax
+                output.streams[0].qmin = vs.qmin
+                output.streams[0].codec_context.bit_rate = vs.codec_context.bit_rate
+                output.streams[0].codec_context.framerate = vs.base_rate
+                output.streams[0].codec_context.pix_fmt = vs.codec_context.pix_fmt
+                # The following are only used for encoding and have no equivalent on the input stream.
+                output.streams[0].profile="High"
+                output.streams[0].codec_context.gop_size=30
+                output.streams[0].codec_context.max_b_frames=2
+
+                astr = chunk.streams.audio[0]
+                output.add_stream("aac", rate=astr.rate)
+                output.streams[1].extradata = copy.deepcopy(astr.extradata)
+                output.streams[1].bit_rate=astr.bit_rate
+
+            src = ograph.add_buffer(template=chunk.streams.video[0], time_base=chunk.streams.video[0].time_base)
+            asrc = ograph.add_abuffer(template=chunk.streams.audio[0], time_base=chunk.streams.audio[0].time_base)
+            # TODO: video fade timestamps are relative to the start of the input
+            # video, but audio fade timestamps need to be offset by the position
+            # in the final video.  Clarify if this is really necessary.
+            frame_rate = chunk.streams.video[0].base_rate
+            sample_rate = chunk.streams.audio[0].rate
+            ifade = ograph.add("fade", args="in:0:{}".format(frame_rate))
+            ofade = ograph.add("fade", args="out:{}:{}".format((chunk.duration*frame_rate/1000000)-frame_rate, frame_rate))
+            iafade = ograph.add("afade", args="in:{}:{}".format(offset*sample_rate/1000000, sample_rate))
+            oafade = ograph.add("afade", args="out:{}:{}".format(((offset+chunk.duration)*sample_rate/1000000)-sample_rate, sample_rate))
+            if not parsed_args.nooverlay:
+                overlay = ograph.add_buffer(width=img.width, height=img.height, format="rgba", time_base=chunk.streams[0].time_base)
+                overlay_fo = ograph.add("fade", args="out:{}:{}".format(4*frame_rate, frame_rate))
+                overlay.link_to(overlay_fo, 0, 0)
+                composite = ograph.add("overlay", args="x=4:y=4")
+                src.link_to(composite, 0, 0)
+                overlay_fo.link_to(composite, 0, 1)
+                composite.link_to(ifade, 0, 0)
+            else:
+                src.link_to(ifade, 0, 0)
+
+            asrc.link_to(iafade, 0, 0)
+            ifade.link_to(ofade, 0, 0)
+            iafade.link_to(oafade, 0, 0)
+            ofade.link_to(sink, 0, 0)
+            oafade.link_to(asink, 0, 0)
+            ograph.configure()
+
+            for packet in chunk.demux():
+                if packet.dts is None:
+                    continue
+                packet.dts += (offset * packet.time_base.denominator) / (packet.time_base.numerator * 1000000)
+                packet.pts += (offset * packet.time_base.denominator) / (packet.time_base.numerator * 1000000)
+                if packet.stream == chunk.streams.video[0]:
+                    for ifr in packet.decode():
+                        if not parsed_args.nooverlay:
+                            text_frame = av.video.frame.VideoFrame(img.width, img.height, format="rgba")
+                            text_frame.planes[0].update(img.make_blob(format="rgba"))
+                            text_frame.pts = ifr.pts
+                            text_frame.dts = ifr.dts
+                            text_frame.time_base = ifr.time_base
+                            overlay.push(text_frame)
+                        src.push(ifr)
+                        ofr = sink.pull()
+                        for p in output.streams[packet.stream_index].encode(ofr):
+                            output.mux(p)
+                else:
+                    for ifr in packet.decode():
+                        asrc.push(ifr)
+                        ofr = asink.pull()
+                        for p in output.streams[packet.stream_index].encode(ofr):
+                            output.mux(p)
+            offset += chunk.duration
+            chunk.close()
+        output.close()