doomcc/dcc/concat.py


import av
import copy
import enum
import logging
import math

import dcc.doom_base
import wand.image


class State(enum.Enum):
    NOT_STARTED = 1
    STARTED = 2
    DONE = 3


class Concat(dcc.doom_base.Wad):
    def get_parser(self, prog_name):
        parser = super().get_parser(prog_name)
        parser.add_argument("start_map")
        parser.add_argument("end_map")
        parser.add_argument("-n", "--nooverlay", action="store_true")
        return parser
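
    # start_map and end_map are matched as strings against the zero-padded
    # Doom II map list built in take_action, so pass them in that form:
    # "01", "07", "31", and so on.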

    def take_action(self, parsed_args):
        logging.basicConfig()
        av.logging.set_level(av.logging.VERBOSE)
        av.logging.restore_default_callback()
        videodir = self.fabricate.joinpath(parsed_args.wad)
        fn_base = (
            f"{parsed_args.wad}_maps{parsed_args.start_map}"
            f"to{parsed_args.end_map}"
        )
        output = av.open(videodir.joinpath(f"{fn_base}.mp4"), "w")
        summary_file = open(videodir.joinpath(f"{fn_base}.txt"), "w")
        # Running position of the next chunk in the output, in av.time_base
        # units (microseconds).
        self._offset = 0
        summary = []
        # We'd like to use the concat filter here and connect everything
        # into a single filter graph... but it produces a "Resource
        # temporarily unavailable" error when switching to any input after
        # the first. Presumably fixable, but it's easier to just build one
        # graph per video and mux everything together at the end.
        # TODO: Support UDoom in literally any way.
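        # Doom II's progression is MAP01-MAP15, then the secret maps
        # MAP31/MAP32 (reached via MAP15's secret exit), then MAP16-MAP30,
        # so 31 and 32 are spliced in after 15 here.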
        d2maps = (
            [str(x).zfill(2) for x in range(1, 16)]
            + ["31", "32"]
            + [str(x) for x in range(16, 31)]
        )
        state = State.NOT_STARTED
        for idx in d2maps:
            if idx == parsed_args.start_map:
                state = State.STARTED
            if idx == parsed_args.end_map:
                state = State.DONE
            if state == State.NOT_STARTED:
                continue
            start_time = self._offset / 1000000
            text = self._add_chunk(
                videodir.joinpath(f"{parsed_args.wad}_map{idx}.mp4"),
                output, not parsed_args.nooverlay
            )
            summary.append(
                f"{text} {math.floor(start_time / 60):02}:"
                f"{math.floor(start_time % 60):02}"
            )
            if state == State.DONE:
                break
        # Flush any frames still buffered in the encoders, then close.
        for stream in output.streams:
            for p in stream.encode():
                output.mux(p)
        output.close()
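        # Each summary line is a map title plus its start timestamp in the
        # concatenated video, e.g. "Underhalls 04:23" for a map that
        # begins 263 seconds in.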
        for line in summary:
            summary_file.write(f"{line}\n")
        summary_file.close()

    def _add_chunk(self, v, output, overlay):
        chunk = av.open(v)
        if not (len(chunk.streams.video) == 1
                and len(chunk.streams.audio) == 1):
            raise Exception(
                f"irregular chunk {v}: streams {chunk.streams} "
                "(expected 1 video & 1 audio)"
            )
        ograph = av.filter.Graph()
        sink = ograph.add("buffersink")
        asink = ograph.add("abuffersink")
        text = ""
        if overlay:
            img = wand.image.Image(
                height=chunk.streams.video[0].height,
                width=chunk.streams.video[0].width
            )
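            # v.name is "<wad>_mapNN.mp4" (see take_action), so [-6:-4]
            # slices out the two-digit map number, e.g.
            # "somewad_map31.mp4" -> "31".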
            mapstring = v.name[-6:-4]
            text = self._config["map_names"][f"map{mapstring}"]
            self.draw_text(
                img,
                f"MAP{mapstring}: {text}",
                font_size=120
            )
            img.trim(reset_coords=True)
            img.border("graya(25%, 25%)", 10, 10)
            img.border(self.thumbnail_text_stroke, 16, 16)
            # For this to work... the image needs to have a width that's a
            # multiple of 8. dude whyyyyyyy
            padfactor = 8
            img.border("transparent", padfactor, 0)
            img.crop(
                width=img.width - img.width % padfactor,
                height=img.height
            )
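            # A guess at the why (unverified): frame planes are allocated
            # with aligned strides, and 8 rgba pixels is 32 bytes, so
            # rounding the width down keeps the packed blob produced in
            # _make_text_frame the same size as the plane it fills. E.g. a
            # 503-px-wide trimmed image is padded to 519 and cropped back
            # to 512 (519 - 519 % 8).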
        if len(output.streams.get()) == 0:
            # We can't use the input stream as a template here; it doesn't
            # have everything needed to do encoding and will fail
            # mysteriously later.
            vs = chunk.streams.video[0]
            vr = int(vs.time_base.denominator / vs.time_base.numerator)
            ovs = output.add_stream("h264", rate=vr)
            ovs.extradata = copy.deepcopy(vs.extradata)
            ovs.height = vs.height
            ovs.width = vs.width
            ovs.qmax = vs.qmax
            ovs.qmin = vs.qmin
            ovs.codec_context.bit_rate = vs.codec_context.bit_rate
            ovs.codec_context.framerate = vs.base_rate
            ovs.codec_context.pix_fmt = vs.codec_context.pix_fmt
            # The following are only used for encoding and have no
            # equivalent on the input stream.
            ovs.profile = "High"
            ovs.codec_context.gop_size = 30
            ovs.codec_context.max_b_frames = 2
            astr = chunk.streams.audio[0]
            oas = output.add_stream("aac", rate=astr.rate)
            oas.extradata = copy.deepcopy(astr.extradata)
            oas.bit_rate = astr.bit_rate
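            # extradata is the codec's out-of-band header data (for h264,
            # the SPS/PPS); it's deep-copied above, presumably so the
            # output streams don't share buffers with the input.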
        src = ograph.add_buffer(
            template=chunk.streams.video[0],
            time_base=chunk.streams.video[0].time_base
        )
        asrc = ograph.add_abuffer(
            template=chunk.streams.audio[0],
            time_base=chunk.streams.audio[0].time_base
        )
        # TODO: video fade frame indices are relative to the input video,
        # but audio fade start samples are offset by this chunk's position
        # in the final video. Clarify whether that asymmetry is really
        # necessary.
        frame_rate = chunk.streams.video[0].base_rate
        sample_rate = chunk.streams.audio[0].rate
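        # Positional filter args are fade=type:start_frame:nb_frames and
        # afade=type:start_sample:nb_samples, while chunk.duration is in
        # av.time_base units (microseconds). E.g. a 90-second chunk at
        # Doom's 35 fps gives ofade_start = 90000000 * 35 / 1000000 - 35
        # = 3115, so the fade-out spans the final second.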
        ifade = ograph.add("fade", args=f"in:0:{frame_rate}")
        ofade_start = (chunk.duration * frame_rate / 1000000) - frame_rate
        ofade = ograph.add("fade", args=f"out:{ofade_start}:{frame_rate}")
        iafade_start = self._offset * sample_rate / 1000000
        iafade = ograph.add("afade", args=f"in:{iafade_start}:{sample_rate}")
        oafade_start = (
            (self._offset + chunk.duration) * sample_rate / 1000000
            - sample_rate
        )
        oafade = ograph.add("afade", args=f"out:{oafade_start}:{sample_rate}")
        if overlay:
            # Use a distinct name for the buffer node so it doesn't shadow
            # the overlay flag.
            overlay_src = ograph.add_buffer(
                width=img.width, height=img.height,
                format="rgba", time_base=chunk.streams.video[0].time_base
            )
            overlay_fo = ograph.add(
                "fade", args=f"out:{4 * frame_rate}:{frame_rate}"
            )
            overlay_src.link_to(overlay_fo, 0, 0)
            composite = ograph.add("overlay", args="x=4:y=4")
            src.link_to(composite, 0, 0)
            overlay_fo.link_to(composite, 0, 1)
            composite.link_to(ifade, 0, 0)
        else:
            src.link_to(ifade, 0, 0)
        asrc.link_to(iafade, 0, 0)
        ifade.link_to(ofade, 0, 0)
        iafade.link_to(oafade, 0, 0)
        ofade.link_to(sink, 0, 0)
        oafade.link_to(asink, 0, 0)
        ograph.configure()
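        # Demux the chunk, shift each packet by the running offset
        # (rescaled from microseconds to the stream's time_base ticks),
        # run the decoded frames through the graph, and re-encode. E.g.
        # with a 1/1000 time_base, a 90-second offset becomes
        # 90000000 * 1000 / 1000000 = 90000 ticks.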
        for packet in chunk.demux():
            if packet.dts is None:
                continue
            # Timestamps are integer ticks, so truncate the rescaled
            # offset.
            pof = int(
                (self._offset * packet.time_base.denominator)
                / (packet.time_base.numerator * 1000000)
            )
            packet.dts += pof
            packet.pts += pof
            if packet.stream == chunk.streams.video[0]:
                for ifr in packet.decode():
                    if overlay:
                        overlay_src.push(self._make_text_frame(img, ifr))
                    src.push(ifr)
                    ofr = sink.pull()
                    for p in output.streams[packet.stream_index].encode(ofr):
                        output.mux(p)
            else:
                for ifr in packet.decode():
                    asrc.push(ifr)
                    ofr = asink.pull()
                    for p in output.streams[packet.stream_index].encode(ofr):
                        output.mux(p)
        self._offset += chunk.duration
        chunk.close()
        return text

    def _make_text_frame(self, img, ifr):
        # Each frame needs its own copy of the image bytes so it owns its
        # memory.
        text_frame = av.video.frame.VideoFrame(
            img.width, img.height, format="rgba"
        )
        text_frame.planes[0].update(img.make_blob(format="rgba"))
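        # Copying the source frame's timing lets the overlay filter pair
        # this text frame with the video frame it's composited onto.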
        text_frame.pts = ifr.pts
        text_frame.dts = ifr.dts
        text_frame.time_base = ifr.time_base
        return text_frame