Generally streamline and break the concat routine into functions.
Ensure the whole thing is PEP 8 compliant. Also output a text file summary for chapter markers.
This commit is contained in:
parent
6baacd9b89
commit
e8d06c616f
1 changed file with 182 additions and 110 deletions
174
dcc/concat.py
174
dcc/concat.py
|
@ -8,6 +8,7 @@ import math
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import wand.image
|
import wand.image
|
||||||
|
|
||||||
|
|
||||||
class Concat(dcc.doom_base.Wad):
|
class Concat(dcc.doom_base.Wad):
|
||||||
def get_parser(self, prog_name):
|
def get_parser(self, prog_name):
|
||||||
parser = super().get_parser(prog_name)
|
parser = super().get_parser(prog_name)
|
||||||
|
@ -20,80 +21,141 @@ class Concat(dcc.doom_base.Wad):
|
||||||
logging.basicConfig()
|
logging.basicConfig()
|
||||||
av.logging.set_level(av.logging.VERBOSE)
|
av.logging.set_level(av.logging.VERBOSE)
|
||||||
av.logging.restore_default_callback()
|
av.logging.restore_default_callback()
|
||||||
videos = self.fabricate.joinpath(parsed_args.wad).glob(f"{parsed_args.wad}_map*.mp4")
|
videos = (
|
||||||
output = av.open(self.fabricate.joinpath(parsed_args.wad).joinpath(f"{parsed_args.wad}_maps{parsed_args.start_map}to{parsed_args.end_map}.mp4"), "w")
|
self.fabricate.joinpath(parsed_args.wad)
|
||||||
offset = 0
|
.glob(f"{parsed_args.wad}_map*.mp4")
|
||||||
|
)
|
||||||
|
fn_base = (
|
||||||
|
f"{parsed_args.wad}_maps{parsed_args.start_map}"
|
||||||
|
+ f"to{parsed_args.end_map}"
|
||||||
|
)
|
||||||
|
output = av.open(
|
||||||
|
self.fabricate.joinpath(parsed_args.wad).joinpath(
|
||||||
|
f"{fn_base}.mp4"), "w"
|
||||||
|
)
|
||||||
|
summary_file = open(
|
||||||
|
self.fabricate.joinpath(parsed_args.wad).joinpath(
|
||||||
|
f"{fn_base}.txt"), "w"
|
||||||
|
)
|
||||||
|
|
||||||
|
self._offset = 0
|
||||||
|
summary = []
|
||||||
# We'd like to use the concat filter here and connect everything into a
|
# We'd like to use the concat filter here and connect everything into a
|
||||||
# single filter graph... but it produces a "Resource temporarily
|
# single filter graph... but it produces a "Resource temporarily
|
||||||
# unavailable" error when switching to inputs after the first. Presumably
|
# unavailable" error when switching to inputs after the first.
|
||||||
# fixable, but it's easier to just make one graph per video and mux
|
# Presumably fixable, but it's easier to just make one graph per video
|
||||||
# everything together at the end.
|
# and mux everything together at the end.
|
||||||
for v in sorted(videos):
|
for v in sorted(videos):
|
||||||
# TODO: Support UDoom in literally any way.
|
# TODO: Support UDoom in literally any way.
|
||||||
if not (v.name >= f"{parsed_args.wad}_map{parsed_args.start_map}.mp4" and
|
if not (
|
||||||
v.name <= f"{parsed_args.wad}_map{parsed_args.end_map}.mp4"):
|
v.name >= f"{parsed_args.wad}_map{parsed_args.start_map}.mp4"
|
||||||
|
and v.name <= f"{parsed_args.wad}_map{parsed_args.end_map}.mp4"
|
||||||
|
):
|
||||||
continue
|
continue
|
||||||
|
start_time = self._offset / 1000000
|
||||||
|
text = self._add_chunk(v, output, not parsed_args.nooverlay)
|
||||||
|
list.append(
|
||||||
|
summary, f"{text} {math.floor(start_time / 60):02}:"
|
||||||
|
+ f"{math.floor(start_time % 60):02}"
|
||||||
|
)
|
||||||
|
output.close()
|
||||||
|
|
||||||
|
for line in summary:
|
||||||
|
summary_file.write(f"{line}\n")
|
||||||
|
summary_file.close()
|
||||||
|
|
||||||
|
def _add_chunk(self, v, output, overlay):
|
||||||
chunk = av.open(v)
|
chunk = av.open(v)
|
||||||
if not (len(chunk.streams.video) == 1 and len(chunk.streams.audio) == 1):
|
if not (len(chunk.streams.video) == 1
|
||||||
raise Exception(f"irregular chunk {v}: streams {chunk.streams} (expected 1 video & 1 audio)")
|
and len(chunk.streams.audio) == 1):
|
||||||
|
raise Exception(
|
||||||
|
f"irregular chunk {v}: streams {chunk.streams} "
|
||||||
|
+ f"(expected 1 video & 1 audio)"
|
||||||
|
)
|
||||||
|
|
||||||
ograph = av.filter.Graph()
|
ograph = av.filter.Graph()
|
||||||
sink = ograph.add("buffersink")
|
sink = ograph.add("buffersink")
|
||||||
asink = ograph.add("abuffersink")
|
asink = ograph.add("abuffersink")
|
||||||
|
|
||||||
if not parsed_args.nooverlay:
|
text = ""
|
||||||
img = wand.image.Image(height=chunk.streams[0].height,width=chunk.streams[0].width)
|
if overlay:
|
||||||
|
img = wand.image.Image(
|
||||||
|
height=chunk.streams[0].height,
|
||||||
|
width=chunk.streams[0].width
|
||||||
|
)
|
||||||
mapstring = v.name[-6:-4]
|
mapstring = v.name[-6:-4]
|
||||||
text = self._config["map_names"][f"map{mapstring}"]
|
text = self._config["map_names"][f"map{mapstring}"]
|
||||||
dcc.text.draw_text(img, f"MAP{mapstring}: {text}", font_size=120)
|
dcc.text.draw_text(img, f"MAP{mapstring}: {text}", font_size=120)
|
||||||
img.trim(reset_coords=True)
|
img.trim(reset_coords=True)
|
||||||
img.border("graya(25%, 25%)", 10, 10)
|
img.border("graya(25%, 25%)", 10, 10)
|
||||||
img.border(dcc.config.TEXT_STROKE_COLOR, 16, 16)
|
img.border(dcc.config.TEXT_STROKE_COLOR, 16, 16)
|
||||||
# for this to work... the image needs to have a width that's a multiple
|
# for this to work... the image needs to have a width that's a
|
||||||
# of 8. dude whyyyyyyy
|
# multiple of 8. dude whyyyyyyy
|
||||||
padfactor=8
|
padfactor = 8
|
||||||
img.border("transparent", padfactor, 0)
|
img.border("transparent", padfactor, 0)
|
||||||
img.crop(width=img.width-img.width%padfactor, height=img.height)
|
img.crop(
|
||||||
|
width=img.width - img.width % padfactor,
|
||||||
|
height=img.height
|
||||||
|
)
|
||||||
|
|
||||||
if len(output.streams.get()) == 0:
|
if len(output.streams.get()) == 0:
|
||||||
# We can't use the input stream as a template here; it doesn't
|
# We can't use the input stream as a template here; it doesn't
|
||||||
# have everything needed to do encoding and will fail
|
# have everything needed to do encoding and will fail
|
||||||
# mysteriously later.
|
# mysteriously later.
|
||||||
vs = chunk.streams.video[0]
|
vs = chunk.streams.video[0]
|
||||||
output.add_stream("h264", rate=int(vs.time_base.denominator/vs.time_base.numerator))
|
vr = int(vs.time_base.denominator/vs.time_base.numerator)
|
||||||
output.streams[0].extradata = copy.deepcopy(vs.extradata)
|
ovs = output.add_stream("h264", rate=vr)
|
||||||
output.streams[0].height=vs.height
|
ovs.extradata = copy.deepcopy(vs.extradata)
|
||||||
output.streams[0].width=vs.width
|
ovs.height = vs.height
|
||||||
output.streams[0].qmax = vs.qmax
|
ovs.width = vs.width
|
||||||
output.streams[0].qmin = vs.qmin
|
ovs.qmax = vs.qmax
|
||||||
output.streams[0].codec_context.bit_rate = vs.codec_context.bit_rate
|
ovs.qmin = vs.qmin
|
||||||
output.streams[0].codec_context.framerate = vs.base_rate
|
ovs.codec_context.bit_rate = vs.codec_context.bit_rate
|
||||||
output.streams[0].codec_context.pix_fmt = vs.codec_context.pix_fmt
|
ovs.codec_context.framerate = vs.base_rate
|
||||||
# The following are only used for encoding and have no equivalent on the input stream.
|
ovs.codec_context.pix_fmt = vs.codec_context.pix_fmt
|
||||||
output.streams[0].profile="High"
|
# The following are only used for encoding and have no equivalent
|
||||||
output.streams[0].codec_context.gop_size=30
|
# on the input stream.
|
||||||
output.streams[0].codec_context.max_b_frames=2
|
ovs.profile = "High"
|
||||||
|
ovs.codec_context.gop_size = 30
|
||||||
|
ovs.codec_context.max_b_frames = 2
|
||||||
|
|
||||||
astr = chunk.streams.audio[0]
|
astr = chunk.streams.audio[0]
|
||||||
output.add_stream("aac", rate=astr.rate)
|
oas = output.add_stream("aac", rate=astr.rate)
|
||||||
output.streams[1].extradata = copy.deepcopy(astr.extradata)
|
oas.extradata = copy.deepcopy(astr.extradata)
|
||||||
output.streams[1].bit_rate=astr.bit_rate
|
oas.bit_rate = astr.bit_rate
|
||||||
|
|
||||||
src = ograph.add_buffer(template=chunk.streams.video[0], time_base=chunk.streams.video[0].time_base)
|
src = ograph.add_buffer(
|
||||||
asrc = ograph.add_abuffer(template=chunk.streams.audio[0], time_base=chunk.streams.audio[0].time_base)
|
template=chunk.streams.video[0],
|
||||||
|
time_base=chunk.streams.video[0].time_base
|
||||||
|
)
|
||||||
|
asrc = ograph.add_abuffer(
|
||||||
|
template=chunk.streams.audio[0],
|
||||||
|
time_base=chunk.streams.audio[0].time_base
|
||||||
|
)
|
||||||
# TODO: video fades are absolute relative to the input video; audio
|
# TODO: video fades are absolute relative to the input video; audio
|
||||||
# fades need to have their timestamps offset by the position in the
|
# fades need to have their timestamps offset by the position in the
|
||||||
# final video. Clarify if this is really necessary.
|
# final video. Clarify if this is really necessary.
|
||||||
frame_rate = chunk.streams.video[0].base_rate
|
frame_rate = chunk.streams.video[0].base_rate
|
||||||
sample_rate = chunk.streams.audio[0].rate
|
sample_rate = chunk.streams.audio[0].rate
|
||||||
ifade = ograph.add("fade", args="in:0:{}".format(frame_rate))
|
ifade = ograph.add("fade", args="in:0:{}".format(frame_rate))
|
||||||
ofade = ograph.add("fade", args="out:{}:{}".format((chunk.duration*frame_rate/1000000)-frame_rate, frame_rate))
|
ofade_start = (chunk.duration * frame_rate / 1000000) - frame_rate
|
||||||
iafade = ograph.add("afade", args="in:{}:{}".format(offset*sample_rate/1000000, sample_rate))
|
ofade = ograph.add("fade", args=f"out:{ofade_start}:{frame_rate}")
|
||||||
oafade = ograph.add("afade", args="out:{}:{}".format(((offset+chunk.duration)*sample_rate/1000000)-sample_rate, sample_rate))
|
iafade_start = self._offset * sample_rate / 1000000
|
||||||
if not parsed_args.nooverlay:
|
iafade = ograph.add("afade", args=f"in:{iafade_start}:{sample_rate}")
|
||||||
overlay = ograph.add_buffer(width=img.width, height=img.height, format="rgba", time_base=chunk.streams[0].time_base)
|
oafade_start = (
|
||||||
overlay_fo = ograph.add("fade", args="out:{}:{}".format(4*frame_rate, frame_rate))
|
(self._offset + chunk.duration) * sample_rate / 1000000
|
||||||
|
- sample_rate
|
||||||
|
)
|
||||||
|
oafade = ograph.add("afade", args=f"out:{oafade_start}:{sample_rate}")
|
||||||
|
|
||||||
|
if overlay:
|
||||||
|
overlay = ograph.add_buffer(
|
||||||
|
width=img.width, height=img.height,
|
||||||
|
format="rgba", time_base=chunk.streams.video[0].time_base
|
||||||
|
)
|
||||||
|
overlay_fo = ograph.add(
|
||||||
|
"fade", args=f"out:{4 * frame_rate}:{frame_rate}"
|
||||||
|
)
|
||||||
overlay.link_to(overlay_fo, 0, 0)
|
overlay.link_to(overlay_fo, 0, 0)
|
||||||
composite = ograph.add("overlay", args="x=4:y=4")
|
composite = ograph.add("overlay", args="x=4:y=4")
|
||||||
src.link_to(composite, 0, 0)
|
src.link_to(composite, 0, 0)
|
||||||
|
@ -112,17 +174,16 @@ class Concat(dcc.doom_base.Wad):
|
||||||
for packet in chunk.demux():
|
for packet in chunk.demux():
|
||||||
if packet.dts is None:
|
if packet.dts is None:
|
||||||
continue
|
continue
|
||||||
packet.dts += (offset * packet.time_base.denominator) / (packet.time_base.numerator * 1000000)
|
pof = (
|
||||||
packet.pts += (offset * packet.time_base.denominator) / (packet.time_base.numerator * 1000000)
|
(self._offset * packet.time_base.denominator)
|
||||||
|
/ (packet.time_base.numerator * 1000000)
|
||||||
|
)
|
||||||
|
packet.dts += pof
|
||||||
|
packet.pts += pof
|
||||||
if packet.stream == chunk.streams.video[0]:
|
if packet.stream == chunk.streams.video[0]:
|
||||||
for ifr in packet.decode():
|
for ifr in packet.decode():
|
||||||
if not parsed_args.nooverlay:
|
if overlay:
|
||||||
text_frame = av.video.frame.VideoFrame(img.width, img.height, format="rgba")
|
overlay.push(self._make_text_frame(img, ifr))
|
||||||
text_frame.planes[0].update(img.make_blob(format="rgba"))
|
|
||||||
text_frame.pts = ifr.pts
|
|
||||||
text_frame.dts = ifr.dts
|
|
||||||
text_frame.time_base = ifr.time_base
|
|
||||||
overlay.push(text_frame)
|
|
||||||
src.push(ifr)
|
src.push(ifr)
|
||||||
ofr = sink.pull()
|
ofr = sink.pull()
|
||||||
for p in output.streams[packet.stream_index].encode(ofr):
|
for p in output.streams[packet.stream_index].encode(ofr):
|
||||||
|
@ -133,7 +194,18 @@ class Concat(dcc.doom_base.Wad):
|
||||||
ofr = asink.pull()
|
ofr = asink.pull()
|
||||||
for p in output.streams[packet.stream_index].encode(ofr):
|
for p in output.streams[packet.stream_index].encode(ofr):
|
||||||
output.mux(p)
|
output.mux(p)
|
||||||
offset += chunk.duration
|
self._offset += chunk.duration
|
||||||
chunk.close()
|
chunk.close()
|
||||||
output.close()
|
|
||||||
|
|
||||||
|
return text
|
||||||
|
|
||||||
|
def _make_text_frame(self, img, ifr):
|
||||||
|
# We need to give each frame its own memory it can own.
|
||||||
|
text_frame = av.video.frame.VideoFrame(
|
||||||
|
img.width, img.height, format="rgba"
|
||||||
|
)
|
||||||
|
text_frame.planes[0].update(img.make_blob(format="rgba"))
|
||||||
|
text_frame.pts = ifr.pts
|
||||||
|
text_frame.dts = ifr.dts
|
||||||
|
text_frame.time_base = ifr.time_base
|
||||||
|
return text_frame
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue