Skip to content

Commit b00bacc

Browse files
committed
Add smooth transitions via xfade, hwaccel decoding
1 parent 88d1aa0 commit b00bacc

File tree

2 files changed

+192
-33
lines changed

2 files changed

+192
-33
lines changed

pyproject.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
[project]
22
name = "highlight_video_maker"
3-
version = "0.0.2"
3+
version = "0.1.0"
44
authors = [{ name = "Micha Albert", email = "[email protected]" }]
5-
description = "A utility to take several video inputs, take the loudest points, and create a compilation of them"
5+
description = "A utility to take several video inputs, take the loudest points, and create a compilation of them with smooth transitions"
66
readme = "README.md"
77
requires-python = ">=3.12"
88
classifiers = [

src/highlight_video_maker/main.py

Lines changed: 190 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,35 @@
11
import concurrent.futures
2-
from logging import Logger, getLevelNamesMapping
32
import math
43
import random
4+
import shutil
55
import subprocess
66
from collections import Counter
7+
from logging import Logger, getLevelNamesMapping
78
from pathlib import Path
8-
from typing import Dict, List
9+
from typing import Any, Dict, Generator, List
910

1011
import click
1112

1213
from .logger import get_logger
1314

1415
logger: Logger
1516

17+
# Transition names substituted into FFmpeg's `xfade=transition=...` filter
# option; one is picked at random for every cut between two clips.
XFADE_TRANSITIONS = [
    "fade", "slideleft", "slidedown", "smoothup", "smoothleft",
    "circleopen", "diagtl", "horzopen", "fadegrays", "pixelize",
    "hrwind", "diagbl", "diagtr",
]
32+
1633

1734
@click.group()
1835
@click.option(
@@ -36,13 +53,37 @@ def cli(log_level: str):
3653
IN_DIR: Path
3754
OUT_DIR: Path
3855
CACHE_DIR = Path("/tmp/video-maker-cache")
39-
THREADS = 16
56+
THREADS = 12
4057

41-
MIN_SEGMENT_LENGTH = 3.5
42-
MAX_SEGMENT_LENGTH = 7.5
58+
MIN_SEGMENT_LENGTH = 5
59+
MAX_SEGMENT_LENGTH = 9
4360
MAX_SEGMENT_PADDING = 6
4461

4562

63+
def nonrepeating_generator(
    source: List[Any], desired_length: int
) -> Generator[Any, None, None]:
    """
    Creates a generator that yields randomly chosen items from `source`,
    never yielding an item equal to the one yielded immediately before it,
    up to `desired_length` times.

    Nothing is yielded when `source` is empty or `desired_length` is not
    positive.

    Because this is a generator, the errors below surface when it is
    iterated, not when it is created.

    Raises:
        ValueError: if `source` holds a single item but more than one item
            is requested, or if every item in `source` equals the item
            that was just yielded.
    """
    if not source:
        return
    if len(source) == 1 and desired_length > 1:
        raise ValueError("Cannot avoid repetition with only one unique string.")

    # A private sentinel marks "nothing yielded yet". Using None for this
    # would wrongly exclude a legitimate None item from the first pick.
    nothing_yielded = object()
    prev = nothing_yielded
    count = 0

    while count < desired_length:
        choices = [s for s in source if prev is nothing_yielded or s != prev]
        if not choices:
            raise ValueError("No valid choices left to avoid repetition.")
        current = random.choice(choices)
        yield current
        prev = current
        count += 1
85+
86+
4687
def seconds_to_timestamp(seconds: float):
4788
"""Converts total seconds to a timestamp (HH:MM:SS.ms)."""
4889
hours = int(seconds // 3600)
@@ -78,11 +119,7 @@ def generate_segment_lengths(file_length: float) -> List[float]:
78119
if remaining_length <= MAX_SEGMENT_PADDING:
79120
segment_lengths.append(remaining_length)
80121
break
81-
segment_lengths.append(
82-
random.uniform(
83-
MIN_SEGMENT_LENGTH, min(MAX_SEGMENT_LENGTH, remaining_length)
84-
)
85-
)
122+
segment_lengths.append(random.uniform(MIN_SEGMENT_LENGTH, MAX_SEGMENT_LENGTH))
86123
logger.debug(f"Generated segment lengths: {segment_lengths}")
87124
return segment_lengths
88125

@@ -113,9 +150,89 @@ def get_amplitude_of_segment(clip: Path):
113150
check=True,
114151
capture_output=True,
115152
).stderr
153+
logger.debug(res)
116154
return float(res.decode().split("mean_volume: ")[1].split(" dB")[0])
117155

118156

157+
def build_input_flags(video_files: List[str]) -> str:
    """Return a shell-safe string of `-i <path>` flags, one per input video.

    Each path is escaped with `shlex.quote`, so paths containing spaces,
    quotes, `$` or backticks reach FFmpeg unchanged when the result is
    embedded in a shell command line. An empty list yields an empty string.
    """
    # Function-scope import: shlex is not among the module-level imports.
    import shlex

    return " ".join(f"-i {shlex.quote(str(video))}" for video in video_files)
159+
160+
161+
def build_preprocess_filters(
    video_files: List[str],
    *,
    width: int = 1280,
    height: int = 720,
    fps: int = 30,
) -> tuple[List[str], List[str], List[str]]:
    """Build the per-input normalisation filters for an FFmpeg filter graph.

    For the input at position `i` two filter chains are emitted, each
    terminated by `;`:

    * video: `[i:v]` is converted to yuv420p, scaled to `width`x`height`,
      has its timestamps reset to start at zero and is resampled to `fps`,
      producing the label `v{i}`;
    * audio: `[i:a]` is passed through `aresample=async=1`, producing the
      label `a{i}`.

    Only the number of entries in `video_files` is used; the paths
    themselves are not inspected.

    Args:
        video_files: Input videos, in the order they are passed to FFmpeg.
        width: Output frame width in pixels.
        height: Output frame height in pixels.
        fps: Output frame rate.

    Returns:
        A tuple `(filters, video_labels, audio_labels)`. `filters` holds two
        entries per input (video first, then audio); the label lists hold
        one bare label (without brackets) per input.
    """
    filters: List[str] = []
    video_labels: List[str] = []
    audio_labels: List[str] = []

    # NOTE(review): presumably every clip is normalised to one format so the
    # later xfade stage receives matching inputs -- confirm against FFmpeg docs.
    video_chain = (
        f"format=yuv420p,scale={width}:{height},setpts=PTS-STARTPTS,fps={fps}"
    )

    for index in range(len(video_files)):
        v_label = f"v{index}"
        a_label = f"a{index}"
        filters.append(f"[{index}:v]{video_chain}[{v_label}];")
        filters.append(f"[{index}:a]aresample=async=1[{a_label}];")
        video_labels.append(v_label)
        audio_labels.append(a_label)

    return filters, video_labels, audio_labels
175+
176+
177+
def build_transition_filters_dynamic(
    filter_gen: Generator[str, Any, None],
    video_labels: List[str],
    audio_labels: List[str],
    durations: List[float],
    fade_duration: float = 1.0,
) -> tuple[List[str], List[str], str, str]:
    """Chain every clip to the next with an xfade / acrossfade pair.

    For each adjacent pair of labels one video `xfade` filter and one audio
    `acrossfade` filter are produced. The output of transition `i` is
    labelled `vxf{i+1}` / `acf{i+1}` and becomes the first input of the
    following transition.

    Args:
        filter_gen: Source of transition names; one is consumed per cut.
        video_labels: Bare video stream labels, one per clip, in play order.
        audio_labels: Bare audio stream labels, parallel to `video_labels`.
        durations: Clip lengths in seconds, parallel to `video_labels`.
        fade_duration: Length of every transition in seconds.

    Returns:
        `(video_filters, audio_filters, final_video_label,
        final_audio_label)`. With a single clip no filters are produced and
        the final labels are that clip's own labels.

    Raises:
        IndexError: if a label list is empty, or `audio_labels` or
            `durations` is too short for `video_labels`.
        StopIteration: if `filter_gen` runs out of transition names.
    """
    # Work on copies: the running output label is written back into the
    # list at each step, and the caller's lists must not be altered.
    video_labels = list(video_labels)
    audio_labels = list(audio_labels)

    vf_filters: List[str] = []
    af_filters: List[str] = []

    # Every transition overlaps the two clips by `fade_duration`, so each
    # clip extends the running offset by its length minus one fade.
    offset = 0.0
    for i in range(len(video_labels) - 1):
        transition = next(filter_gen)
        offset += durations[i] - fade_duration

        out_v = f"vxf{i+1}"
        out_a = f"acf{i+1}"

        vf_filters.append(
            f"[{video_labels[i]}][{video_labels[i+1]}]xfade="
            f"transition={transition}:duration={fade_duration}:offset={offset:.2f}[{out_v}];"
        )
        # The merged stream takes the place of the right-hand clip so the
        # next iteration fades from it.
        video_labels[i + 1] = out_v

        af_filters.append(
            f"[{audio_labels[i]}][{audio_labels[i+1]}]acrossfade="
            f"d={fade_duration}:c1=tri:c2=tri[{out_a}];"
        )
        audio_labels[i + 1] = out_a

    return vf_filters, af_filters, video_labels[-1], audio_labels[-1]
208+
209+
210+
def assemble_filter_complex(
    pre_filters: List[str],
    xfade_filters: List[str],
    audio_fades: List[str],
) -> str:
    """Join all filter chains into one newline-separated filter graph string.

    The chains keep their order: preprocessing filters first, then the
    video transitions, then the audio crossfades.
    """
    ordered_chains = [*pre_filters, *xfade_filters, *audio_fades]
    return "\n".join(ordered_chains)
216+
217+
218+
def run_ffmpeg_command(
219+
input_flags: str, filter_complex: str, output_file: Path, final_audio_label: str
220+
) -> None:
221+
cmd: str = f"""
222+
ffmpeg -y {input_flags} \
223+
-filter_complex "{filter_complex}" \
224+
-map "[vxf{filter_complex.split("vxf")[-1].split("];")[0]}]" \
225+
-map "[{final_audio_label}]" \
226+
-c:v libx264 -preset slow \
227+
-c:a aac -b:a 128k "{output_file}"
228+
"""
229+
# the .split()[-1].split() lunacy gets the index of the final VXF
230+
# filter so that FFmpeg knows where to map the video output.
231+
# TODO: remove that mess and put the same logic in
232+
# build_transition_filters_dynamic
233+
subprocess.run(cmd, shell=True, check=True, capture_output=True)
234+
235+
119236
@cli.command()
120237
@click.option(
121238
"--input-dir",
@@ -145,11 +262,29 @@ def get_amplitude_of_segment(clip: Path):
145262
'or start with "./".',
146263
type=click.Path(exists=False, resolve_path=True, path_type=Path),
147264
)
265+
@click.option(
266+
"--decode-options",
267+
help="Options to pass to FFmpeg for some decode operations."
268+
"While optional, proper use of this option will significantly"
269+
"reduce processing time. Note that inclusion of any encoding options"
270+
"will cause this program to fail.",
271+
type=str,
272+
default="",
273+
)
274+
@click.option(
275+
"--num-segs",
276+
help="Total number of segments to concatenate in the output."
277+
"Controls the length of the final video.",
278+
type=int,
279+
default=10,
280+
)
148281
def run(
149282
input_dir: Path,
150283
watermark_image: Path,
151284
horiz_output_file: Path,
152285
vert_output_file: Path,
286+
decode_options: str,
287+
num_segs: int,
153288
):
154289
"""Main function that orchestrates the video processing pipeline."""
155290
logger.info("Starting video processing pipeline.")
@@ -205,10 +340,8 @@ def run(
205340
representative_video_audio_levels[seg] = representative_video_audio_futures[
206341
seg
207342
].result()
208-
209-
highest = dict(Counter(representative_video_audio_levels).most_common(10))
343+
highest = dict(Counter(representative_video_audio_levels).most_common(num_segs))
210344
loudest_seg_indexes: List[int] = [int(str(Path(k).stem)) for k in highest.keys()]
211-
212345
for video in raw_videos[2]:
213346
out_folder = Path(CACHE_DIR, "loudest", Path(video).stem)
214347
out_folder.mkdir(parents=True, exist_ok=True)
@@ -219,46 +352,72 @@ def run(
219352
seg,
220353
out_folder.parent,
221354
)
222-
355+
video_files: List[str] = []
223356
with open(str(Path(CACHE_DIR, "list.txt")), "w") as f:
224357
for seg in loudest_seg_indexes:
225358
random_seg = Path(random.choice(raw_videos[2]))
226-
f.write(
227-
f"file '{Path(CACHE_DIR, "loudest", random_seg.stem, str(seg) + random_seg.suffix)}'\n"
359+
vid_path = Path(
360+
CACHE_DIR, "loudest", random_seg.stem, str(seg) + random_seg.suffix
228361
)
362+
f.write(f"file '{vid_path}'\n")
363+
video_files.append(str(vid_path.resolve()))
364+
365+
filter_gen = nonrepeating_generator(XFADE_TRANSITIONS, num_segs)
366+
367+
input_flags: str = f"{decode_options} {build_input_flags(video_files)}"
368+
pre_filters, vlabels, alabels = build_preprocess_filters(video_files)
369+
durations = [get_video_duration(Path(vf)) for vf in video_files]
370+
vfades, afades, final_v, final_a = build_transition_filters_dynamic(
371+
filter_gen, vlabels, alabels, durations, 0.5
372+
)
373+
374+
full_filter: str = assemble_filter_complex(pre_filters, vfades, afades)
229375

376+
logger.info("Creating unmarked video...")
377+
378+
run_ffmpeg_command(
379+
output_file=CACHE_DIR
380+
/ "out-unmarked.mp4", # This file will have all the transitions without the overlayed logo
381+
input_flags=input_flags,
382+
filter_complex=full_filter,
383+
final_audio_label=final_a,
384+
)
230385

231386
logger.info("Creating horizontal video...")
232-
# Horizontal Pipeline: Concatenate clips and overlay a semi‑transparent watermark.
387+
388+
# Horizontal Pipeline: Take unmarked file and add a semi‑transparent watermark.
233389
subprocess.run(
234-
f'''ffmpeg -y -f concat -safe 0 -i "{Path(CACHE_DIR, "list.txt")}" -i "{watermark_image}" \
235-
-filter_complex "
236-
[1]format=rgba,colorchannelmixer=aa=0.5[logo];
237-
[0][logo]overlay=W-w-30:H-h-30:format=auto,format=yuv420p
238-
" -c:a aac -b:a 128k "{horiz_output_file}"''',
390+
f'''ffmpeg -y {decode_options} -i "{CACHE_DIR / "out-unmarked.mp4"}" -i "{watermark_image}" \
391+
-filter_complex " \
392+
[1]format=rgba,colorchannelmixer=aa=0.5[logo]; \
393+
[0][logo]overlay=W-w-30:H-h-30:format=auto,format=yuv420p \
394+
" -c:a aac -b:a 128k "{horiz_output_file}"''',
239395
shell=True,
240396
check=True,
241397
capture_output=True,
242398
)
243399

244400
logger.info("Creating vertical video...")
245-
# Vertical Pipeline: Concatenate, crop (zoom), split & blur for a vertical aspect ratio,
401+
402+
# Vertical Pipeline: Crop (zoom), split & blur unmarked file for a vertical aspect ratio,
246403
# then overlay a centered, opaque watermark at the bottom.
247404
subprocess.run(
248-
f'''ffmpeg -y -f concat -safe 0 -i "{Path(CACHE_DIR, "list.txt")}" -i "{watermark_image}" \
249-
-filter_complex "
250-
[0]crop=3/4*in_w:in_h[zoomed];
251-
[zoomed]split[original][copy];
252-
[copy]scale=-1:ih*(4/3)*(4/3),crop=w=ih*9/16,gblur=sigma=17:steps=5[blurred];
253-
[blurred][original]overlay=(main_w-overlay_w)/2:(main_h-overlay_h)/2[vert];
254-
[vert][1]overlay=(W-w)/2:H-h-30,format=yuv420p
255-
" -c:a aac -b:a 128k "{vert_output_file}"''',
405+
f'''ffmpeg -y {decode_options} -i "{CACHE_DIR / "out-unmarked.mp4"}" -i "{watermark_image}" \
406+
-filter_complex " \
407+
[0]crop=3/4*in_w:in_h[zoomed]; \
408+
[zoomed]split[original][copy]; \
409+
[copy]scale=-1:ih*(4/3)*(4/3),crop=w=ih*9/16,gblur=sigma=17:steps=5[blurred]; \
410+
[blurred][original]overlay=(main_w-overlay_w)/2:(main_h-overlay_h)/2[vert]; \
411+
[vert][1]overlay=(W-w)/2:H-h-30,format=yuv420p \
412+
" -c:a aac -b:a 128k "{vert_output_file}"''',
256413
shell=True,
257414
check=True,
258415
capture_output=True,
259416
)
260417

261418
logger.info("Video processing pipeline completed.")
419+
logger.info("Cleaning up temporary files...")
420+
shutil.rmtree(CACHE_DIR)
262421

263422

264423
if __name__ == "__main__":

0 commit comments

Comments
 (0)