Think of this example as a powerpoint presentation on steroids - fully automated with animated videos in the background.
The code below leverages free online video resources (e.g., Pexels) and a free text-to-speech library (pyttsx3).
This script builds a single video - goes through the small texts and their associated video, combines them and builds a single video.
About the example
• Just 2 sentences and 2 videos
• Random videos from Pexels and some gibberish words to show the script works
• Proof of concept (could be optimized so it doesn't re-download/convert the videos each time it's run, but only when changes occur)
• Also only uses the default voice (you might have other voice settings available that make it sound more realistic)
Generated video output for the below script.
# pip install requests pyttsx3 "moviepy<2"   (the script imports moviepy.editor, which MoviePy 2.x removed)
import requests import os import moviepy.editor as mp import pyttsx3
# Function to download a file (e.g. a video from Pexels) to disk
def download_video(url, filename, chunk_size=1024 * 1024, timeout=60):
    """Download ``url`` and save the response body to ``filename``.

    The body is streamed to disk in chunks instead of being held in memory
    as a whole, and an HTTP error status raises instead of silently saving
    an error page under a media file name.

    Args:
        url: Address of the file to fetch.
        filename: Local path the downloaded bytes are written to.
        chunk_size: Number of bytes requested per streamed chunk.
        timeout: Seconds passed to ``requests.get`` as its timeout.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
    """
    with requests.get(url, stream=True, timeout=timeout) as response:
        # Fail before the output file is created if the request was rejected.
        response.raise_for_status()
        with open(filename, 'wb') as f:
            for chunk in response.iter_content(chunk_size=chunk_size):
                f.write(chunk)
# Function to convert text to speech using pyttsx3
def text_to_speech(text, output_file, rate=100):
    """Render ``text`` as spoken audio and save it to ``output_file``.

    Args:
        text: The sentence(s) to be spoken.
        output_file: Path of the audio file to create.
        rate: Value for the engine's 'rate' property. Defaults to 100, the
            value this script always used (the original comment notes the
            pyttsx3 default is 200), so existing calls behave as before.
    """
    engine = pyttsx3.init()
    engine.setProperty('rate', rate)  # Adjust the speed here
    engine.save_to_file(text, output_file)
    # save_to_file only queues the job; runAndWait processes the queue.
    engine.runAndWait()
# Function to combine video with audio
def combine_video_audio(video_file, audio_file, output_file):
    """Fit a video to a narration track and write the result to disk.

    The video is trimmed when it is longer than the audio and looped (then
    trimmed) when it is shorter, so the written file lasts exactly as long
    as the narration. Both source readers are closed once rendering has
    finished or failed.

    Args:
        video_file: Path of the source video.
        audio_file: Path of the narration audio.
        output_file: Path of the video file to write.
    """
    source_video = mp.VideoFileClip(video_file)
    try:
        audio = mp.AudioFileClip(audio_file)
        try:
            video = source_video
            if audio.duration < video.duration:
                # Trim video to match audio duration
                video = video.subclip(0, audio.duration)
            elif audio.duration > video.duration:
                # Repeat video until its duration covers the audio duration
                num_repeats = int(audio.duration / video.duration) + 1
                video = mp.concatenate_videoclips([video] * num_repeats)
                video = video.subclip(0, audio.duration)  # Trim excess duration
            video = video.set_audio(audio)
            video.write_videofile(output_file)
        finally:
            audio.close()
    finally:
        # The derived clips read through this clip's reader, so it is only
        # closed after the output file has been written.
        source_video.close()
# Function to add background music to video
def add_background_music(video_clip, background_music_file, output_file):
    """Mix a music file under ``video_clip`` and write the final video.

    Args:
        video_clip: Clip to render; its own audio (if any) is kept and mixed
            with the music.
        background_music_file: Path of the music file.
        output_file: Path of the video file to write.
    """
    # Load background music
    music_source = mp.AudioFileClip(background_music_file)
    try:
        # Cut the music to the video length, but never request more audio
        # than the music file actually contains.
        music_end = min(music_source.duration, video_clip.duration)
        background_music = music_source.subclip(0, music_end)

        # Combine video audio with background music; a silent clip has no
        # audio track to mix in.
        tracks = [background_music]
        if video_clip.audio is not None:
            tracks.insert(0, video_clip.audio)
        final_audio = mp.CompositeAudioClip(tracks)

        # Set the combined audio to the video
        video_clip = video_clip.set_audio(final_audio)

        # Write the final video with background music
        video_clip.write_videofile(output_file)
    finally:
        music_source.close()
# Download videos from Pexels
video_urls = [
    "https://www.pexels.com/download/video/4434242/",
    "https://www.pexels.com/download/video/5968893/",
]  # Add Pexels video URLs here
video_filenames = []
for i, url in enumerate(video_urls):
    filename = f"video_{i}.mp4"
    download_video(url, filename)
    video_filenames.append(filename)

# Convert text to speech
texts = [
    "The wise old fox can learn more from the frog than the frog can learn from the fox",
    "Do you like cats? Or do cats like mice?",
]  # Add texts for narration here
audio_filenames = []
for i, text in enumerate(texts):
    audio_filename = f"audio_{i}.mp3"
    text_to_speech(text, audio_filename)
    audio_filenames.append(audio_filename)

# Combine each video with its narration (one intermediate file per section)
combined_videos = []
for i, (video_file, audio_file) in enumerate(zip(video_filenames, audio_filenames)):
    combined_filename = f"combined_video_{i}.mp4"
    combine_video_audio(video_file, audio_file, combined_filename)
    combined_videos.append(combined_filename)

# Concatenate combined videos
final_video = mp.concatenate_videoclips([mp.VideoFileClip(v) for v in combined_videos])

# Download background music from URL
background_music_url = "https://www.dropbox.com/s/hz7wlfj115mvwwf/0465.%20Spring%20-%20AShamaluevMusic.mp3?dl=1"  # Replace with the URL of your background music
background_music_filename = "background_music.mp3"
# The music has to be fetched before it can be mixed in; download_video
# simply saves whatever the URL returns, so it works for audio too.
download_video(background_music_url, background_music_filename)

# Add background music
final_output_file = "final_output_video.mp4"
add_background_music(final_video, background_music_filename, final_output_file)
Text To Speech Videos (Text On Screen)
This version takes things a bit further by adding 'text' to the bottom of each video section (i.e., text being converted to speech is added to the bottom so you can both listen or read the text while the video plays).
import requests import os import moviepy.editor as mp import pyttsx3 import cv2 import numpy as np
# Function to download a file (e.g. a video from Pexels) to disk
def download_video(url, filename, chunk_size=1024 * 1024, timeout=60):
    """Fetch ``url`` and store the response body in ``filename``.

    The response is streamed to the file chunk by chunk rather than loaded
    into memory in one piece, and a failed request raises instead of
    writing the server's error response to a media file.

    Args:
        url: Address of the file to fetch.
        filename: Local path the downloaded bytes are written to.
        chunk_size: Number of bytes requested per streamed chunk.
        timeout: Seconds passed to ``requests.get`` as its timeout.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
    """
    with requests.get(url, stream=True, timeout=timeout) as response:
        # Check the status first so no file is created for a failed request.
        response.raise_for_status()
        with open(filename, 'wb') as f:
            for chunk in response.iter_content(chunk_size=chunk_size):
                f.write(chunk)
# Function to convert text to speech using pyttsx3
def text_to_speech(text, output_file, rate=100):
    """Speak ``text`` with pyttsx3 and save the audio to ``output_file``.

    Args:
        text: The sentence(s) to be spoken.
        output_file: Path of the audio file to create.
        rate: Value for the engine's 'rate' property. The default of 100 is
            the value previously hard-coded here (the original comment notes
            the pyttsx3 default is 200), so existing calls are unaffected.
    """
    engine = pyttsx3.init()
    engine.setProperty('rate', rate)  # Adjust the speed here
    engine.save_to_file(text, output_file)
    # The save request is queued; runAndWait runs the queued commands.
    engine.runAndWait()
# Function to combine video with audio
def combine_video_audio(video_file, audio_file, output_file):
    """Return the video fitted to the narration's length, with it as audio.

    A video longer than the narration is cut short; a shorter one is
    repeated and then cut. Nothing is written to disk here.

    Args:
        video_file: Path of the source video.
        audio_file: Path of the narration audio.
        output_file: Accepted for signature compatibility; not used by this
            version of the function.

    Returns:
        The resulting moviepy clip, carrying the narration as its audio.
    """
    footage = mp.VideoFileClip(video_file)
    narration = mp.AudioFileClip(audio_file)

    speech_len = narration.duration
    footage_len = footage.duration

    if speech_len < footage_len:
        # Footage runs long: keep only the part covered by the narration.
        footage = footage.subclip(0, speech_len)
    elif speech_len > footage_len:
        # Footage runs short: loop it past the narration, then cut to size.
        loops = int(speech_len / footage_len) + 1
        looped = mp.concatenate_videoclips([footage for _ in range(loops)])
        footage = looped.subclip(0, speech_len)

    return footage.set_audio(narration)
# Function to add text to video frames
def add_text_to_video_frames(video_clip, text, output_file):
    """Draw ``text`` near the bottom-left of every frame and write the video.

    Args:
        video_clip: The moviepy clip to caption.
        text: The caption drawn on each frame.
        output_file: Path of the captioned video file to write.
    """
    font = cv2.FONT_HERSHEY_SIMPLEX
    font_scale = 3
    font_color = (255, 255, 255)
    # Passed as cv2.putText's 7th positional argument, which is the stroke
    # thickness (the original named this value ``lineType``).
    thickness = 2

    def add_text(frame):
        # putText draws in place, so work on a private copy of the frame
        # rather than on the array handed over by the clip.
        frame = frame.copy()
        bottom_left_corner_of_text = (10, frame.shape[0] - 20)
        cv2.putText(frame, text, bottom_left_corner_of_text, font,
                    font_scale, font_color, thickness)
        return frame

    # Apply the per-frame drawing function and render the result; without
    # this step the output file the caller later opens is never created.
    captioned_clip = video_clip.fl_image(add_text)
    captioned_clip.write_videofile(output_file)
# Function to add background music to video
def add_background_music(video_clip, background_music_file, output_file):
    """Layer background music under ``video_clip`` and write the result.

    Args:
        video_clip: Clip to render; its existing audio (if any) is mixed
            together with the music.
        background_music_file: Path of the music file.
        output_file: Path of the video file to write.
    """
    # Load background music
    music_source = mp.AudioFileClip(background_music_file)
    try:
        # Match the music to the video duration, capped at the length of
        # the music file so no audio past its end is requested.
        music_end = min(music_source.duration, video_clip.duration)
        background_music = music_source.subclip(0, music_end)

        # Combine video audio with background music; skip the video's track
        # when the clip has none.
        tracks = [background_music]
        if video_clip.audio is not None:
            tracks.insert(0, video_clip.audio)
        final_audio = mp.CompositeAudioClip(tracks)

        # Set the combined audio to the video
        video_clip = video_clip.set_audio(final_audio)

        # Write the final video with background music
        video_clip.write_videofile(output_file)
    finally:
        music_source.close()
# --- Step 1: fetch the source videos from Pexels ---------------------------
video_urls = [
    "https://www.pexels.com/download/video/4434242/",
    "https://www.pexels.com/download/video/5968893/",
]  # Add Pexels video URLs here
video_filenames = [f"video_{index}.mp4" for index in range(len(video_urls))]
for source_url, target_path in zip(video_urls, video_filenames):
    download_video(source_url, target_path)

# --- Step 2: narrate each text with text-to-speech -------------------------
texts = [
    "The wise old fox can learn more from the frog than the frog can learn from the fox",
    "Do you like cats? Or do cats like mice?",
]  # Add texts for narration here
audio_filenames = [f"audio_{index}.mp3" for index in range(len(texts))]
for sentence, speech_path in zip(texts, audio_filenames):
    text_to_speech(sentence, speech_path)

# --- Step 3: pair video with narration and caption each section ------------
output_filenames = []
for index, video_file in enumerate(video_filenames):
    section_clip = combine_video_audio(
        video_file,
        audio_filenames[index],
        f"combined_video_{index}.mp4",
    )
    captioned_path = f"output_video_{index}.mp4"
    add_text_to_video_frames(section_clip, texts[index], captioned_path)
    output_filenames.append(captioned_path)

# --- Step 4: join the captioned sections into one clip ---------------------
section_clips = [mp.VideoFileClip(path) for path in output_filenames]
final_video = mp.concatenate_videoclips(section_clips)

# --- Step 5: fetch the background music ------------------------------------
background_music_url = "https://www.dropbox.com/s/hz7wlfj115mvwwf/0465.%20Spring%20-%20AShamaluevMusic.mp3?dl=1"  # Replace with the URL of your background music
background_music_filename = "background_music.mp3"
download_video(background_music_url, background_music_filename)

# --- Step 6: mix in the music and render the final video -------------------
final_output_file = "final_output_video.mp4"
add_background_music(final_video, background_music_filename, final_output_file)
Things To Try
Some experiments and things to try to take things a bit further:
• Move the text to the top of the screen instead of the bottom (seen better)
• Change the color of the on screen text
• Try more text and video sections (e.g., example was 2 but you can try 6)
• Optimize the code so the downloaded videos are cached in a folder - not downloaded each time it runs (only when it is missing)
• Add some extra overlapping graphics on top of the video
• Try changing the speed and voice for each of the different clip sections
• Develop a simple UI tool that can edit/preview/download the video
• Upload your videos to YouTube (share your creations with others)
• Try modifying the text on screen - so it isn't just at the bottom of the video clip section - but the words change as the text is read out on the video
Advanced
• Setting up python web-server and writing a Javascript/HTML interface to edit/load/save videos (run python in the backend using python server setup - see the links and resources at the end for more information)
• Automatically search for matching videos on Pexels, then recommend them as the default starting videos
Links & Resources
• Text To Speech (Offline) Using pyttsx3 Library LINK • Web Programming (e.g., Python Server) LINK
Visitor:
Copyright (c) 2002-2025 xbdev.net - All rights reserved.
Designated articles, tutorials and software are the property of their respective owners.