YT-DLP

Feature-rich command-line audio/video downloader

I started using YT-DLP to download videos from YouTube as part of Kalturee, my collection of scripts to help with demos at Kaltura.

Download videos

def download_youtube_video(url, output_path=None, format='mp4'):
    """
    Download a YouTube video using yt-dlp along with its thumbnail and metadata

    After the download, every ``.webp`` file in the output directory whose name
    starts with the video title is converted to JPEG and the ``.webp`` original
    is removed. Thumbnail conversion is best-effort: a failure is reported but
    does not make the download count as failed.

    Args:
        url (str): YouTube URL to download
        output_path (str, optional): Directory to save the video. Defaults to current directory.
        format (str, optional): Video format. Defaults to 'mp4'.

    Returns:
        bool: True if download was successful, False otherwise

    Notes:
        Reads the module-level ``verbose`` flag (treated as False when it is not
        defined) and increments the module-level ``count`` (started at 0 when it
        is not defined).
    """
    global count

    # Standard-library modules are imported here so the function does not
    # depend on imports made elsewhere in the file.
    import glob
    import os

    # Module-level settings are looked up defensively: a missing 'verbose'
    # must not abort the download with a NameError.
    module_state = globals()
    is_verbose = bool(module_state.get('verbose', False))

    try:
        import yt_dlp
        from PIL import Image

        if not output_path:
            output_path = os.getcwd()

        # Make sure output directory exists
        os.makedirs(output_path, exist_ok=True)

        # Configure yt-dlp options
        ydl_opts = {
            'format': f'bestvideo[ext={format}]+bestaudio[ext=m4a]/best[ext={format}]',
            'outtmpl': os.path.join(output_path, '%(title)s.%(ext)s'),
            'writethumbnail': True,  # Download thumbnail
            'writeinfojson': True,   # Save metadata as JSON
            'quiet': not is_verbose,
            # YoutubeDL has no 'progress' option; 'noprogress' is the
            # documented switch for hiding the progress bar.
            'noprogress': not is_verbose,
        }

        # Create a yt-dlp instance and download the video
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            print(f"Downloading video from: {url}")
            info = ydl.extract_info(url, download=True)
            video_title = info.get('title', 'Unknown title')

            if is_verbose:
                print(f"Downloaded: {video_title}")

            # Convert webp thumbnail to jpg. The title and directory are
            # escaped so characters such as '[' or '*' are matched literally.
            # NOTE(review): yt-dlp may alter some characters when building the
            # file name, in which case the title will not match - confirm.
            pattern = os.path.join(
                glob.escape(output_path),
                f"{glob.escape(video_title)}*.webp",
            )
            for webp_file in glob.glob(pattern):
                try:
                    # Swap only the extension, never a '.webp' that happens
                    # to appear elsewhere in the path.
                    jpg_file = os.path.splitext(webp_file)[0] + '.jpg'
                    # Close the image before deleting the source file.
                    with Image.open(webp_file) as thumbnail:
                        thumbnail.convert("RGB").save(jpg_file, "JPEG")
                    # Remove original webp file after conversion
                    os.remove(webp_file)
                    if is_verbose:
                        print(f"Converted thumbnail to JPG: {jpg_file}")
                except Exception as e:
                    # Deliberately broad: a bad thumbnail must not fail the download
                    print(f"Error converting thumbnail: {e}")

            if is_verbose:
                print(f"Thumbnail and metadata saved to: {output_path}")

        # Start the counter at 0 when the module has not defined it yet, so a
        # successful download is never reported as a failure.
        count = module_state.get('count', 0) + 1
        return True

    except Exception as e:
        print(f"Error downloading video: {e}")
        return False

# MAIN

# Example usage: fetch a single video into a local folder.
# To target a whole channel instead, use e.g. "https://www.youtube.com/@GlencoreVideos"
youtube_url = "https://www.youtube.com/watch?v=pYsv9hxGo_0"  # Swap in the YouTube URL you want
download_folder = "/Users/nic/Downloads/temp"  # Destination directory for downloaded files

# Run the download and report the outcome on one line
success = download_youtube_video(youtube_url, download_folder)
print("Download completed successfully!" if success else "Download failed.")

Download metadata

def download_youtube_video(url, output_path=None, format='mp4'):
    """
    Download a YouTube video using yt-dlp

    Args:
        url (str): YouTube URL to download
        output_path (str, optional): Directory to save the video. Defaults to current directory.
        format (str, optional): Video format. Defaults to 'mp4'.

    Returns:
        bool: True if download was successful, False otherwise

    Notes:
        Reads the module-level ``verbose`` flag (treated as False when it is not
        defined) and increments the module-level ``count`` (started at 0 when it
        is not defined).
    """
    global count

    # Imported here so the function does not depend on imports made elsewhere
    # in the file.
    import os

    # Module-level settings are looked up defensively: a missing 'verbose'
    # must not abort the download with a NameError.
    module_state = globals()
    is_verbose = bool(module_state.get('verbose', False))

    try:
        import yt_dlp

        if not output_path:
            output_path = os.getcwd()

        # Make sure output directory exists
        os.makedirs(output_path, exist_ok=True)

        # Configure yt-dlp options
        ydl_opts = {
            'format': f'bestvideo[ext={format}]+bestaudio[ext=m4a]/best[ext={format}]',
            'outtmpl': os.path.join(output_path, '%(title)s.%(ext)s'),
            'quiet': not is_verbose,
            # YoutubeDL has no 'progress' option; 'noprogress' is the
            # documented switch for hiding the progress bar.
            'noprogress': not is_verbose,
        }

        # Create a yt-dlp instance and download the video
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            print(f"Downloading video from: {url}")
            info = ydl.extract_info(url, download=True)
            if is_verbose:
                print(f"Downloaded: {info.get('title', 'Unknown title')}")

        # Start the counter at 0 when the module has not defined it yet, so a
        # successful download is never reported as a failure.
        count = module_state.get('count', 0) + 1
        return True

    except Exception as e:
        print(f"Error downloading video: {e}")
        return False

def get_youtube_channel_videos(channel_url, limit=None):
    """
    Get a list of videos and their metadata from a YouTube channel

    Entries that yt-dlp could not extract (it is run with ``ignoreerrors``)
    are skipped instead of aborting the whole listing.

    Args:
        channel_url (str): YouTube channel URL. For handle URLs (containing "/@")
            "/videos" is appended when missing.
        limit (int, optional): Maximum number of videos to retrieve. None means all videos.

    Returns:
        list: List of dictionaries containing video metadata, with the keys
            'title', 'id', 'url', 'upload_date', 'duration', 'duration_string',
            'view_count' and 'description'. Empty list on error.

    Notes:
        Reads the module-level ``verbose`` flag (treated as False when it is not
        defined) and sets the module-level ``count`` to the number of videos
        returned.
    """
    global count

    # Looked up defensively so a missing 'verbose' global does not turn every
    # call into a NameError.
    is_verbose = bool(globals().get('verbose', False))

    try:
        import yt_dlp

        # Configure yt-dlp options for retrieving all videos
        ydl_opts = {
            'extract_flat': 'in_playlist',
            'quiet': not is_verbose,
            'ignoreerrors': True,
            'simulate': True,
            'no_warnings': True,
            'playlistend': limit if limit else None,  # Limit if specified
        }

        # If getting all videos, need to fetch the channel's playlist URL
        if "/@" in channel_url:
            # Drop a trailing slash first so we never build '.../@name//videos'
            channel_url = channel_url.rstrip('/')
            # Add /videos to the URL to ensure we get the videos tab
            if not channel_url.endswith('/videos'):
                channel_url = channel_url + '/videos'

        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            print(f"Fetching videos from channel: {channel_url}")
            channel_info = ydl.extract_info(channel_url, download=False)

            # Get all videos from the channel
            all_videos = []

            # Handle the nested playlist structure. channel_info, the entries
            # list and individual entries are each checked for None before use.
            if channel_info and channel_info.get('_type') == 'playlist':
                for item in channel_info.get('entries') or []:
                    if not item:
                        continue
                    if item.get('_type') == 'playlist':
                        # This is a tab (like 'Videos' or 'Shorts')
                        print(f"Processing playlist: {item.get('title')}")

                        # Extract all videos from this tab
                        all_videos.extend(
                            video
                            for video in item.get('entries') or []
                            if video and video.get('_type') != 'playlist'
                        )
                    else:
                        # Direct video item
                        all_videos.append(item)

            # Extract relevant metadata
            videos = [
                {
                    'title': entry.get('title', 'Unknown title'),
                    'id': entry.get('id'),
                    # Fall back to a watch URL built from the id when no URL is given
                    'url': entry.get('url') or f"https://www.youtube.com/watch?v={entry.get('id')}",
                    'upload_date': entry.get('upload_date'),
                    'duration': entry.get('duration'),
                    'duration_string': format_duration(entry.get('duration')),
                    'view_count': entry.get('view_count'),
                    'description': entry.get('description'),
                }
                for entry in all_videos
            ]

            # Apply overall limit if specified ('playlistend' applies per
            # playlist, so several tabs together can exceed it)
            if limit and len(videos) > limit:
                videos = videos[:limit]

            count = len(videos)

            if is_verbose:
                print(f"Found {count} videos total")

            return videos

    except Exception as e:
        print(f"Error fetching channel videos: {e}")
        import traceback
        traceback.print_exc()
        return []

def format_duration(seconds):
    """
    Format duration in seconds to a readable time string

    The result is "H:MM:SS" when the duration is at least one hour and "M:SS"
    otherwise. Fractional seconds are truncated.

    Args:
        seconds (int or float): Duration in seconds

    Returns:
        str: Formatted duration string, or "Unknown" when the value is missing,
            zero, negative, or cannot be converted to a whole number of seconds
    """
    if not seconds:
        return "Unknown"

    # Convert to integer to handle float values. OverflowError covers
    # float('inf'); ValueError covers NaN and non-numeric strings.
    try:
        total_seconds = int(seconds)
    except (TypeError, ValueError, OverflowError):
        return "Unknown"

    # A negative duration is meaningless, and floor division would render it
    # as a misleading positive time.
    if total_seconds < 0:
        return "Unknown"

    minutes, secs = divmod(total_seconds, 60)
    hours, minutes = divmod(minutes, 60)

    if hours > 0:
        return f"{hours}:{minutes:02d}:{secs:02d}"
    return f"{minutes}:{secs:02d}"

# MAIN

# Example usage: list the videos of one channel, longest first
youtube_channel_url = "https://www.youtube.com/@abb"  # Swap in the channel you want to inspect
video_limit = None  # None fetches everything; an integer caps the number of results

# Fetch the channel's video metadata
videos = get_youtube_channel_videos(youtube_channel_url, video_limit)

if not videos:
    print("No videos found or error occurred.")
else:
    # Longest first; a missing or None duration is ranked as 0
    videos_sorted = sorted(videos, key=lambda x: x.get('duration') or 0, reverse=True)

    print(f"\nFound {len(videos_sorted)} videos in the channel (sorted by length):")
    for i, video in enumerate(videos_sorted, 1):
        print(f"\n{i}. {video['title']}")
        print(f"   URL: {video['url']}")
        print(f"   Duration: {video['duration_string']}")

        if video['upload_date']:
            # Rewrite YYYYMMDD as YYYY-MM-DD; anything shorter is reported as unknown
            date = video['upload_date']
            if len(date) >= 8:
                formatted_date = f"{date[:4]}-{date[4:6]}-{date[6:8]}"
            else:
                formatted_date = "Unknown"
            print(f"   Uploaded: {formatted_date}")

        if video['view_count']:
            # Thousands separators keep large view counts readable
            print(f"   Views: {video['view_count']:,}")

links

social