Python script: Extract audio from Youtube videos

and tag the files

While working, I always listen to music.

Sometimes from Youtube videos, as I enjoy live DJ sets - particularly from some specific DJs/Youtube channels.

However, it's annoying (and distracting) to listen on YouTube - I don't need to see the video, and I don't want to click every now and then to confirm "Yes, I am still there!".

I listen to most of my music from my local library, using Audirvana as my digital music player.

So I wanted to download entire channels and only get the music from the videos, tagged properly so they display nicely in Audirvana.

First step was to download all videos from a given Youtube channel.

This is easy using Pulltube, including the ability to download the thumbnail for each video.

Pulltube

So the input of my script is a folder containing videos and images with matching names: one .mp4 and one .jpg for each video.

Ideally, I'm aiming to have only high-fidelity, lossless audio files in my library (.flac or .aac).
However, after checking one example with Audirvana (which includes an audio analysis tool), it appears that the audio embedded in the YouTube videos of that channel is encoded in .mp3 at 128 kbps.
Not great, but good enough for live electronic music.

Here is the script I came up with to process the videos from there:

import os
from pathlib import Path

import moviepy.editor as mp
import mutagen.id3
from mutagen.easyid3 import EasyID3
from mutagen.id3 import ID3, APIC, error
from mutagen.id3 import ID3, TIT2, TIT3, TALB, TPE1, TRCK, TYER
from mutagen.mp3 import MP3
from mutagen.mp3 import MP3

from send2trash import send2trash

### Global Variables

v = True  # verbose mode: print progress details
count = 0  # number of not-yet-converted .mp4 files encountered so far
count_to_do = 70  # set a max for tests

input_folder = 'path/to/folder/with/music/videos'
output_folder = 'path/to/output/music/files'

# Create the output folder when it is missing: os.listdir() below would
# otherwise raise FileNotFoundError on the very first run.
os.makedirs(output_folder, exist_ok=True)

# Base names (extension stripped) of the entries already present in the output
# folder. Kept as a set because membership is tested once per input file.
existing_files = {
    os.path.splitext(entry)[0]
    for entry in os.listdir(output_folder)
    if '.' in entry
}

if v:
    print("Existing files:")
    for f in sorted(existing_files):
        print(f)
    print()

### Loop
#
# Walk the input folder and, for every not-yet-converted .mp4 file:
#   1. export its audio track to <output_folder>/<name>.mp3,
#   2. write title / artist / album-artist tags,
#   3. embed <name>.jpg from the same folder as cover art,
#   4. move the thumbnail and the video to the trash.
# A failure on one file is reported and the loop moves on to the next file.

# Counters for the folders and files visited by the walk. They have to exist
# before the loop because it updates them with `+=`.
count_folders = 0
count_files = 0

for folder_path, subfolders, files in os.walk(input_folder):
    # os.walk yields (folder_path: str, subfolders: list, files: list), first
    # for the root folder and then for each subfolder.

    count_folders += 1

    if v:
        print()
        print(type(folder_path), f"{folder_path=}")
        print(type(subfolders), f"{subfolders=}")
        print(type(files), f"{files=}")

    count_files += len(files)

    for file in files:  # file --> str name w/ extension
        if v:
            print(f"\n{file=}")

        file_name, file_extension = os.path.splitext(file)

        # Only videos are converted; a thumbnail is handled with its video.
        if file_extension.lower() != '.mp4':
            continue

        if file_name in existing_files:
            print(f"{file_name} already processed.")
            continue

        count += 1

        # Past the test limit: keep counting but do not convert anything.
        if count > count_to_do:
            continue

        video_path = os.path.join(folder_path, file)
        audio_path = os.path.join(output_folder, f"{file_name}.mp3")
        cover_path = os.path.join(folder_path, f"{file_name}.jpg")

        print(f"Processing {file_name} with path: {video_path}")

        try:
            my_clip = mp.VideoFileClip(video_path)
            try:
                print(f"Writing audio file to {audio_path}")
                my_clip.audio.write_audiofile(audio_path)
            finally:
                # Always release the resources held by the clip, even when
                # the export fails.
                my_clip.close()

            # Tags

            title = file_name.strip()
            mp3file = MP3(audio_path, ID3=EasyID3)
            mp3file['title'] = [title]
            mp3file['albumartist'] = ['enter_artist_name']
            mp3file['artist'] = ['enter_artist_name']
            mp3file.save()

            # COVER

            try:  # handling video files with no thumbnail
                id3_tags = ID3(audio_path)
                print(cover_path)

                with open(cover_path, 'rb') as albumart:
                    id3_tags['APIC'] = APIC(
                        encoding=3,
                        mime='image/jpeg',
                        type=3, desc=u'Cover',
                        data=albumart.read()
                    )

                id3_tags.save()

                send2trash(cover_path)

            except Exception as cover_exc:
                # The video is left in the input folder in that case.
                print(f"COVER ERROR with {cover_path}: {cover_exc!r}")
                continue

            send2trash(video_path)

        except Exception as exc:
            # Per-file boundary: report the failure and go on with the next
            # file. `Exception` (not a bare except) lets Ctrl-C stop the run.
            print(f"ERROR with {video_path}: {exc!r}")
            continue

        print()

links

social