While working, I always listen to music.
Sometimes from Youtube videos, as I enjoy live DJ sets - particularly from some specific DJs/Youtube channels.
However, it's annoying (and distracting) to listen from Youtube - I don't need to see the video, and don't want to click every now and then to confirm "Yes, I am still there!".
I listen to most of my music from my local library, using Audirvana as my digital music player.
So I wanted to download entire channels and only get the music from the videos, tagged properly so they display nicely in Audirvana.
First step was to download all videos from a given Youtube channel.
This is easy using Pulltube, including the ability to download the thumbnail for each video.
So the input of my script is a folder containing videos and images, each pair sharing the same name: one .mp4 and one .jpg for each video.
Ideally, I'm aiming to have only high-fidelity, lossless audio files in my library (.flac or .aac).
However, after checking one example with Audirvana (which includes an audio analysis tool), it appears that the audio embedded in the Youtube videos of that channel is encoded in .mp3 at 128kbps.
Not great, but enough anyway for live electronic music.
Here is the script I came up with to process the videos from there:
import os
from pathlib import Path

import moviepy.editor as mp
import mutagen.id3
from mutagen.easyid3 import EasyID3
from mutagen.id3 import ID3, APIC, error
from mutagen.id3 import ID3, TIT2, TIT3, TALB, TPE1, TRCK, TYER
from mutagen.mp3 import MP3
from mutagen.mp3 import MP3
from send2trash import send2trash
### Global Variables
v = True  # verbose mode: print progress details while running
count = 0  # number of not-yet-converted .mp4 files encountered so far
count_to_do = 70  # set a max for tests: at most this many videos are converted per run
input_folder = 'path/to/folder/with/music/videos'
output_folder = 'path/to/output/music/files'

# Base names (without extension) of the entries already present in the output
# folder; the conversion loop uses this list to skip videos handled on a
# previous run.
# On a first run the output folder may not exist yet: treat that as "nothing
# converted so far" instead of letting os.listdir() raise FileNotFoundError.
if os.path.isdir(output_folder):
    existing_files = [
        os.path.splitext(entry)[0]
        for entry in os.listdir(output_folder)
        if '.' in entry  # only entries whose name contains a dot are considered
    ]
else:
    existing_files = []

if v:
    print("Existing files:")
    for f in existing_files:
        print(f)
    print()
### Loop
# Walk the input folder, convert every new .mp4 to a tagged .mp3 (with the
# matching .jpg embedded as cover art) and move the processed sources to the
# trash.

# Counters reported in verbose mode. They must exist before the walk starts,
# otherwise the first `+=` raises NameError.
count_folders = 0
count_files = 0

# Make sure the destination exists so the audio export cannot fail on a
# missing folder.
os.makedirs(output_folder, exist_ok=True)


def _extract_audio(video_path, audio_path):
    """Write the audio track of the video at *video_path* to *audio_path*.

    Raises:
        ValueError: if the video has no audio track.
    """
    clip = mp.VideoFileClip(video_path)
    try:
        if clip.audio is None:
            raise ValueError(f"no audio track in {video_path}")
        clip.audio.write_audiofile(audio_path)
    finally:
        # Always release the clip, also on failure, so the source video is
        # not held open when it is moved to the trash afterwards.
        clip.close()


def _write_basic_tags(audio_path, title):
    """Store title, artist and album artist in the MP3 at *audio_path*."""
    tags = MP3(audio_path, ID3=EasyID3)
    tags['title'] = [title]
    tags['albumartist'] = ['enter_artist_name']
    tags['artist'] = ['enter_artist_name']
    tags.save()


def _embed_cover(audio_path, cover_path):
    """Embed the JPEG at *cover_path* as cover art of the MP3 at *audio_path*.

    Raises:
        OSError: if the image cannot be read (e.g. the video has no thumbnail).
    """
    tags = ID3(audio_path)
    with open(cover_path, 'rb') as albumart:
        tags.add(
            APIC(
                encoding=3,  # UTF-8
                mime='image/jpeg',
                type=3,  # front cover
                desc='Cover',
                data=albumart.read(),
            )
        )
    tags.save()


for folder_path, subfolders, files in os.walk(input_folder):  # tuple: (folder_path-->str, subfolders-->list, files-->list)
    # 1st iteration is the root folder, then one iteration per subfolder
    count_folders += 1
    count_files += len(files)
    if v:
        print()
        print(type(folder_path), f"{folder_path=}")
        print(type(subfolders), f"{subfolders=}")
        print(type(files), f"{files=}")

    for file in files:  # file --> str name w/ extension
        if v:
            print(f"\n{file=}")
        file_name, file_extension = os.path.splitext(file)

        # Only videos are converted; compare the extension exactly and
        # case-insensitively rather than by substring.
        if file_extension.lower() != '.mp4':
            continue
        if file_name in existing_files:
            print(f"{file_name} already processed.")
            continue

        count += 1
        if count > count_to_do:
            # Per-run limit reached: keep counting but convert nothing more.
            continue

        video_path = os.path.join(folder_path, file)
        audio_path = os.path.join(output_folder, f"{file_name}.mp3")
        # Defined before the try so the error message below can always use it.
        cover_path = os.path.join(folder_path, f"{file_name}.jpg")

        print(f"Processing {file_name} with path: {video_path}")
        try:
            print(f"Writing audio file to {audio_path}")
            _extract_audio(video_path, audio_path)
            # Tags
            _write_basic_tags(audio_path, file_name.strip())
            # COVER
            print(cover_path)
            try:  # handling video files with no thumbnail
                _embed_cover(audio_path, cover_path)
                send2trash(cover_path)
            except Exception as exc:
                print(f"COVER ERROR with {cover_path}: {exc}")
                # NOTE(review): as before, the video is left in place when the
                # cover step fails -- confirm this is the intended behaviour.
                continue
            send2trash(video_path)
        except Exception as exc:
            # NOTE: an .mp3 left behind by a failed conversion makes the next
            # run treat this video as already processed.
            print(f"ERROR with {video_path}: {exc}")
            continue
        print()

if v:
    print(f"\nWalked {count_folders} folder(s) containing {count_files} file(s).")