Got it running with no errors thus far. Special thanks to @Casual_Tea: I used some of your code to debug some issues when extracting lyrics from MP3 files… Some files simply had LRC content embedded as plain text (which is what beets does), while others were tagged properly (SYLT format, which is not supported by the majority of well-known taggers).
Most errors were thrown because of the wonky implementation in MP3; so far, all of them seem to be resolved.
import.sh
#!/bin/bash
# Embed lyrics into the download folder, import it with beets, then extract
# lyrics from the library.

# Get the directory of the script
ROOT_DIR="$(dirname "$(realpath "$0")")"

# A "~" inside double quotes is NOT expanded by the shell, so use $HOME to
# hand every command below a real absolute path.
IMPORT="$HOME/Downloads/Music"
LIBRARY="$HOME/Music"

# Call import.py through ROOT_DIR so the script works from any working directory.
# NOTE(review): assumes import.py sits next to this script - confirm.
python3 "$ROOT_DIR/import.py" -em -d "$IMPORT"

### Import all Music to Library
source "$ROOT_DIR/beets/bin/activate"
beet update
beet import "$IMPORT"
beet convert -y
deactivate

python3 "$ROOT_DIR/import.py" -ex -d "$LIBRARY"
import.py
import os, glob, re, argparse
from mutagen import MutagenError
from mutagen.id3 import ID3, USLT, SYLT, Encoding, ID3NoHeaderError
from mutagen.mp3 import MP3
from mutagen.flac import FLAC
from mutagen.mp4 import MP4
# File extensions (without the leading dot) that find_audio searches for.
extensions = ['flac', 'mp3', 'm4a']

# Tag keys probed, in this order, when looking for lyrics on non-MP3 files.
lyrics_tags = ['LYRICS', 'lyrics', '\xa9lyr']

# ID3 frame classes treated as lyrics on MP3 files.
mp3_frames = [SYLT, USLT]

# Fallback lyrics tag per extension, used when a file carries none of the above.
extension_to_tag = {
    'flac': 'LYRICS',
    # Mapping MP3 to USLT as SYLT support is wonky in FFMPEG and MP3TAG,
    # embeds in the SYLT format where possible
    'mp3': USLT,
    'm4a': '\xa9lyr',
}
# 1 - Find Audio
def find_audio(folder, exts=None):
    """Recursively collect audio files below *folder*.

    Args:
        folder: Directory to search. It is matched literally, so glob
            metacharacters in the path (e.g. "Album [Live]") are safe.
        exts: Optional iterable of extensions (without the dot) to look for.
            Defaults to the module-level ``extensions`` list.

    Returns:
        A list of matching paths, grouped by extension in the order given.
    """
    print(f"Scanning '{folder}'")
    if exts is None:
        exts = extensions
    # Escape the folder so '[', ']', '*' and '?' in directory names are not
    # interpreted as glob syntax, which would silently match nothing.
    root = glob.escape(folder)
    audio_files = []
    for ext in exts:
        audio_files.extend(glob.glob(f'{root}/**/*.{ext}', recursive=True))
    return audio_files
# 2 - Read Lyrics File (if any)
def read_lyrics_file(filepath):
    """Return the text of the lyrics file at *filepath*, or None.

    Args:
        filepath: Path of the candidate lyrics file.

    Returns:
        The decoded file content, or None when the path is not a regular
        file or its content is not valid UTF-8.
    """
    if not os.path.isfile(filepath):
        print("No local lyrics file found.")
        return None
    print(f"Local lyrics found: '{filepath}'")
    try:
        # 'utf-8-sig' reads plain UTF-8 too but drops a leading BOM, which
        # would otherwise end up in the lyrics and hide the first timestamp.
        with open(filepath, 'r', encoding='utf-8-sig') as file:
            return file.read()
    except UnicodeDecodeError as e:
        # One badly encoded file should be skipped, not abort the caller.
        print(f"Could not decode '{filepath}' as UTF-8: {e}")
        return None
# 3 - Initialize? it
def init_audio(filepath, ext):
    """Open *filepath* with the mutagen class that matches *ext*.

    Args:
        filepath: Path of the audio file.
        ext: Extension without the dot; 'mp3', 'm4a' and 'flac' are handled.

    Returns:
        The opened MP3, MP4 or FLAC object, or None for any other extension.
        An MP3 is always returned with a tag container attached.
    """
    audio = None
    if ext == 'mp3':
        try:
            audio = MP3(filepath, ID3=ID3)
        except ID3NoHeaderError:
            audio = MP3(filepath)
        # mutagen signals a missing ID3 header by leaving ``tags`` as None
        # instead of raising, so create the container here; otherwise every
        # later ``audio.tags.<method>`` call would fail with AttributeError.
        if audio.tags is None:
            audio.add_tags()
    elif ext == 'm4a':
        audio = MP4(filepath)
    elif ext == 'flac':
        audio = FLAC(filepath)
    return audio
# 4 - look for tags in the audio
def find_tag(audio, ext):
    """Locate the lyrics tag of *audio*, falling back to a default.

    For 'mp3' the first frame in ``audio.tags`` that is an instance of one of
    ``mp3_frames`` is returned (the frame object itself). For every other
    extension the first key of ``lyrics_tags`` present in *audio* is returned.
    When nothing is found, the entry of ``extension_to_tag`` for *ext* is
    returned instead.
    """
    if ext != 'mp3':
        for candidate in lyrics_tags:
            try:
                present = candidate in audio
            except ValueError:
                # Ignore the error and continue checking other tags
                continue
            if present:
                return candidate
    else:
        frame_types = tuple(mp3_frames)
        for frame in audio.tags.values():
            if isinstance(frame, frame_types):
                return frame
    # If no tags found, state and set one based on ext
    print("No tags found in file, setting based on extension.")
    return extension_to_tag[ext]
# NOT USABLE FOR BEETS
# The LRC -> SYLT converter below is kept for reference only: it is wrapped in
# a bare string literal, so lrc_to_sylt is never defined at runtime.
'''
# 5 - Be able to parse lyrics
## START CREDIT: Casual_Tea @ discourse.beets.io
def lrc_to_sylt(lyrics):
sylt_lyrics = []
timestamp_pattern = re.compile(r'^\[(\d{1,2}):(\d{2})\.(\d{2})\] *(.*)$')
lines = lyrics.split('\n')
for line in lines:
match = timestamp_pattern.match(line)
if match:
minutes, seconds, milliseconds, text = match.groups()
timestamp = (int(minutes) * 60000) + (int(seconds) * 1000) + int(milliseconds.ljust(3, '0'))
sylt_lyrics.append((text, timestamp))
return sorted(sylt_lyrics, key=lambda x: x[1])
'''
def sylt_to_lrc(sylt_frame):
    """Render synchronised lyrics as LRC text.

    Args:
        sylt_frame: Either an iterable of ``(text, timestamp)`` pairs, or an
            object exposing such an iterable as its ``text`` attribute.
            Timestamps are treated as milliseconds.
            NOTE(review): a SYLT frame may also count in MPEG frames
            (format=1) - confirm that only millisecond frames reach here.

    Returns:
        One ``[mm:ss.xx]text`` line per entry, ordered by timestamp and
        joined with newlines; an empty string when there are no entries.
    """
    # Accept the frame object as well as its raw list of pairs.
    entries = getattr(sylt_frame, 'text', sylt_frame)
    # Order by the numeric timestamp; sorting the rendered strings would put
    # "[100:..." before "[99:...".
    ordered = sorted(entries, key=lambda entry: entry[1])
    lrc_lines = []
    for text, timestamp in ordered:
        # Round to centiseconds with integer maths so the carry reaches the
        # minutes (59999 ms becomes 01:00.00 rather than 00:60.00).
        total_cs = (int(timestamp) + 5) // 10
        minutes, rest = divmod(total_cs, 6000)
        seconds, centis = divmod(rest, 100)
        lrc_lines.append(f"[{minutes:02}:{seconds:02}.{centis:02}]{text}")
    return "\n".join(lrc_lines)
## END CREDIT: Casual_Tea @ discourse.beets.io
# 6 - Try to get lyrics from the file itself
def read_lyrics_audio(audio, ext, tag):
    """Return the lyrics stored in *audio*, or None when there are none.

    Args:
        audio: Opened audio object; for 'mp3' its ``tags`` frames are
            scanned, otherwise ``audio.get(tag)`` is used.
        ext: File extension without the dot.
        tag: Tag key to read for non-MP3 files (unused for 'mp3').

    Returns:
        For MP3, the LRC rendering of a SYLT frame if one exists, else the
        text of the last USLT frame. For other formats, the tag value (its
        first element when it is a list). None when nothing is found or
        reading fails.
    """
    try:
        # Start from None so an MP3 without any lyrics frame yields None
        # instead of tripping over an unbound local.
        lyrics = None
        if ext == 'mp3':
            for val in audio.tags.values():
                if isinstance(val, SYLT):
                    # The (text, timestamp) pairs live in the frame's
                    # ``text`` attribute; the frame itself is not iterable.
                    return sylt_to_lrc(val.text)
                if isinstance(val, USLT):
                    lyrics = val.text
        else:
            lyrics = audio.get(tag)
        return lyrics[0] if isinstance(lyrics, list) else lyrics
    except Exception as e:
        # Best effort: a broken tag must not stop the whole run.
        print(f"Error reading lyrics from audio file: {e}")
        return None
# 7 - Be able to strip all
def strip_tags(audio, ext):
    """Remove every known lyrics tag from *audio* without saving.

    For 'mp3' all frames named after the classes in ``mp3_frames`` are
    deleted from ``audio.tags``; for other extensions each key of
    ``lyrics_tags`` is deleted from *audio*. KeyError and ValueError raised
    by a deletion are ignored.
    """
    if ext == 'mp3':
        frame_ids = [frame_cls.__name__ for frame_cls in mp3_frames]
        for frame_id in frame_ids:
            try:
                audio.tags.delall(frame_id)
            except (KeyError, ValueError):
                pass
        return
    for key in lyrics_tags:
        try:
            del audio[key]
        except (KeyError, ValueError):
            pass
# 8 - Embed from file
def embed_lyrics_from_file(audio, ext, tag, lrc_filepath):
    """Embed the lyrics stored in *lrc_filepath* into *audio* and save it.

    Nothing happens when the lyrics file is missing or empty. On success the
    lyrics file is deleted.

    Args:
        audio: Opened audio object to write to.
        ext: File extension without the dot; 'mp3' is written as a USLT frame.
        tag: Tag key used for non-MP3 files.
        lrc_filepath: Path of the lyrics file to embed.
    """
    lyrics = read_lyrics_file(lrc_filepath)
    if not lyrics:
        return
    if ext == 'mp3':
        # An MP3 without an ID3 header has no tag container yet.
        if audio.tags is None:
            audio.add_tags()
        strip_tags(audio, ext)
        ## THESE ARE IRRELEVANT FOR BEETS, leaving because it's more "correct"
        ### lyrics_str = lrc_to_sylt(lyrics)
        ### audio.tags.setall("USLT", [SYLT(encoding=Encoding.UTF8, lang='eng', format=2, type=1, text=lyrics_str)])
        # USLT has no 'format'/'type' fields (those belong to SYLT), so only
        # the fields the frame actually defines are passed.
        audio.tags.add(USLT(encoding=Encoding.UTF8, lang='eng', text=lyrics))
        audio.save(v2_version=3)
    else:
        audio[tag] = lyrics
        audio.save()
    # Only reached when saving succeeded, so the lyrics file is never lost.
    os.remove(lrc_filepath)
    print(f"Lyrics embeded. Deleted '{lrc_filepath}' after embedding lyrics.")
# MAIN FUNCTIONS #
def embed(folder):
    """Embed sibling ``.lrc`` files into every audio file below *folder*.

    For each audio file the embedded lyrics are inspected first: when they
    exist but carry no LRC timestamps, the lyrics tags are stripped from the
    in-memory object. The ``.lrc`` file next to the audio file (same name,
    different extension) is then embedded if it exists.

    Args:
        folder: Directory that is searched recursively.
    """
    for filepath in find_audio(folder):
        print(f"Scanning '{filepath}'")
        try:
            stem, dot_ext = os.path.splitext(filepath)
            ext = dot_ext[1:]
            audio = init_audio(filepath, ext)
            # Compare against None: mutagen file objects are dict-like and
            # evaluate as False when they hold no tags, which is exactly the
            # kind of file that still needs lyrics.
            if audio is None:
                print(f"Audio file not readable: '{filepath}'")
                continue
            print(f"Initialized '{filepath}'")
            lrc_filepath = f"{stem}.lrc"
            tag = find_tag(audio, ext)
            if not tag:
                print(f"No embedded or usable tag found for '{filepath}'")
                continue
            lyrics = read_lyrics_audio(audio, ext, tag)
            if lyrics:
                if re.search(r'\[\d{2}:\d{2}\.\d{2}\]|\[\d{1,2}\.\d{2}\]', lyrics):
                    print(f"Synced lyrics already in '{filepath}'")
                else:
                    print("No synced lyrics embedded, stripping for autotagging")
                    # Not saved here; it only persists if the embed below
                    # finds a lyrics file and saves the audio.
                    strip_tags(audio, ext)
            ## Maybe local file has been updated, embed anyways
            # NOTE(review): the local .lrc is embedded even when synced lyrics
            # are already present - confirm this is the intended behaviour.
            embed_lyrics_from_file(audio, ext, tag, lrc_filepath)
        except (MutagenError, OSError) as e:
            # A single unreadable or unwritable file must not stop the batch.
            print(f"Error processing '{filepath}': {e}")
def extract(folder):
    """Write the embedded lyrics of every audio file below *folder* to disk.

    Lyrics containing LRC timestamps go to ``<name>.lrc``, all others to
    ``<name>.txt``; an existing output file is never overwritten. Files with
    unsynced lyrics are listed in 'unsynced.txt' and files without lyrics in
    'nolyrics.txt', both created in the current working directory.

    Args:
        folder: Directory that is searched recursively.
    """
    audio_files = find_audio(folder)
    unsynced = []
    nolyrics = []
    for filepath in audio_files:
        try:
            stem, dot_ext = os.path.splitext(filepath)
            ext = dot_ext[1:]
            audio = init_audio(filepath, ext)
            # Compare against None: mutagen file objects are dict-like and
            # evaluate as False when they hold no tags.
            if audio is None:
                print(f"Could not read metadata for '{filepath}'")
                continue
            print(f"Initialized '{filepath}'")
            tag = find_tag(audio, ext)
            lyrics = read_lyrics_audio(audio, ext, tag) if tag else None
            if not lyrics:
                # Covers both "no tag" and "tag without content" so every
                # lyric-less file is reported.
                print(f"No lyrics tag found for '{filepath}'")
                nolyrics.append(filepath)
                continue
            if re.search(r'\[\d{2}:\d{2}\.\d{2}\]|\[\d{1,2}\.\d{2}\]', lyrics):
                lyrics_type = 'lrc'
            else:
                lyrics_type = 'txt'
                unsynced.append(filepath)
            output_filepath = f"{stem}.{lyrics_type}"
            if not os.path.isfile(output_filepath):
                print(f"Extracting lyrics from '{filepath}'")
                with open(output_filepath, 'w', encoding='utf-8') as output:
                    output.write(f"{lyrics}\n")
        except (MutagenError, OSError) as e:
            # A single unreadable or unwritable file must not stop the batch.
            print(f"Error processing '{filepath}': {e}")
    if unsynced:
        print("There are some files with unsynced lyrics \nCheck 'unsynced.txt' for more.")
        # Explicit UTF-8 so non-ASCII file names never depend on the locale.
        with open('unsynced.txt', 'w', encoding='utf-8') as output:
            output.writelines(f"{item}\n" for item in unsynced)
    if nolyrics:
        print("There are some files with no lyrics \nCheck 'nolyrics.txt' for more.")
        with open('nolyrics.txt', 'w', encoding='utf-8') as output:
            output.writelines(f"{item}\n" for item in nolyrics)
### MAIN FUNCTION ###
def main():
    """Parse the command line and run the requested mode.

    ``-em`` embeds lyrics, ``-ex`` extracts them; when both are given,
    embedding takes precedence. ``-d`` names the directory to process and is
    required.
    """
    parser = argparse.ArgumentParser(
        description="Embed or extract lyrics for the audio files in a directory."
    )
    parser.add_argument('-em', action='store_true',
                        help="embed lyrics from .lrc files into the audio files")
    parser.add_argument('-ex', action='store_true',
                        help="extract embedded lyrics to .lrc/.txt files")
    parser.add_argument('-d', type=str, required=True,
                        help="directory to process ('~' is expanded)")
    args = parser.parse_args()
    # Named target_dir rather than dir to avoid shadowing the builtin.
    target_dir = os.path.expanduser(args.d)
    if args.em:
        print(f"Embedding lyrics in '{target_dir}'")
        embed(target_dir)
    elif args.ex:
        print(f"Extracting lyrics in '{target_dir}'")
        extract(target_dir)
    else:
        print("No valid option provided.")


if __name__ == "__main__":
    main()