Back again with another update, transitioning to Python (still learning, eventually [hopefully] I’ll get to the point where this can be a plugin).
I’m leaving the old one up just in case anyone wants something that’ll run without python (though you’d still need ffmpeg and ffprobe).
Now I’m at a point where I have an import.sh file which calls an import.py script.
import.sh:
#!/bin/sh
# Embed .lrc lyrics into the downloaded files, import them with beets,
# then export the lyrics of the tagged library back to sidecar files.

# Get the directory of the script so the beets virtualenv and import.py are
# found no matter where the script is called from.
ROOT_DIR="$(cd "$(dirname "$0")" && pwd)"
# Run from the script directory so ./import.py resolves.
cd "$ROOT_DIR" || exit 1

# Use $HOME instead of "~": the shell does not expand a tilde inside quotes.
IMPORT="$HOME/Downloads/Music"
LIBRARY="$HOME/Music"

# Embed synced lyrics into files before importing with beets
# Strip unsynced lyrics from files
python3 ./import.py -em -dir "$IMPORT"

# Import all Music to Library
# "." is the POSIX spelling of "source", which /bin/sh is not required to provide.
. "$ROOT_DIR/beets/bin/activate"
beet update
beet import "$IMPORT"
beet convert -y
deactivate

# Export lyrics from media files now that they're tagged
python3 ./import.py -ex -dir "$LIBRARY"
import.py:
import os, glob, re, argparse
from mutagen import File, MutagenError
# Lyrics tag key used for each supported audio file extension.
extension_to_tag = {
    'flac': 'LYRICS',
    'mp3': 'lyrics',
    'm4a': '\xa9lyr',
}
# Supported file extensions (without the dot), in mapping order.
extensions = list(extension_to_tag)
# Every tag key that may hold lyrics, in mapping order.
lyrics_tags = list(extension_to_tag.values())
def get_tag_for_extension(extension):
    """Return the lyrics tag key mapped to *extension*, or None if unmapped.

    The extension is looked up exactly as given in ``extension_to_tag``
    (no leading dot, e.g. ``'flac'``).
    """
    try:
        return extension_to_tag[extension]
    except KeyError:
        return None
def find_tag(audio, lyrics_tags):
    """Return the first key from *lyrics_tags* that is present in *audio*.

    Keys are probed in the order given. A key whose membership test raises
    ValueError is treated as absent. Returns None when no key is found.
    """
    for candidate in lyrics_tags:
        try:
            present = candidate in audio
        except ValueError:
            # The container rejected this key; move on to the next candidate.
            present = False
        if present:
            return candidate
    return None
def find_audio(folder):
    """Recursively collect the audio files below *folder*.

    One recursive glob is run per entry in ``extensions``; the matches are
    returned as a single list, grouped by extension in that order.
    """
    print(f"Scanning '{folder}'")
    # Escape the root so glob metacharacters in the folder name itself
    # ('[', ']', '*', '?') are matched literally instead of as a pattern.
    root = glob.escape(folder)
    audio_files = []
    for ext in extensions:
        audio_files.extend(glob.glob(f'{root}/**/*.{ext}', recursive=True))
    return audio_files
def embed_lyrics_from_file(filepath, audio, lrc_filename, tag):
    """Write the content of *lrc_filename* into ``audio[tag]`` and save.

    Does nothing when the .lrc file is missing or empty. After a successful
    save the .lrc file is deleted.

    filepath     -- path of the audio file; only used in the printed messages.
    audio        -- mapping-like tag object that also provides ``save()``.
    lrc_filename -- path of the sidecar lyrics file to embed.
    tag          -- key under which the lyrics are stored.
    """
    # NOTE(review): the lyrics are assigned as a plain string; presumably
    # fine for FLAC/MP4 tags, but ID3 (mp3) tags may require a frame
    # object here -- confirm against mutagen before relying on mp3 support.
    if not os.path.isfile(lrc_filename):
        return
    # 'utf-8-sig' reads plain UTF-8 and also drops a leading byte-order mark,
    # which would otherwise be embedded as the first character of the lyrics.
    with open(lrc_filename, 'r', encoding='utf-8-sig') as lrc_file:
        lyrics = lrc_file.read()
    if not lyrics:
        return
    audio[tag] = lyrics
    audio.save()
    print(f"Embedded synced lyrics in '{filepath}'")
    # Remove the sidecar only after the tag has been saved.
    os.remove(lrc_filename)
    print(f"Deleted '{lrc_filename}' after embedding lyrics.")
def embed_lyrics(importdir):
    """Embed sidecar .lrc lyrics into every audio file below *importdir*.

    For each file returned by find_audio():
      * if it already carries lyrics without LRC timestamps, that tag is
        deleted and the file saved;
      * the '<name>.lrc' file next to it (if any) is then handed to
        embed_lyrics_from_file(), using the existing lyrics tag or, when the
        file has none, the tag mapped to its extension.

    Mutagen errors are printed and the affected file is skipped.
    """
    # A timestamp such as [01:23.45] or [01:23.456] marks lyrics as synced.
    # Three-digit fractions are accepted so millisecond-precision LRC lyrics
    # are not mistaken for unsynced text and stripped.
    synced_pattern = re.compile(r'\[\d{2}:\d{2}\.\d{2,3}\]|\[\d{1,2}\.\d{2}\]')
    for filepath in find_audio(importdir):
        try:
            audio = File(filepath)
            # Compare against None rather than testing truthiness: the
            # object is used as a mapping below, and an empty mapping (a
            # file without any tags) would otherwise be treated as unreadable.
            if audio is None:
                print(f"Audio file not readable: '{filepath}'")
                continue
            lrc_filename = f"{os.path.splitext(filepath)[0]}.lrc"
            tag = find_tag(audio, lyrics_tags)
            if tag:
                lyrics = audio.get(tag)
                if lyrics:
                    lyrics_str = lyrics[0] if isinstance(lyrics, list) else lyrics
                    if synced_pattern.search(lyrics_str):
                        print(f"Synced lyrics already in '{filepath}'")
                    else:
                        print(f"Stripping lyrics from '{filepath}'")
                        del audio[tag]
                        audio.save()
                        print(f"Stripped lyrics from '{filepath}'")
            else:
                # Get the extension without the dot
                ext = os.path.splitext(filepath)[1][1:]
                tag = get_tag_for_extension(ext)
                if not tag:
                    print(f"No tag found for '{filepath}' and no extension map available")
                    continue
            # No harm in embedding it
            embed_lyrics_from_file(filepath, audio, lrc_filename, tag)
        except MutagenError as e:
            print(f"Error processing '{filepath}': {e}")
def extract_lyrics(musicdir):
    """Export embedded lyrics of every audio file below *musicdir*.

    Lyrics containing LRC timestamps are written to '<name>.lrc', all other
    lyrics to '<name>.txt', next to the audio file; an existing output file
    is never overwritten. Files with unsynced lyrics are listed in
    'unsynced.txt' and files without lyrics in 'nolyrics.txt', both created
    in the current working directory.

    Mutagen errors are printed and the affected file is skipped.
    """
    # A timestamp such as [01:23.45] or [01:23.456] marks lyrics as synced.
    # Three-digit fractions are accepted so millisecond-precision LRC lyrics
    # are exported as .lrc rather than .txt.
    synced_pattern = re.compile(r'\[\d{2}:\d{2}\.\d{2,3}\]|\[\d{1,2}\.\d{2}\]')
    unsynced = []
    nolyrics = []
    for filepath in find_audio(musicdir):
        try:
            audio = File(filepath)
            # Compare against None rather than testing truthiness: the
            # object is used as a mapping below, and an empty mapping (a
            # file without any tags) should be reported as having no lyrics.
            if audio is None:
                print(f"Could not read metadata for '{filepath}'")
                continue
            tag = find_tag(audio, lyrics_tags)
            if not tag:
                print(f"No lyrics tag found for '{filepath}'")
                nolyrics.append(filepath)
                continue
            lyrics = audio.get(tag)
            if not lyrics:
                print(f"No lyrics found for '{filepath}'")
                nolyrics.append(filepath)
                continue
            lyrics_str = lyrics[0] if isinstance(lyrics, list) else lyrics
            if synced_pattern.search(lyrics_str):
                lyrics_type = 'lrc'
            else:
                lyrics_type = 'txt'
                unsynced.append(filepath)
            output_filename = f"{os.path.splitext(filepath)[0]}.{lyrics_type}"
            # Keep whatever sidecar file is already there.
            if not os.path.isfile(output_filename):
                with open(output_filename, 'w', encoding='utf-8') as output:
                    output.write(f"{lyrics_str}\n")
        except MutagenError as e:
            print(f"Error processing '{filepath}': {e}")
    # The reports are written as UTF-8 so paths with non-ASCII characters do
    # not depend on the locale's default encoding.
    if unsynced:
        print("There are some files with unsynced lyrics \nCheck 'unsynced.txt' for more.")
        with open('unsynced.txt', 'w', encoding='utf-8') as output:
            output.writelines(f"{item}\n" for item in unsynced)
    if nolyrics:
        print("There are some files with no lyrics \nCheck 'nolyrics.txt' for more.")
        with open('nolyrics.txt', 'w', encoding='utf-8') as output:
            output.writelines(f"{item}\n" for item in nolyrics)
def main():
    """Parse the command line and run the requested lyrics operation.

    -em   embed lyrics into the audio files of the directory
    -ex   extract lyrics from the audio files of the directory
    -dir  directory to process (required; a leading '~' is expanded)

    When both flags are given, -em wins; when neither is given a message is
    printed and nothing else happens.
    """
    cli = argparse.ArgumentParser()
    for flag in ('-em', '-ex'):
        cli.add_argument(flag, action='store_true')
    cli.add_argument('-dir', type=str, required=True)
    options = cli.parse_args()

    target = os.path.expanduser(options.dir)
    if options.em:
        print(f"Embedding lyrics in '{target}'")
        embed_lyrics(target)
        return
    if options.ex:
        print(f"Extracting lyrics in '{target}'")
        extract_lyrics(target)
        return
    print("No valid option provided.")


# Only run when executed as a script, not when imported.
if __name__ == "__main__":
    main()
The script is set up to either embed or extract lyrics from audio files using the mutagen library, and it can process multiple audio file types (flac, m4a, mp3).
It’s driven by command-line arguments: -em triggers embedding lyrics, -ex triggers extracting lyrics, and -dir "string" specifies the directory to use.
It also writes a list of files without lyrics (nolyrics.txt) and a list of files with unsynced lyrics (unsynced.txt) to the current working directory, which is next to the script itself when it is run from there.