First of all, when running the YT transcriber, we need to make sure the service runs inside the venv (virtual environment). The systemd unit below takes care of that:
[Unit]
Description=flastai engine client
After=network.target
[Service]
User=flastai
WorkingDirectory=/var/www/html/BlogEngine_v5.0/
# ExecStart=/var/www/html/BlogEngine_v5.0/venv/bin/streamlit run home.py --server.port 30000
ExecStart=/bin/bash -c 'source venv/bin/activate && /var/www/html/BlogEngine_v5.0/venv/bin/streamlit run home.py --server.port 30000'
Restart=always
[Install]
WantedBy=multi-user.target
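To confirm the service really did start inside the venv, one option is to log the interpreter path when home.py starts. This is a minimal illustrative check, not part of the original unit or home.py:

import sys

# When the systemd unit is set up correctly, this prints the venv interpreter,
# e.g. /var/www/html/BlogEngine_v5.0/venv/bin/python
print(f"home.py running under: {sys.executable}")
if "venv" not in sys.prefix:
    print("Warning: not running inside the expected virtual environment")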
New code (inside home.py):
import base64
import os
import subprocess

import streamlit as st
import whisper


class Document:
    def __init__(self):
        self.text = ""

    def set_text(self, text):
        self.text = text


def transcribing_function(video_id):
    st.warning("Transcribing video...")
    # Define the path to the Whisper model files and audio folder
    # (folder_root, folder_audio, folder_files and yt_dlp_path are defined elsewhere in home.py)
    whisper_model_path = f"../{folder_root}/"
    audio_path = f"{folder_audio}/{video_id}.mp3"
    # Execute yt-dlp command to download and convert video audio to mp3
    result = subprocess.run(
        [yt_dlp_path, '-f', 'bestaudio', '-o', audio_path, f"https://www.youtube.com/watch?v={video_id}"],
        capture_output=True,
        text=True
    )
    # Check for errors in the subprocess execution
    if result.returncode != 0:
        st.error("Failed to download and convert video.")
        st.error(result.stderr)
        return
    # Load Whisper model
    model = whisper.load_model("base.en", download_root=whisper_model_path)
    # Transcribe the audio file
    try:
        transcription_result = model.transcribe(audio_path)
        text = transcription_result['text'].strip()
    except Exception as e:
        st.error(f"Error transcribing video: {str(e)}")
        return
    # Split the text into parts
    max_chars = 28000
    parts = [text[i:i + max_chars] for i in range(0, len(text), max_chars)]
    docs = []
    for part in parts:
        doc = Document()
        doc.set_text(part)
        docs.append(doc)
    # Save each part to a separate file
    for i, doc in enumerate(docs):
        filename = os.path.join(folder_files, f"transcribed-{video_id}_{i}.txt")
        with open(filename, "w", encoding="utf-8") as f:
            f.write(doc.text)
        st.success(f"Transcribed chunk_{i} saved.")
    # Assuming right_column is defined correctly elsewhere
    with right_column:
        # Get and display transcribed files
        transcribed_files = [file for file in os.listdir(folder_files) if file.endswith(".txt") and video_id in file]
        if not transcribed_files:
            st.error("No transcribed files found.")
            return
        st.success("Video transcribed and split into parts successfully.")
        num_columns = min(len(transcribed_files), 3)
        columns = st.columns(num_columns, gap="small")
        # Spread the download links across the columns
        for i, column in enumerate(columns):
            for j in range(i, len(transcribed_files), num_columns):
                file_path = os.path.join(folder_files, transcribed_files[j])
                with open(file_path, "rb") as f:
                    contents = f.read()
                encoded = base64.b64encode(contents).decode()
                href = f'<a href="data:application/octet-stream;base64,{encoded}" download="{transcribed_files[j]}" target="_blank">{transcribed_files[j]}</a>'
                column.markdown(href, unsafe_allow_html=True)
        with st.expander("Transcribed files (Click to hide/unhide text)"):
            for filename in transcribed_files:
                with open(os.path.join(folder_files, filename), "r") as file:
                    file_contents = file.read()
                st.success(f"File name: {filename}\n\n{file_contents}")
    return docs
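For context, here is a hedged sketch of how transcribing_function() might be called from the Streamlit page; the text input, button and URL parsing below are illustrative and not taken from the original home.py:

from urllib.parse import parse_qs, urlparse

url = st.text_input("YouTube URL")
if st.button("Transcribe") and url:
    # Extract the video id from a standard watch URL, e.g. ...watch?v=VIDEO_ID
    video_id = parse_qs(urlparse(url).query).get("v", [""])[0]
    if video_id:
        docs = transcribing_function(video_id)
    else:
        st.error("Could not extract a video id from that URL.")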
Configuration - Requirements.
Activate the virtual environment:
source /var/www/html/BlogEngine_v5.0/venv/bin/activate
Uninstall Whisper:
pip uninstall whisper
Increase the size of /tmp (the install needs more temporary space):
sudo mount -o remount,size=6G /tmp
Reinstall Whisper: Make sure you're installing the latest version.
pip install -U openai-whisper --no-cache-dir
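Once reinstalled, a quick sanity check from inside the venv confirms that the import works and the base.en model loads; the download_root below is a placeholder and should match the whisper_model_path used in transcribing_function:

import whisper

# Placeholder path; point download_root at the same directory used by home.py.
model = whisper.load_model("base.en", download_root="/var/www/html/BlogEngine_v5.0/models")
print("Whisper base.en model loaded successfully")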