Elber Domingos
Weird Python script problem
Rest of the code:
# Main function for Windmill.dev
def main(
    url: str, with_timestamps: bool = False, languages: List[str] = ["en", "pt"]
) -> Union[str, Dict]:
    """Windmill entry point: fetch the transcript of a YouTube video.

    Args:
        url: Address of the YouTube video.
        with_timestamps: Accepted as part of the script's interface.
            NOTE(review): every code path returns the same value regardless
            of this flag, so it currently has no effect on the result.
        languages: Requested transcript languages; anything other than
            "en" or "pt" is dropped before the lookup.

    Returns:
        The value produced by ``get_youtube_transcript`` for ``url`` and the
        filtered language list, handed back unchanged.
    """
    supported = ("en", "pt")
    requested = [code for code in languages if code in supported]
    # Error results and successful transcripts are both passed through
    # untouched, so the helper's result is returned directly.
    return get_youtube_transcript(url, requested)
# Main function for Windmill.dev
def main(
    url: str, with_timestamps: bool = False, languages: List[str] = ["en", "pt"]
) -> Union[str, Dict]:
    """Windmill entry point: fetch the transcript of a YouTube video.

    Args:
        url: Address of the YouTube video.
        with_timestamps: Accepted as part of the script's interface.
            NOTE(review): every code path returns the same value regardless
            of this flag, so it currently has no effect on the result.
        languages: Requested transcript languages; anything other than
            "en" or "pt" is dropped before the lookup.

    Returns:
        The value produced by ``get_youtube_transcript`` for ``url`` and the
        filtered language list, handed back unchanged.
    """
    supported = ("en", "pt")
    requested = [code for code in languages if code in supported]
    # Error results and successful transcripts are both passed through
    # untouched, so the helper's result is returned directly.
    return get_youtube_transcript(url, requested)
6 replies
Weird Python script problem
Python code:
from youtube_transcript_api import YouTubeTranscriptApi
import re
from typing import List, Dict, Union
# Function to extract video ID from YouTube URL
def get_video_id(url: str) -> Union[str, None]:
    """Extract the video ID from a YouTube URL.

    Recognised forms (scheme and ``www.`` are optional):
    ``youtube.com/watch?...v=ID`` (``v`` may appear anywhere in the query
    string), ``youtu.be/ID`` and ``youtube.com/{embed,v,shorts,live}/ID``.
    The ID ends at the first ``?``, ``&``, ``#`` or ``/`` that follows it, so
    trailing query parameters, fragments and slashes are never returned as
    part of the ID.

    Args:
        url: The URL to inspect.

    Returns:
        The video ID, or ``None`` when ``url`` is empty or matches no known
        form.
    """
    if not url:
        return None
    patterns = [
        # Lazy prefix lets ``v`` follow other query parameters while still
        # picking the first ``v=`` in the query string.
        r"(?:https?:\/\/)?(?:www\.)?youtube\.com\/watch\?(?:[^#]*?&)?v=([^&#]+)",
        r"(?:https?:\/\/)?(?:www\.)?youtu\.be\/([^?&#\/]+)",
        r"(?:https?:\/\/)?(?:www\.)?youtube\.com\/(?:embed|v|shorts|live)\/([^?&#\/]+)",
    ]
    for pattern in patterns:
        match = re.search(pattern, url)
        if match:
            return match.group(1)
    return None
# Function to fetch YouTube transcript in specified languages
def get_youtube_transcript(url: str, languages: List[str]) -> Union[str, Dict]:
    """Fetch a video's transcript and flatten it into one string.

    Args:
        url: YouTube URL; its video ID is extracted with ``get_video_id``.
        languages: Language codes forwarded to
            ``YouTubeTranscriptApi.get_transcript``.

    Returns:
        The "text" of every transcript entry joined by single spaces, or a
        dict with an "error" key when the URL yields no video ID or the
        transcript request raises.
    """
    video_id = get_video_id(url)
    if not video_id:
        return {"error": "Invalid YouTube URL"}

    try:
        entries = YouTubeTranscriptApi.get_transcript(video_id, languages=languages)
    except Exception as exc:
        # Failures are reported as data instead of being raised, so the
        # caller always receives a value it can return.
        failure = {
            "error": str(exc),
            "message": f"Could not retrieve a transcript for the video ID {video_id}. "
            f"Subtitles might be disabled or not available in the specified languages.",
        }
        return failure

    pieces = [entry["text"] for entry in entries]
    return " ".join(pieces)
from youtube_transcript_api import YouTubeTranscriptApi
import re
from typing import List, Dict, Union
# Function to extract video ID from YouTube URL
def get_video_id(url: str) -> Union[str, None]:
    """Extract the video ID from a YouTube URL.

    Recognised forms (scheme and ``www.`` are optional):
    ``youtube.com/watch?...v=ID`` (``v`` may appear anywhere in the query
    string), ``youtu.be/ID`` and ``youtube.com/{embed,v,shorts,live}/ID``.
    The ID ends at the first ``?``, ``&``, ``#`` or ``/`` that follows it, so
    trailing query parameters, fragments and slashes are never returned as
    part of the ID.

    Args:
        url: The URL to inspect.

    Returns:
        The video ID, or ``None`` when ``url`` is empty or matches no known
        form.
    """
    if not url:
        return None
    patterns = [
        # Lazy prefix lets ``v`` follow other query parameters while still
        # picking the first ``v=`` in the query string.
        r"(?:https?:\/\/)?(?:www\.)?youtube\.com\/watch\?(?:[^#]*?&)?v=([^&#]+)",
        r"(?:https?:\/\/)?(?:www\.)?youtu\.be\/([^?&#\/]+)",
        r"(?:https?:\/\/)?(?:www\.)?youtube\.com\/(?:embed|v|shorts|live)\/([^?&#\/]+)",
    ]
    for pattern in patterns:
        match = re.search(pattern, url)
        if match:
            return match.group(1)
    return None
# Function to fetch YouTube transcript in specified languages
def get_youtube_transcript(url: str, languages: List[str]) -> Union[str, Dict]:
    """Fetch a video's transcript and flatten it into one string.

    Args:
        url: YouTube URL; its video ID is extracted with ``get_video_id``.
        languages: Language codes forwarded to
            ``YouTubeTranscriptApi.get_transcript``.

    Returns:
        The "text" of every transcript entry joined by single spaces, or a
        dict with an "error" key when the URL yields no video ID or the
        transcript request raises.
    """
    video_id = get_video_id(url)
    if not video_id:
        return {"error": "Invalid YouTube URL"}

    try:
        entries = YouTubeTranscriptApi.get_transcript(video_id, languages=languages)
    except Exception as exc:
        # Failures are reported as data instead of being raised, so the
        # caller always receives a value it can return.
        failure = {
            "error": str(exc),
            "message": f"Could not retrieve a transcript for the video ID {video_id}. "
            f"Subtitles might be disabled or not available in the specified languages.",
        }
        return failure

    pieces = [entry["text"] for entry in entries]
    return " ".join(pieces)
6 replies