first commit
This commit is contained in:
105
src/video_fetcher.py
Normal file
105
src/video_fetcher.py
Normal file
@@ -0,0 +1,105 @@
|
||||
"""
|
||||
RSS-Bridge kullanarak video metadata çıkarımı
|
||||
"""
|
||||
import feedparser
|
||||
import re
|
||||
import requests
|
||||
from urllib.parse import urlencode
|
||||
from typing import List, Dict, Optional
|
||||
from datetime import datetime
|
||||
|
||||
|
||||
def get_channel_id_from_handle(handle_url: str, timeout: float = 10.0) -> Optional[str]:
    """
    Resolve a YouTube channel handle URL to its channel ID by scraping the page.

    Example: https://www.youtube.com/@tavakfi -> UC...

    Args:
        handle_url: URL of the channel page (e.g. a ``/@handle`` URL).
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The channel ID (``UC`` followed by 22 characters) if one is found in
        the page HTML, otherwise ``None``.

    Raises:
        Exception: If the HTTP request fails (connection error, timeout,
            or an HTTP error status).
    """
    try:
        # requests has no default timeout; without one a stalled connection
        # would block the caller indefinitely.
        response = requests.get(handle_url, timeout=timeout)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        # Keep the generic Exception type callers already catch, but chain
        # the original error so the traceback is preserved.
        raise Exception(f"Error fetching channel page: {e}") from e

    html_content = response.text

    # Tried in order: the primary "externalId" key first, then the
    # alternative "channelId" key.
    patterns = (
        r'"externalId":"(UC[a-zA-Z0-9_-]{22})"',
        r'"channelId":"(UC[a-zA-Z0-9_-]{22})"',
    )
    for pattern in patterns:
        match = re.search(pattern, html_content)
        if match:
            return match.group(1)

    return None
|
||||
|
||||
|
||||
def extract_video_id(url: str) -> Optional[str]:
    """
    Extract the 11-character video ID from a YouTube URL.

    Supported URL shapes:
        - ``youtube.com/watch?v=<id>`` (``v`` may appear anywhere in the
          query string, e.g. ``watch?feature=share&v=<id>``)
        - ``youtu.be/<id>``
        - ``youtube.com/embed/<id>``, ``youtube.com/shorts/<id>``,
          ``youtube.com/live/<id>``

    Args:
        url: The URL (or text containing a URL) to inspect.

    Returns:
        The video ID, or ``None`` if ``url`` is empty or no supported
        pattern matches.
    """
    if not url:
        # Empty or missing link: nothing to search (re.search would raise on None).
        return None

    patterns = [
        # Lazily skip any "key=value&" pairs that precede the v parameter so
        # that v does not have to be the first query argument.
        r'youtube\.com/watch\?(?:[^#\s&]+&)*?v=([a-zA-Z0-9_-]{11})',
        r'youtu\.be/([a-zA-Z0-9_-]{11})',
        r'youtube\.com/(?:embed|shorts|live)/([a-zA-Z0-9_-]{11})',
    ]

    for pattern in patterns:
        match = re.search(pattern, url)
        if match:
            return match.group(1)

    return None
|
||||
|
||||
|
||||
def fetch_videos_from_rss_bridge(base_url: str, channel_id: str,
                                 format: str = "Atom", max_items: int = 100) -> List[Dict]:
    """
    Fetch the video list for a channel from RSS-Bridge.

    Args:
        base_url: RSS-Bridge base URL (a trailing slash is tolerated).
        channel_id: YouTube Channel ID (UC...).
        format: Feed format (Atom or Rss).
        max_items: Maximum number of feed entries to process.

    Returns:
        A list of dicts, one per entry whose link contains a recognisable
        video ID, with the keys ``video_id``, ``video_title``, ``video_url``,
        ``published_at_utc`` (ISO-8601 string with a ``Z`` suffix, or ``None``
        when the entry has no parsed publication date) and ``description``.

    Raises:
        Exception: If the feed could not be fetched or parsed at all.
    """
    params = {
        'action': 'display',
        'bridge': 'YoutubeBridge',
        'context': 'By channel id',
        'c': channel_id,
        'format': format
    }

    # Strip a trailing slash so "https://host/" does not become "https://host//?...".
    feed_url = f"{base_url.rstrip('/')}/?{urlencode(params)}"

    try:
        feed = feedparser.parse(feed_url)
    except Exception as e:
        raise Exception(f"Error fetching RSS-Bridge feed: {e}") from e

    # feedparser reports network/parse failures through the "bozo" flag
    # instead of raising. A bozo feed that still yielded entries is usable,
    # so only treat it as an error when nothing could be read.
    if getattr(feed, 'bozo', False) and not feed.entries:
        cause = getattr(feed, 'bozo_exception', 'unknown error')
        raise Exception(f"Error fetching RSS-Bridge feed: {cause}")

    videos = []
    for entry in feed.entries[:max_items]:
        # Entries without a link or without a recognisable video ID are
        # skipped rather than aborting the whole fetch.
        link = getattr(entry, 'link', None)
        video_id = extract_video_id(link) if link else None
        if not video_id:
            continue

        # Date parsing: published_parsed is a time tuple; the first six
        # fields are year..second. NOTE(review): the 'Z' suffix assumes the
        # tuple is in UTC, as feedparser documents — confirm.
        published_date = None
        published_parsed = getattr(entry, 'published_parsed', None)
        if published_parsed:
            published_date = datetime(*published_parsed[:6]).isoformat() + 'Z'

        videos.append({
            'video_id': video_id,
            'video_title': getattr(entry, 'title', ''),
            'video_url': link,
            'published_at_utc': published_date,
            'description': getattr(entry, 'summary', '')
        })

    return videos
|
||||
|
||||
Reference in New Issue
Block a user