106 lines
3.0 KiB
Python
106 lines
3.0 KiB
Python
|
|
"""
|
|||
|
|
RSS-Bridge kullanarak video metadata çıkarımı
|
|||
|
|
"""
|
|||
|
|
import feedparser
|
|||
|
|
import re
|
|||
|
|
import requests
|
|||
|
|
from urllib.parse import urlencode
|
|||
|
|
from typing import List, Dict, Optional
|
|||
|
|
from datetime import datetime
|
|||
|
|
|
|||
|
|
|
|||
|
|
def get_channel_id_from_handle(handle_url: str, timeout: float = 10.0) -> Optional[str]:
    """Find a channel ID by downloading a channel handle URL and scanning its HTML.

    Example: https://www.youtube.com/@tavakfi -> UC...

    Args:
        handle_url: URL of the channel page to download.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The first ``UC``-prefixed, 24-character ID found under the
        ``"externalId"`` key, otherwise under the ``"channelId"`` key, or
        ``None`` when neither key is present in the page.

    Raises:
        Exception: If the HTTP request fails, times out, or returns an
            error status. The underlying ``requests`` error is chained as
            the cause.
    """
    try:
        # Without an explicit timeout, requests waits indefinitely on a
        # server that accepts the connection but never answers.
        response = requests.get(handle_url, timeout=timeout)
        response.raise_for_status()
        html_content = response.text
    except requests.exceptions.RequestException as e:
        raise Exception(f"Error fetching channel page: {e}") from e

    # Order matters: "externalId" is preferred, "channelId" is the fallback.
    id_patterns = (
        r'"externalId":"(UC[a-zA-Z0-9_-]{22})"',
        r'"channelId":"(UC[a-zA-Z0-9_-]{22})"',
    )
    for id_pattern in id_patterns:
        match = re.search(id_pattern, html_content)
        if match:
            return match.group(1)

    return None
|
|||
|
|
|
|||
|
|
|
|||
|
|
# Tried in order. The first three are the historical patterns and stay first
# so that every URL that matched before keeps yielding the same ID.
_VIDEO_ID_PATTERNS = tuple(
    re.compile(pattern)
    for pattern in (
        r'youtube\.com/watch\?v=([a-zA-Z0-9_-]{11})',
        r'youtu\.be/([a-zA-Z0-9_-]{11})',
        r'youtube\.com/embed/([a-zA-Z0-9_-]{11})',
        # "v" is not always the first query parameter
        # (e.g. watch?feature=share&v=...).
        r'youtube\.com/watch\?[^#\s]*?&v=([a-zA-Z0-9_-]{11})',
        r'youtube\.com/shorts/([a-zA-Z0-9_-]{11})',
        r'youtube\.com/live/([a-zA-Z0-9_-]{11})',
    )
)


def extract_video_id(url: str) -> Optional[str]:
    """Extract the 11-character video ID from a YouTube URL.

    Recognised forms are ``youtube.com/watch?v=ID`` (with ``v`` anywhere in
    the query string), ``youtu.be/ID``, ``youtube.com/embed/ID``,
    ``youtube.com/shorts/ID`` and ``youtube.com/live/ID``.

    Args:
        url: The URL (or any text containing one) to search.

    Returns:
        The video ID, or ``None`` if ``url`` is empty/``None`` or contains
        no recognised YouTube video URL.
    """
    if not url:
        return None

    for pattern in _VIDEO_ID_PATTERNS:
        match = pattern.search(url)
        if match:
            return match.group(1)

    return None
|
|||
|
|
|
|||
|
|
|
|||
|
|
def _entry_to_video(entry) -> Optional[Dict]:
    """Turn one feed entry into a video metadata dict, or None if it has no video ID."""
    link = getattr(entry, 'link', None)
    if not link:
        return None

    video_id = extract_video_id(link)
    if not video_id:
        return None

    # Date parsing: published_parsed is a time tuple; only the first six
    # fields (year .. second) are passed to datetime().
    published_date = None
    published_parsed = getattr(entry, 'published_parsed', None)
    if published_parsed:
        published_date = datetime(*published_parsed[:6]).isoformat() + 'Z'

    return {
        'video_id': video_id,
        'video_title': getattr(entry, 'title', ''),
        'video_url': link,
        'published_at_utc': published_date,
        'description': getattr(entry, 'summary', ''),
    }


def fetch_videos_from_rss_bridge(base_url: str, channel_id: str,
                                 format: str = "Atom", max_items: int = 100) -> List[Dict]:
    """Fetch a channel's video list from an RSS-Bridge instance.

    Args:
        base_url: RSS-Bridge base URL (a trailing slash is tolerated).
        channel_id: YouTube channel ID (UC...).
        format: Feed format requested from the bridge (Atom or Rss).
        max_items: Maximum number of feed entries to examine. Entries
            without a recognisable video URL are skipped but still count
            towards this limit.

    Returns:
        A list of dicts with the keys ``video_id``, ``video_title``,
        ``video_url``, ``published_at_utc`` (ISO-8601 string with a ``Z``
        suffix, or ``None``) and ``description``.

    Raises:
        Exception: If the feed cannot be fetched or parsed, or if
            processing the entries fails. The underlying error is chained
            as the cause.
    """
    params = {
        'action': 'display',
        'bridge': 'YoutubeBridge',
        'context': 'By channel id',
        'c': channel_id,
        'format': format,
    }

    # Strip a trailing slash so "http://host/" does not become "http://host//?...".
    feed_url = f"{base_url.rstrip('/')}/?{urlencode(params)}"

    try:
        feed = feedparser.parse(feed_url)

        # feedparser reports fetch/parse failures through the "bozo" flag
        # instead of raising. A flagged feed with no entries would otherwise
        # be indistinguishable from a channel that has no videos.
        if getattr(feed, 'bozo', False) and not feed.entries:
            reason = feed.get('bozo_exception') or 'feed could not be parsed'
            raise RuntimeError(str(reason))

        videos = []
        for entry in feed.entries[:max_items]:
            video = _entry_to_video(entry)
            if video is not None:
                videos.append(video)

        return videos

    except Exception as e:
        raise Exception(f"Error fetching RSS-Bridge feed: {e}") from e
|
|||
|
|
|