first commit

This commit is contained in:
salvacybersec
2025-11-13 03:25:21 +03:00
commit abe170a1f8
21 changed files with 2198 additions and 0 deletions

105
src/video_fetcher.py Normal file
View File

@@ -0,0 +1,105 @@
"""
RSS-Bridge kullanarak video metadata çıkarımı
"""
import feedparser
import re
import requests
from urllib.parse import urlencode
from typing import List, Dict, Optional
from datetime import datetime
def get_channel_id_from_handle(handle_url: str) -> Optional[str]:
    """Resolve a YouTube channel handle URL to its channel ID via web scraping.

    Example: https://www.youtube.com/@tavakfi -> UC...

    Args:
        handle_url: Full channel handle URL (e.g. "https://www.youtube.com/@name").

    Returns:
        The channel ID ("UC" followed by 22 URL-safe characters), or None
        if no ID pattern is found in the page HTML.

    Raises:
        Exception: If the HTTP request fails or returns an error status.
    """
    try:
        # Timeout added: without it requests.get can block indefinitely
        # on an unresponsive server.
        response = requests.get(handle_url, timeout=30)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        # Chain the cause so the original traceback is preserved.
        raise Exception(f"Error fetching channel page: {e}") from e
    html_content = response.text
    # YouTube embeds the canonical ID as "externalId"; "channelId" is a fallback.
    for pattern in (r'"externalId":"(UC[a-zA-Z0-9_-]{22})"',
                    r'"channelId":"(UC[a-zA-Z0-9_-]{22})"'):
        match = re.search(pattern, html_content)
        if match:
            return match.group(1)
    return None
def extract_video_id(url: str) -> Optional[str]:
    """Extract the 11-character video ID from a YouTube URL.

    Supports the watch, short (youtu.be) and embed URL forms; returns
    None when the URL matches none of them.
    """
    id_patterns = (
        r'youtube\.com/watch\?v=([a-zA-Z0-9_-]{11})',
        r'youtu\.be/([a-zA-Z0-9_-]{11})',
        r'youtube\.com/embed/([a-zA-Z0-9_-]{11})',
    )
    # Lazily evaluate each pattern; stop at the first hit.
    hits = (re.search(p, url) for p in id_patterns)
    return next((m.group(1) for m in hits if m), None)
def fetch_videos_from_rss_bridge(base_url: str, channel_id: str,
    format: str = "Atom", max_items: int = 100) -> List[Dict]:
    """Fetch a channel's video list from an RSS-Bridge instance.

    Args:
        base_url: RSS-Bridge base URL.
        channel_id: YouTube channel ID ("UC...").
        format: Feed format ("Atom" or "Rss"). NOTE: this parameter name
            shadows the `format` builtin; kept for backward compatibility
            with existing keyword callers.
        max_items: Maximum number of videos to return.

    Returns:
        List of dicts with keys: video_id, video_title, video_url,
        published_at_utc (ISO-8601 string or None), description.

    Raises:
        Exception: If the feed cannot be fetched or parsed.
    """
    params = {
        'action': 'display',
        'bridge': 'YoutubeBridge',
        'context': 'By channel id',
        'c': channel_id,
        'format': format
    }
    feed_url = f"{base_url}/?{urlencode(params)}"
    try:
        feed = feedparser.parse(feed_url)
        videos = []
        for entry in feed.entries[:max_items]:
            video_id = extract_video_id(entry.link)
            if not video_id:
                # Skip entries whose link doesn't look like a YouTube video URL.
                continue
            # feedparser exposes the publish time as a UTC struct_time
            # (per its docs); serialize as ISO-8601 with a 'Z' suffix.
            published_date = None
            if hasattr(entry, 'published_parsed') and entry.published_parsed:
                published_date = datetime(*entry.published_parsed[:6]).isoformat() + 'Z'
            videos.append({
                'video_id': video_id,
                'video_title': entry.title,
                'video_url': entry.link,
                'published_at_utc': published_date,
                'description': getattr(entry, 'summary', '')
            })
        return videos
    except Exception as e:
        # Chain the cause so the original failure's traceback is preserved.
        raise Exception(f"Error fetching RSS-Bridge feed: {e}") from e