first commit

2025-11-13 03:25:21 +03:00
commit abe170a1f8
21 changed files with 2198 additions and 0 deletions
--- a/src/video_fetcher.py
+++ b/src/video_fetcher.py
@@ -0,0 +1,105 @@
+"""
+RSS-Bridge kullanarak video metadata çıkarımı
+"""
+import feedparser
+import re
+import requests
+from urllib.parse import urlencode
+from typing import List, Dict, Optional
+from datetime import datetime
+
+
+def get_channel_id_from_handle(handle_url: str, timeout: float = 10.0) -> Optional[str]:
+    """
+    Find a YouTube channel ID by scraping a channel handle page.
+
+    Example: https://www.youtube.com/@tavakfi -> UC...
+
+    Args:
+        handle_url: URL of the channel page (e.g. a ``/@handle`` URL).
+        timeout: Seconds to wait for the HTTP request before giving up.
+
+    Returns:
+        The first ``UC...`` channel ID found in the page HTML, or None when
+        neither known marker is present.
+
+    Raises:
+        Exception: If the HTTP request fails, times out, or returns an
+            error status. The original requests error is chained as the cause.
+    """
+    try:
+        # Without an explicit timeout requests waits indefinitely on a
+        # stalled connection.
+        response = requests.get(handle_url, timeout=timeout)
+        response.raise_for_status()
+    except requests.exceptions.RequestException as e:
+        raise Exception(f"Error fetching channel page: {e}") from e
+
+    html_content = response.text
+
+    # "externalId" is tried first; "channelId" is the fallback marker.
+    id_patterns = (
+        r'"externalId":"(UC[a-zA-Z0-9_-]{22})"',
+        r'"channelId":"(UC[a-zA-Z0-9_-]{22})"',
+    )
+    for pattern in id_patterns:
+        match = re.search(pattern, html_content)
+        if match:
+            return match.group(1)
+
+    return None
+
+
+def extract_video_id(url: str) -> Optional[str]:
+    """
+    Extract the 11-character video ID from a YouTube URL.
+
+    Recognised URL shapes, tried in this order:
+      - ``youtube.com/watch?...v=<id>`` (``v`` may be any query parameter)
+      - ``youtu.be/<id>``
+      - ``youtube.com/embed/<id>``, ``/shorts/<id>``, ``/live/<id>``
+
+    Args:
+        url: The URL (or any text containing one) to inspect.
+
+    Returns:
+        The video ID, or None when no known pattern matches.
+    """
+    patterns = [
+        # Lazily skip leading "key=value&" pairs so "v" is found even when it
+        # is not the first parameter; the first "v" still wins.
+        r'youtube\.com/watch\?(?:[^&#]*&)*?v=([a-zA-Z0-9_-]{11})',
+        r'youtu\.be/([a-zA-Z0-9_-]{11})',
+        r'youtube\.com/(?:embed|shorts|live)/([a-zA-Z0-9_-]{11})',
+    ]
+
+    for pattern in patterns:
+        match = re.search(pattern, url)
+        if match:
+            return match.group(1)
+
+    return None
+
+
+def fetch_videos_from_rss_bridge(base_url: str, channel_id: str,
+                                 format: str = "Atom", max_items: int = 100) -> List[Dict]:
+    """
+    Fetch a channel's video list from an RSS-Bridge instance.
+
+    Args:
+        base_url: RSS-Bridge base URL (a trailing slash is tolerated).
+        channel_id: YouTube channel ID (UC...).
+        format: Feed format (Atom or Rss). The name shadows the builtin but
+            is kept because callers may pass it by keyword.
+        max_items: Maximum number of feed entries to inspect.
+
+    Returns:
+        A list of dicts with the keys ``video_id``, ``video_title``,
+        ``video_url``, ``published_at_utc`` and ``description``. Entries
+        whose link has no recognisable video ID are skipped.
+
+    Raises:
+        Exception: If the feed cannot be fetched or parsed at all.
+    """
+    params = {
+        'action': 'display',
+        'bridge': 'YoutubeBridge',
+        'context': 'By channel id',
+        'c': channel_id,
+        'format': format
+    }
+
+    # Strip trailing slashes so "http://host/" does not become "http://host//?...".
+    feed_url = f"{base_url.rstrip('/')}/?{urlencode(params)}"
+
+    try:
+        feed = feedparser.parse(feed_url)
+    except Exception as e:
+        raise Exception(f"Error fetching RSS-Bridge feed: {e}") from e
+
+    # feedparser reports download/parse problems through the bozo flag
+    # instead of raising; with no entries at all, treat that as a failure
+    # rather than silently returning an empty list.
+    if getattr(feed, 'bozo', False) and not feed.entries:
+        reason = getattr(feed, 'bozo_exception', 'malformed or unreachable feed')
+        raise Exception(f"Error fetching RSS-Bridge feed: {reason}")
+
+    videos = []
+    for entry in feed.entries[:max_items]:
+        # A single entry without a link must not abort the whole feed.
+        link = getattr(entry, 'link', None)
+        video_id = extract_video_id(link) if link else None
+        if not video_id:
+            continue
+
+        # Build an ISO-8601 timestamp from the parsed date, when present.
+        published_date = None
+        published_parsed = getattr(entry, 'published_parsed', None)
+        if published_parsed:
+            published_date = datetime(*published_parsed[:6]).isoformat() + 'Z'
+
+        videos.append({
+            'video_id': video_id,
+            'video_title': getattr(entry, 'title', ''),
+            'video_url': link,
+            'published_at_utc': published_date,
+            'description': getattr(entry, 'summary', '')
+        })
+
+    return videos
+