Let feedgen handle escaping: mark transcript content as HTML and drop manual XML entity escaping

2025-11-13 03:33:57 +03:00
parent abe170a1f8
commit 763a5a0a01
2 changed files with 4 additions and 5 deletions
--- a/src/rss_generator.py
+++ b/src/rss_generator.py
@@ -60,8 +60,9 @@ class RSSGenerator:
        fe.description(video.get('description', '')[:200])
        
        # Content (tam transcript)
+        # feedgen escapes HTML content automatically, so mark the transcript as type='html'
        if video.get('transcript_clean'):
-            fe.content(content=video['transcript_clean'])
+            fe.content(content=video['transcript_clean'], type='html')
    
    def generate_rss(self, output_path: str):
        """RSS feed'i dosyaya yaz"""
--- a/src/transcript_cleaner.py
+++ b/src/transcript_cleaner.py
@@ -127,10 +127,8 @@ class TranscriptCleaner:
        paragraphs = self.create_paragraphs(sentences, sentences_per_paragraph)
        
        # HTML'e sar
-        html_content = self.wrap_html(paragraphs)
-        
-        # XML entity escaping
-        clean_html = self.escape_xml_entities(html_content)
+        # feedgen already performs XML escaping, so escape_xml_entities is no longer called here
+        clean_html = self.wrap_html(paragraphs)
        
        return raw_text, clean_html