mirror of
				https://github.com/devine-dl/devine.git
				synced 2025-11-04 03:44:49 +00:00 
			
		
		
		
	Unescape HTML Entities in Subtitles after Downloading
This fixes some Subtitles having e.g., `&amp;` instead of just `&`, but especially for special entities like `&rlm;` (the right-to-left mark), which enables Right-to-Left mode on Hebrew and Arabic Subtitles.
This commit is contained in:
		
							parent
							
								
									26d067915f
								
							
						
					
					
						commit
						2056e056a4
					
				@ -913,6 +913,7 @@ class dl:
 | 
			
		||||
                        if isinstance(track, Subtitle):
 | 
			
		||||
                            track_data = track.path.read_bytes()
 | 
			
		||||
                            track_data = try_ensure_utf8(track_data)
 | 
			
		||||
                            track_data = html.unescape(track_data.decode("utf8")).encode("utf8")
 | 
			
		||||
                            track.path.write_bytes(track_data)
 | 
			
		||||
 | 
			
		||||
                        progress(downloaded="Downloaded")
 | 
			
		||||
 | 
			
		||||
@ -1,6 +1,7 @@
 | 
			
		||||
from __future__ import annotations
 | 
			
		||||
 | 
			
		||||
import base64
 | 
			
		||||
import html
 | 
			
		||||
import logging
 | 
			
		||||
import math
 | 
			
		||||
import re
 | 
			
		||||
@ -473,6 +474,7 @@ class DASH:
 | 
			
		||||
                        track.codec not in (Subtitle.Codec.fVTT, Subtitle.Codec.fTTML)
 | 
			
		||||
                    ):
 | 
			
		||||
                        segment_data = try_ensure_utf8(segment_data)
 | 
			
		||||
                        segment_data = html.unescape(segment_data.decode("utf8")).encode("utf8")
 | 
			
		||||
                    f.write(segment_data)
 | 
			
		||||
                    segment_file.unlink()
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -1,5 +1,6 @@
 | 
			
		||||
from __future__ import annotations
 | 
			
		||||
 | 
			
		||||
import html
 | 
			
		||||
import logging
 | 
			
		||||
import re
 | 
			
		||||
import sys
 | 
			
		||||
@ -314,6 +315,8 @@ class HLS:
 | 
			
		||||
                segment_data = segment_file.read_bytes()
 | 
			
		||||
                if isinstance(track, Subtitle):
 | 
			
		||||
                    segment_data = try_ensure_utf8(segment_data)
 | 
			
		||||
                    if track.codec not in (Subtitle.Codec.fVTT, Subtitle.Codec.fTTML):
 | 
			
		||||
                        segment_data = html.unescape(segment_data.decode("utf8")).encode("utf8")
 | 
			
		||||
                f.write(segment_data)
 | 
			
		||||
                segment_file.unlink()
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user