mirror of
				https://github.com/devine-dl/devine.git
				synced 2025-11-04 11:54:50 +00:00 
			
		
		
		
	Add convert() method to Subtitle class
This commit is contained in:
		
							parent
							
								
									f4d8bc8dd0
								
							
						
					
					
						commit
						e76bc7201d
					
				@ -5,6 +5,7 @@ import subprocess
 | 
			
		||||
from collections import defaultdict
 | 
			
		||||
from enum import Enum
 | 
			
		||||
from io import BytesIO
 | 
			
		||||
from pathlib import Path
 | 
			
		||||
from typing import Any, Iterable, Optional
 | 
			
		||||
 | 
			
		||||
import pycaption
 | 
			
		||||
@ -143,6 +144,76 @@ class Subtitle(Track):
 | 
			
		||||
            track_name += flag
 | 
			
		||||
        return track_name or None
 | 
			
		||||
 | 
			
		||||
    def convert(self, codec: Subtitle.Codec) -> Path:
 | 
			
		||||
        """
 | 
			
		||||
        Convert this Subtitle to another Format.
 | 
			
		||||
 | 
			
		||||
        The file path location of the Subtitle data will be kept at the same
 | 
			
		||||
        location but the file extension will be changed appropriately.
 | 
			
		||||
 | 
			
		||||
        Supported formats:
 | 
			
		||||
        - SubRip - SubtitleEdit or pycaption.SRTWriter
 | 
			
		||||
        - TimedTextMarkupLang - SubtitleEdit or pycaption.DFXPWriter
 | 
			
		||||
        - WebVTT - SubtitleEdit or pycaption.WebVTTWriter
 | 
			
		||||
        - SubStationAlphav4 - SubtitleEdit
 | 
			
		||||
        - fTTML* - custom code using some pycaption functions
 | 
			
		||||
        - fVTT* - custom code using some pycaption functions
 | 
			
		||||
        *: Can read from format, but cannot convert to format
 | 
			
		||||
 | 
			
		||||
        Note: It currently prioritizes using SubtitleEdit over PyCaption as
 | 
			
		||||
        I have personally noticed more oddities with PyCaption parsing over
 | 
			
		||||
        SubtitleEdit. Especially when working with TTML/DFXP where it would
 | 
			
		||||
        often have timecodes and stuff mixed in/duplicated.
 | 
			
		||||
 | 
			
		||||
        Returns the new file path of the Subtitle.
 | 
			
		||||
        """
 | 
			
		||||
        if not self.path or not self.path.exists():
 | 
			
		||||
            raise ValueError("You must download the subtitle track first.")
 | 
			
		||||
 | 
			
		||||
        if self.codec == codec:
 | 
			
		||||
            return self.path
 | 
			
		||||
 | 
			
		||||
        output_path = self.path.with_suffix(f".{codec.value.lower()}")
 | 
			
		||||
 | 
			
		||||
        sub_edit_executable = get_binary_path("SubtitleEdit")
 | 
			
		||||
        if sub_edit_executable and self.codec not in (Subtitle.Codec.fTTML, Subtitle.Codec.fVTT):
 | 
			
		||||
            sub_edit_format = {
 | 
			
		||||
                Subtitle.Codec.SubStationAlphav4: "AdvancedSubStationAlpha",
 | 
			
		||||
                Subtitle.Codec.TimedTextMarkupLang: "TimedText1.0"
 | 
			
		||||
            }.get(codec, codec.name)
 | 
			
		||||
            subprocess.run(
 | 
			
		||||
                [
 | 
			
		||||
                    sub_edit_executable,
 | 
			
		||||
                    "/Convert", self.path, sub_edit_format,
 | 
			
		||||
                    f"/outputfilename:{output_path.name}",
 | 
			
		||||
                    f"/outputfolder:{output_path.parent}",
 | 
			
		||||
                    "/encoding:utf8"
 | 
			
		||||
                ],
 | 
			
		||||
                check=True,
 | 
			
		||||
                stdout=subprocess.DEVNULL,
 | 
			
		||||
                stderr=subprocess.DEVNULL
 | 
			
		||||
            )
 | 
			
		||||
        else:
 | 
			
		||||
            writer = {
 | 
			
		||||
                # pycaption generally only supports these subtitle formats
 | 
			
		||||
                Subtitle.Codec.SubRip: pycaption.SRTWriter,
 | 
			
		||||
                Subtitle.Codec.TimedTextMarkupLang: pycaption.DFXPWriter,
 | 
			
		||||
                Subtitle.Codec.WebVTT: pycaption.WebVTTWriter,
 | 
			
		||||
            }.get(codec)
 | 
			
		||||
            if writer is None:
 | 
			
		||||
                raise NotImplementedError(f"Cannot yet convert {self.codec.name} to {codec.name}.")
 | 
			
		||||
 | 
			
		||||
            caption_set = self.parse(self.path.read_bytes(), self.codec)
 | 
			
		||||
            Subtitle.merge_same_cues(caption_set)
 | 
			
		||||
            subtitle_text = writer().write(caption_set)
 | 
			
		||||
 | 
			
		||||
            output_path.write_text(subtitle_text, encoding="utf8")
 | 
			
		||||
 | 
			
		||||
        self.swap(output_path)
 | 
			
		||||
        self.codec = codec
 | 
			
		||||
 | 
			
		||||
        return output_path
 | 
			
		||||
 | 
			
		||||
    @staticmethod
 | 
			
		||||
    def parse(data: bytes, codec: Subtitle.Codec) -> pycaption.CaptionSet:
 | 
			
		||||
        if not isinstance(data, bytes):
 | 
			
		||||
 | 
			
		||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user