diff --git a/src/ccextractor.c b/src/ccextractor.c
index ea34aad9..526f00f5 100644
--- a/src/ccextractor.c
+++ b/src/ccextractor.c
@@ -79,6 +79,8 @@ void init_options (struct ccx_s_options *options)
 	options->nofontcolor=0; // 1 = don't put tags
 	options->notypesetting=0; // 1 = Don't put , , etc typesetting tags
 
+	options->no_bom = 0; // Use BOM by default.
+
 	options->settings_608.direct_rollup = 0;
 	options->settings_608.no_rollup = 0;
 	options->settings_608.force_rollup = 0;
@@ -456,22 +458,27 @@ int main(int argc, char *argv[])
 			}
 			switch (ccx_options.write_format)
 			{
-			case CCX_OF_RAW:
-				writeraw (BROADCAST_HEADER,sizeof (BROADCAST_HEADER),&wbout1);
-				break;
-			case CCX_OF_DVDRAW:
-				break;
-			case CCX_OF_RCWT:
-				if( init_encoder(enc_ctx,&wbout1) )
-					fatal (EXIT_NOT_ENOUGH_MEMORY, "Not enough memory\n");
-				break;
-			default:
-				if (ccx_options.encoding==CCX_ENC_UTF_8) // Write BOM
-					writeraw (UTF8_BOM, sizeof (UTF8_BOM), &wbout1);
-				if (ccx_options.encoding==CCX_ENC_UNICODE) // Write BOM
-					writeraw (LITTLE_ENDIAN_BOM, sizeof (LITTLE_ENDIAN_BOM), &wbout1);
-				if( init_encoder(enc_ctx,&wbout1) )
-					fatal (EXIT_NOT_ENOUGH_MEMORY, "Not enough memory\n");
+				case CCX_OF_RAW:
+					writeraw(BROADCAST_HEADER, sizeof(BROADCAST_HEADER), &wbout1);
+					break;
+				case CCX_OF_DVDRAW:
+					break;
+				case CCX_OF_RCWT:
+					if (init_encoder(enc_ctx, &wbout1))
+						fatal(EXIT_NOT_ENOUGH_MEMORY, "Not enough memory\n");
+					break;
+				default:
+					if (!ccx_options.no_bom){ // no_bom is set by -nobom and by -UCLA
+						if (ccx_options.encoding == CCX_ENC_UTF_8){ // Write BOM
+							writeraw(UTF8_BOM, sizeof(UTF8_BOM), &wbout1);
+						}
+						if (ccx_options.encoding == CCX_ENC_UNICODE){ // Write BOM
+							writeraw(LITTLE_ENDIAN_BOM, sizeof(LITTLE_ENDIAN_BOM), &wbout1);
+						}
+					}
+					if (init_encoder(enc_ctx, &wbout1)){
+						fatal(EXIT_NOT_ENOUGH_MEMORY, "Not enough memory\n");
+					}
 			}
 		}
 		if (ccx_options.extract == 12 && ccx_options.write_format != CCX_OF_RAW)
@@ -517,12 +524,17 @@ int main(int argc, char *argv[])
 							fatal (EXIT_NOT_ENOUGH_MEMORY, "Not enough memory\n");
 						break;
 					default:
-						if (ccx_options.encoding==CCX_ENC_UTF_8) // Write BOM
-							writeraw (UTF8_BOM, sizeof (UTF8_BOM), &wbout2);
-						if (ccx_options.encoding==CCX_ENC_UNICODE) // Write BOM
-							writeraw (LITTLE_ENDIAN_BOM, sizeof (LITTLE_ENDIAN_BOM), &wbout2);
-						if( init_encoder(enc_ctx+1,&wbout2) )
-							fatal (EXIT_NOT_ENOUGH_MEMORY, "Not enough memory\n");
+						if (!ccx_options.no_bom){ // no_bom is set by -nobom and by -UCLA
+							if (ccx_options.encoding == CCX_ENC_UTF_8){ // Write BOM
+								writeraw(UTF8_BOM, sizeof(UTF8_BOM), &wbout2);
+							}
+							if (ccx_options.encoding == CCX_ENC_UNICODE){ // Write BOM
+								writeraw(LITTLE_ENDIAN_BOM, sizeof(LITTLE_ENDIAN_BOM), &wbout2);
+							}
+						}
+						if (init_encoder(enc_ctx + 1, &wbout2)){
+							fatal(EXIT_NOT_ENOUGH_MEMORY, "Not enough memory\n");
+						}
 				}
 			}
 		}
diff --git a/src/ccextractor.h b/src/ccextractor.h
index 6691da3f..1a559087 100644
--- a/src/ccextractor.h
+++ b/src/ccextractor.h
@@ -42,6 +42,8 @@ struct ccx_s_options // Options from user parameters
 	struct ccx_boundary_time extraction_start, extraction_end; // Segment we actually process
 	int print_file_reports;
 
+	int no_bom; // Set to 1 when no BOM (Byte Order Mark) should be used for files. Note, this might make files unreadable in windows!
+
 	ccx_decoder_608_settings settings_608; // Contains the settings for the 608 decoder.
 
 	/* subtitle codec type */
@@ -107,8 +109,7 @@ struct ccx_s_options // Options from user parameters
 	char *srv_port;
 	int line_terminator_lf; // 0 = CRLF, 1=LF
 	int noautotimeref; // Do NOT set time automatically?
-    enum ccx_datasource input_source; // Files, stdin or network
-
+	enum ccx_datasource input_source; // Files, stdin or network
 };
 
 struct ts_payload
diff --git a/src/params.c b/src/params.c
index 0257a163..771c2014 100644
--- a/src/params.c
+++ b/src/params.c
@@ -348,7 +348,7 @@ void usage (void)
 	mprint (" -1, -2, -12: Output Field 1 data, Field 2 data, or both\n");
 	mprint (" (DEFAULT is -1)\n");
 	mprint (" -cc2: When in srt/sami mode, process captions in channel 2\n");
-	mprint (" instead channel 1.\n");
+	mprint (" instead of channel 1.\n");
 	mprint ("-svc --service N,N...: Enabled CEA-708 captions processing for the listed\n");
 	mprint (" services. The parameter is a command delimited list\n");
 	mprint (" of services numbers, such as \"1,2\" to process the\n");
@@ -477,6 +477,8 @@ void usage (void)
 	mprint (" affects Teletext in timed transcript with -datets.\n");
 	mprint ("\n");
 	mprint ("Options that affect what kind of output will be produced:\n");
+	mprint (" -nobom: Do not write a BOM (Byte Order Mark) to output files.\n");
+	mprint (" Note that this may break files when using Windows.\n");
 	mprint (" -unicode: Encode subtitles in Unicode instead of Latin-1.\n");
 	mprint (" -utf8: Encode subtitles in UTF-8 (no longer needed.\n");
 	mprint (" because UTF-8 is now the default).\n");
@@ -863,6 +865,10 @@ void parse_parameters (int argc, char *argv[])
 			ccx_options.nofontcolor=1;
 			continue;
 		}
+		if (strcmp(argv[i], "-nobom") == 0){
+			ccx_options.no_bom = 1;
+			continue; // Option consumed: skip the remaining checks, as the neighbouring handlers do.
+		}
 		if (strcmp (argv[i],"-nots")==0 ||
 				strcmp (argv[i],"--notypesetting")==0)
 		{
@@ -1425,6 +1431,7 @@ void parse_parameters (int argc, char *argv[])
 		if (strcmp (argv[i],"-UCLA")==0 || strcmp (argv[i],"-ucla")==0)
 		{
 			ccx_options.millis_separator='.';
+			ccx_options.no_bom = 1; // UCLA output is always written without a BOM.
 			if (!ccx_options.transcript_settings.isFinal){
 				ccx_options.transcript_settings.showStartTime = 1;
 				ccx_options.transcript_settings.showEndTime = 1;