import os
import re
from openai import OpenAI
# Module-level OpenAI client, constructed once at import time with no explicit
# arguments; translate_text() sends its chat-completion requests through it.
# NOTE(review): presumably credentials come from the environment - confirm
# against the OpenAI SDK documentation.
client = OpenAI()
def translate_text(text_list):
    """Translate English subtitle texts to Turkish with one chat request.

    Only non-blank entries are sent to the model, one per prompt line, and
    the returned lines are mapped back onto the positions they came from.
    Blank entries would otherwise become empty prompt lines that the model
    tends to drop, shifting every following translation by one.

    Args:
        text_list: Sequence of subtitle texts, one item per cue.

    Returns:
        A list of exactly ``len(text_list)`` strings. Blank inputs map to
        ``""``. If the model returns fewer lines than requested, the
        remaining positions are left as ``""``; surplus lines are ignored.
        A warning is printed whenever the counts differ.
    """
    # Collapse embedded line breaks so every cue occupies exactly one line
    # of the prompt; the reply is matched back line by line.
    sources = [" ".join(text.splitlines()).strip() for text in text_list]
    pending = [position for position, text in enumerate(sources) if text]
    results = [""] * len(sources)
    if not pending:
        # Nothing translatable: skip the API round trip entirely.
        return results

    prompt = "Aşağıdaki İngilizce altyazı metinlerini anlamı bozmadan, doğal bir Türkçe ile çevir. Her satırı ayrı bir satır olarak döndür. Sadece çeviriyi ver, başka açıklama yapma.\n\n"
    prompt += "\n".join(sources[position] for position in pending)
    response = client.chat.completions.create(
        model="gpt-4.1-mini",
        messages=[
            {"role": "system", "content": "Sen profesyonel bir film altyazı çevirmenisin. Metinleri doğal ve akıcı bir Türkçeye çeviriyorsun."},
            {"role": "user", "content": prompt},
        ],
    )

    # The message content may be None; treat that as an empty reply.
    content = response.choices[0].message.content or ""
    # Drop blank separator lines the model may insert between translations.
    translated = [line.strip() for line in content.splitlines() if line.strip()]

    if len(translated) != len(pending):
        print(f"Warning: Mismatch in translation count. Expected {len(pending)}, got {len(translated)}")

    # zip() stops at the shorter side: missing translations stay "", and
    # surplus lines are discarded.
    for position, line in zip(pending, translated):
        results[position] = line
    return results
def process_srt(input_path, output_path):
    """Translate the cue texts of an SRT file and write the result.

    The input is split into blocks on blank lines. A block with at least
    three lines is treated as a cue (index, timestamp, text); text spanning
    several lines is joined with spaces. Blocks with fewer lines are skipped:
    they are neither sent for translation nor written to the output.

    Args:
        input_path: Path of the source .srt file (read as UTF-8; a BOM is
            tolerated).
        output_path: Path of the translated .srt file (written as UTF-8).
    """
    with open(input_path, 'r', encoding='utf-8-sig') as f:
        content = f.read()

    batch_size = 40

    # Keep only well-formed cues so that no empty placeholder text is ever
    # sent to the translator.
    cues = []
    for block in re.split(r'\n\s*\n', content.strip()):
        lines = block.split('\n')
        if len(lines) < 3:
            continue
        cues.append((lines[0], lines[1], " ".join(lines[2:])))

    total = len(cues)
    print(f"Total blocks to translate: {total}")

    # Ceiling division: an exact multiple of batch_size must not report an
    # extra batch.
    batch_count = (total + batch_size - 1) // batch_size

    final_translations = []
    for batch_number, start in enumerate(range(0, total, batch_size), start=1):
        batch = [text for _, _, text in cues[start:start + batch_size]]
        print(f"Translating batch {batch_number}/{batch_count}...")
        translated_batch = list(translate_text(batch))
        # Force one output line per cue: trim surplus lines, pad missing ones
        # with empty text (a negative repeat count yields an empty list).
        padding = [""] * (len(batch) - len(translated_batch))
        final_translations.extend(translated_batch[:len(batch)] + padding)

    with open(output_path, 'w', encoding='utf-8') as f:
        for (index, timestamp, _), text in zip(cues, final_translations):
            f.write(f"{index}\n{timestamp}\n{text}\n\n")
if __name__ == "__main__":
    # Script entry point: translate the fixed English subtitle file and
    # write the result to the fixed output location.
    source_srt = '/home/ubuntu/upload/en.srt'
    target_srt = '/home/ubuntu/tr.srt'
    process_srt(source_srt, target_srt)