# 跳转至
#
# AI 翻译pdf

import fitz
import sqlite3
import pywebio
from pywebio import config
from pywebio.input import file_upload, input_group
import json

import requests
from pywebio.output import put_file


class maxai:
    """Small HTTP client for the chat application API served at ``base_url``.

    Constructing an instance fetches the application profile, opens a chat
    session and remembers its id; :meth:`chat` then posts a message to that
    session and returns the concatenated streamed answer.
    """

    # NOTE(review): the default API key is hard-coded in source. It is kept
    # only so existing callers of ``maxai()`` keep working; prefer passing
    # ``auth`` explicitly and moving the key out of the repository.
    def __init__(self, auth="application-ad80edba84c30eb1161ae04e71ee7f78", timeout=None):
        """Open a chat session.

        Args:
            auth: Value sent in the ``AUTHORIZATION`` header of every request.
            timeout: Passed to every ``requests`` call. ``None`` (the default)
                waits indefinitely, which is what the calls did before this
                parameter existed.
        """
        self.base_url = 'http://ai.tech.intra.com'
        self.auth = auth
        self.timeout = timeout
        self.headers = {'accept': 'application/json', 'AUTHORIZATION': self.auth}
        self.new_chat = False  # when True, chat() opens a fresh session per call
        self.chatid = self.create_chat()
        print(f"\r登录成功-{self.chatid}")

    @staticmethod
    def _collect_stream_text(lines):
        """Join the ``content`` of every non-final event in a streamed reply.

        Args:
            lines: Iterable of decoded text lines from the response stream.

        Returns:
            The concatenated ``content`` values of all events whose
            ``is_end`` flag is not set; ``''`` when there are none.

        Raises:
            json.JSONDecodeError: If a ``data:`` line carries invalid JSON.
        """
        prefix = "data:"
        parts = []
        for line in lines:
            # Blank separators and anything that is not a payload line carry
            # no JSON; slicing them blindly would corrupt the parse.
            if not line or not line.startswith(prefix):
                continue
            event = json.loads(line[len(prefix):])
            if event.get('is_end'):
                continue
            text = event.get('content')
            if text:
                parts.append(text)
        return ''.join(parts)

    def chat(self, query):
        """Send ``query`` to the chat session and return the full answer text.

        Args:
            query: Message text to post.

        Returns:
            The streamed answer as one string, or ``''`` when the server
            responds with a status other than 200.
        """
        chat_id = self.create_chat() if self.new_chat else self.chatid
        url = self.base_url + "/api/application/chat_message/" + chat_id
        data = {
            "message": query,
            "re_chat": False,
            "stream": True,
        }
        response = requests.post(url, headers=self.headers, json=data, stream=True,
                                 timeout=self.timeout)
        try:
            if response.status_code != 200:
                return ''
            # Force UTF-8 so iter_lines(decode_unicode=True) yields str.
            response.encoding = 'utf-8'
            return self._collect_stream_text(response.iter_lines(decode_unicode=True))
        finally:
            # A streamed response holds its connection until closed.
            response.close()

    def create_chat(self):
        """Open a new chat session and return its id (the response's ``data`` field)."""
        print("创建对话...", end='', flush=True)
        app_id = self.login_and_get_appinfo()
        url = self.base_url + "/api/application/" + app_id + "/chat/open"
        response = requests.get(url, headers=self.headers, timeout=self.timeout).json()
        return response['data']

    def login_and_get_appinfo(self):
        """Fetch the application profile and return its ``id``."""
        print("正在登录...", end='', flush=True)
        url = self.base_url + '/api/application/profile'
        response = requests.get(url, headers=self.headers, timeout=self.timeout).json()
        return response['data']['id']


def convert_color(color_int):
    """Unpack a 0xRRGGBB integer into an ``(r, g, b)`` tuple of floats in [0, 1].

    Only the low 24 bits are used; each 8-bit channel is divided by 255.
    """
    red, green, blue = (
        ((color_int >> shift) & 0xFF) / 255 for shift in (16, 8, 0)
    )
    return (red, green, blue)


def init_db():
    """Create the ``translations`` table in ``translations_cache.db`` if missing.

    The table maps ``original_text`` (primary key) to ``translated_text``.
    Calling this repeatedly is harmless because of ``IF NOT EXISTS``.
    """
    conn = sqlite3.connect('translations_cache.db')
    try:
        conn.execute('''
            CREATE TABLE IF NOT EXISTS translations (
                original_text TEXT PRIMARY KEY,
                translated_text TEXT
            )
        ''')
        conn.commit()
    finally:
        # Release the connection even when the statement fails.
        conn.close()


def get_cached_translation(text):
    """Look up ``text`` in the translation cache.

    Args:
        text: Original text used as the cache key.

    Returns:
        The stored translation, or ``None`` when ``text`` is not cached.
    """
    conn = sqlite3.connect('translations_cache.db')
    try:
        row = conn.execute(
            'SELECT translated_text FROM translations WHERE original_text = ?',
            (text,),
        ).fetchone()
    finally:
        # Release the connection even when the query fails.
        conn.close()
    return row[0] if row else None


def cache_translation(original_text, translated_text):
    """Store a translation in the cache, overwriting any existing entry.

    ``original_text`` is the table's primary key, so a plain ``INSERT`` would
    raise ``sqlite3.IntegrityError`` for text that is already cached;
    ``INSERT OR REPLACE`` updates the row instead.

    Args:
        original_text: Source text used as the cache key.
        translated_text: Translation to store for that key.
    """
    conn = sqlite3.connect('translations_cache.db')
    try:
        conn.execute(
            'INSERT OR REPLACE INTO translations (original_text, translated_text) VALUES (?, ?)',
            (original_text, translated_text),
        )
        conn.commit()
    finally:
        # Release the connection even when the write fails.
        conn.close()


def translate_text(text):
    """Translate ``text``, consulting the SQLite cache before the chat API.

    A non-empty cached entry is returned directly. Otherwise the text is sent
    through the shared ``ccaam`` client and a non-empty answer is written to
    the cache so the same text is not sent again.

    Args:
        text: Text to translate.

    Returns:
        The translated text; may be ``''`` when the chat call yields nothing.
    """
    cached_translation = get_cached_translation(text)
    if cached_translation:
        return cached_translation

    translated = ccaam.chat(text)
    if translated:
        # NOTE(review): every non-empty answer is cached, including replies
        # the caller may later reject — confirm that is acceptable.
        try:
            cache_translation(text, translated)
        except sqlite3.Error as exc:
            # The cache is an optimisation; a failed write must not lose the
            # translation that was just obtained.
            print(f"translation cache write failed: {exc}")
    return translated


# Initialize the database: make sure the translation cache table exists.
# Runs at import time.
init_db()
# Shared chat client used by translate_text(). Constructing it calls
# create_chat(), which issues HTTP requests, so importing this module
# needs the API at base_url to be reachable.
ccaam = maxai()


# Translate an already-opened PDF and offer the result for download

def trancpdf(pdf_document, filename):
    """Overlay translations on every page of a PDF and offer it for download.

    Each text span longer than 10 characters is sent to ``translate_text``;
    the answer is drawn at the top-left corner of the span's bounding box at
    one third of the span's font size, in the span's colour. When all pages
    are done the document is saved once under ``zh_CN/`` and published once
    through ``put_file``.

    Args:
        pdf_document: An opened PyMuPDF document (as returned by ``fitz.open``).
        filename: Name of the uploaded file; only its base name is used for
            the output file and the download.
    """
    import os  # local import keeps this function self-contained

    for page_num in range(pdf_document.page_count):
        page = pdf_document.load_page(page_num)

        # All text blocks on the page, with their lines and spans.
        blocks = page.get_text("dict", flags=11)["blocks"]

        for block in blocks:
            # Blocks without a "lines" entry have nothing to translate.
            for line in block.get("lines", []):
                for span in line["spans"]:
                    text = span["text"]
                    # Skip blank spans and short fragments (10 chars or fewer).
                    if not text.strip() or len(text) <= 10:
                        continue
                    translated_text = translate_text(text)
                    if not translated_text:
                        continue
                    # Replies containing these words are dropped; presumably
                    # they are apologies or meta commentary rather than a
                    # translation — confirm against the model's behaviour.
                    if "抱歉" in translated_text or "翻译" in translated_text:
                        continue
                    font_size = span["size"]
                    color = span["color"]
                    bbox = span["bbox"]
                    rect = fitz.Rect(bbox)
                    page.insert_text((rect.x0, rect.y0), translated_text, fontsize=font_size / 3,
                                     color=convert_color(color), fontname="SimSun", fontfile="SimSun.ttf")
                    print((translated_text, font_size, color, bbox))

    # Save and publish once, after every page has been processed. Only the
    # base name is used so an uploaded name cannot escape the output directory.
    safe_name = os.path.basename(filename)
    os.makedirs("zh_CN", exist_ok=True)
    output_path = os.path.join("zh_CN", safe_name)
    pdf_document.save(output_path)
    with open(output_path, 'rb') as fh:
        content = fh.read()
    put_file(safe_name, content, 'download me')


def sendmd():
    """PyWebIO task: accept one uploaded PDF, translate it, offer the result.

    The upload is written to the working directory under its base name,
    opened with PyMuPDF and passed to ``trancpdf``. Nothing happens when the
    form is submitted without a file.
    """
    import os  # local import keeps this function self-contained

    data = input_group("翻译pdf", [
        file_upload(accept=".pdf", placeholder="翻译pdf", name="file"),
    ])
    upload = data.get('file')
    if not upload:
        # No file was provided, so there is nothing to translate.
        return

    # The client controls the name: drop any directory part (either slash
    # style) so the write below stays inside the working directory.
    filename = os.path.basename(upload['filename'].replace("\\", "/")) or "upload.pdf"
    print(filename, len(upload['content']))
    with open(filename, 'wb') as fh:
        fh.write(upload['content'])

    pdf_document = fitz.open(filename)
    try:
        trancpdf(pdf_document, filename)
    finally:
        # Close the document even when translation fails part-way.
        pdf_document.close()


if __name__ == '__main__':
    # Set the page title, theme and description before the server starts.
    config(title="工具", theme="yeti", description="yes")  # global configuration
    # Serve sendmd as the PyWebIO application on port 1008 with cdn=False.
    # NOTE(review): ports below 1024 usually need elevated privileges on
    # Unix-like systems — confirm 1008 is intended.
    pywebio.start_server(sendmd, port=1008, cdn=False)