Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

对Python版本的一些小魔改 #6

Open
yl12053 opened this issue Jun 21, 2021 · 4 comments
Open

对Python版本的一些小魔改 #6

yl12053 opened this issue Jun 21, 2021 · 4 comments

Comments

@yl12053
Copy link

yl12053 commented Jun 21, 2021

python版本带了一个js脚本用来计算tk
但是需要跟一个js运行库
所以我尝试将那个js脚本翻译成Python脚本并且融合到同一个脚本中
代码如下

#! /usr/bin/env python
# -*- coding:utf-8 -*-

import urllib.request
import urllib.parse
import json
import re
import ssl
import ctypes
# Replace the stdlib's default HTTPS context factory with the unverified one
# (the private-API monkeypatch described in PEP 476). Certificate checks are
# thereby turned off for urllib requests made by this process, which exposes
# the traffic to man-in-the-middle tampering.
# NOTE(review): presumably a workaround for local certificate errors — confirm it is still needed.
ssl._create_default_https_context = ssl._create_unverified_context


class GoogleTrans(object):
    """Small client for the unofficial Google Translate ``translate_a/single`` endpoint.

    The ``tk`` token sent with every request is computed locally by
    :class:`js_fun`, a pure-Python port of the JavaScript token routine, so no
    JavaScript runtime is needed.
    """

    def __init__(self):
        """Set up the endpoint URL, token seed, request headers and query defaults."""
        self.url = 'https://translate.google.cn/translate_a/single'
        # Seed for the token algorithm; it may need refreshing at any time.
        # update_TKK() refreshes it but is deliberately not called here.
        self.TKK = "434674.96463358"

        self.header = {
            "accept": "*/*",
            "accept-language": "zh-CN,zh;q=0.9",
            "cookie": "NID=188=M1p_rBfweeI_Z02d1MOSQ5abYsPfZogDrFjKwIUbmAr584bc9GBZkfDwKQ80cQCQC34zwD4ZYHFMUf4F59aDQLSc79_LcmsAihnW0Rsb1MjlzLNElWihv-8KByeDBblR2V1kjTSC8KnVMe32PNSJBQbvBKvgl4CTfzvaIEgkqss",
            "referer": "https://translate.google.cn/",
            "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36",
            "x-client-data": "CJK2yQEIpLbJAQjEtskBCKmdygEIqKPKAQi5pcoBCLGnygEI4qjKAQjxqcoBCJetygEIza3KAQ==",
        }

        self.data = {
            "client": "webapp",  # access the server as the web app
            "sl": "auto",  # source language; "auto" lets Google detect it
            "tl": "vi",  # default target language
            "hl": "zh-CN",  # interface language (the endpoint is the .cn host)
            "dt": ["at", "bd", "ex", "ld", "md", "qca", "rw", "rm", "ss", "t"],  # kinds of data requested from the server
            "otf": "2",
            "ssel": "0",
            "tsel": "0",
            "kc": "1",
            "tk": "",  # token checked by the server; filled in by query()
            "q": ""  # text to translate; filled in by query()
        }

    class js_fun():
        """Pure-Python port of the JavaScript helpers that compute the ``tk`` token."""

        def rshift(self, val, n):
            """Return *val* shifted right by *n* as an unsigned 32-bit value (JS ``>>>``)."""
            return (val % 0x100000000) >> n

        def Number(self, val):
            """Convert *val* to a number, returning 0 when it cannot be parsed.

            Parsing uses ``int``/``float`` instead of ``eval``: the value can
            come from a downloaded page, so it must never be executed, and
            ``eval`` also rejects digit strings with leading zeros.
            """
            try:
                return int(val)
            except (TypeError, ValueError):
                pass
            try:
                return float(val)
            except (TypeError, ValueError):
                return 0

        class Undefined:
            """Placeholder for JavaScript ``undefined``; not used by the token code."""

            def __init__(self):
                pass

        class js_array():
            """List wrapper with JavaScript-like indexing.

            Reading a missing or invalid index yields 0 instead of raising, and
            writing past the end pads the gap with zeros.
            """

            def __init__(self, outer, init=()):
                self.outer = outer
                self.storage = list(init)

            @staticmethod
            def _as_index(key):
                """Return *key* as a non-negative int, or None when it is not a usable index."""
                if not isinstance(key, int):
                    try:
                        # Fractional floats are not indices; nan/inf raise here.
                        if isinstance(key, float) and int(key) != key:
                            return None
                        key = int(key)
                    except (TypeError, ValueError, OverflowError):
                        return None
                return key if key >= 0 else None

            def __getitem__(self, key):
                idx = self._as_index(key)
                if idx is None or idx >= len(self.storage):
                    return 0
                return self.storage[idx]

            def __setitem__(self, key, value):
                idx = self._as_index(key)
                if idx is None:
                    return
                missing = idx + 1 - len(self.storage)
                if missing > 0:
                    self.storage.extend([0] * missing)
                self.storage[idx] = value

            def __len__(self):
                return len(self.storage)

            def __str__(self):
                return str(self.storage)

            def __repr__(self):
                return repr(self.storage)

        def array(self, init=()):
            """Create a :class:`js_array` holding a copy of *init*."""
            return self.js_array(self, init)

        def uo(self, a, b):
            """Apply the mixing program *b* to the integer *a* and return the result.

            *b* is read three characters at a time: ``[combine][direction][amount]``.
            ``amount`` is a digit, or a letter meaning ``ord(letter) - 87``
            ('a' is 10). ``direction`` '+' shifts right (unsigned 32-bit),
            anything else shifts left. ``combine`` '+' adds modulo 2**32,
            anything else XORs.
            """
            for c in range(0, len(b) - 2, 3):
                combine, direction, amount = b[c], b[c + 1], b[c + 2]
                if 'a' <= amount:
                    shift = ord(amount) - 87
                else:
                    shift = self.Number(amount)
                if direction == '+':
                    d = self.rshift(a, shift)
                else:
                    d = a << shift
                if combine == '+':
                    a = (a + d) & 4294967295
                else:
                    a = a ^ d
            return a

        @staticmethod
        def _utf8_bytes(text):
            """Return the UTF-8 bytes of *text*, as the JavaScript routine builds them.

            The JavaScript code walks UTF-16 code units and emits UTF-8. The
            string is round-tripped through UTF-16 so that surrogate halves
            stored separately are re-paired into one 4-byte sequence. Lone
            surrogates keep their 3-byte form via ``surrogatepass``.
            """
            units = text.encode('utf-16-le', 'surrogatepass')
            return units.decode('utf-16-le', 'surrogatepass').encode('utf-8', 'surrogatepass')

        def wo(self, a, tkk):
            """Compute the ``tk`` token for the text *a* from the seed *tkk*.

            *tkk* has the form ``"<int>.<int>"``. The result is the string
            ``"<n>.<n ^ first_part>"`` with ``n`` in ``[0, 10**6)``.
            """
            parts = self.array(init=tkk.split("."))
            b = self.Number(parts[0])
            acc = b
            for byte in self._utf8_bytes(a):
                # Explicit 32-bit wrap; the width of ctypes.c_long differs by platform.
                acc = self.uo((acc + byte) & 0xFFFFFFFF, '+-a^+6')
            acc = self.uo(acc, '+-3^+b+-f')
            # Masking yields the unsigned 32-bit value the JS code ends up with.
            acc = (acc ^ self.Number(parts[1])) & 0xFFFFFFFF
            acc %= 10**6
            return str(acc) + '.' + str(acc ^ b)

    def update_TKK(self):
        """Download the translate home page and store the ``tkk`` value found in it.

        Raises IndexError when the page contains no ``tkk:'…'`` entry.
        """
        url = "https://translate.google.cn/"
        req = urllib.request.Request(url=url, headers=self.header)
        with urllib.request.urlopen(req) as resp:
            page_source = resp.read().decode("utf-8")
        self.TKK = re.findall(r"tkk:'([0-9]+\.[0-9]+)'", page_source)[0]

    def construct_url(self):
        """Build the request URL from ``self.url`` and the entries of ``self.data``.

        A list value is expanded into one ``key=item`` pair per element.
        Values are inserted verbatim, so they must already be URL-safe.
        """
        pairs = []
        for key, value in self.data.items():
            if isinstance(value, list):
                pairs.extend(key + '=' + item for item in value)
            else:
                pairs.append(key + '=' + value)
        if not pairs:
            return self.url
        return self.url + '?' + '&'.join(pairs)

    @staticmethod
    def _join_segments(segments):
        """Concatenate per-segment texts into ``(original, translated)``.

        NOTE(review): assumes each segment is ``[translated, original, ...]``
        and that non-text segments hold ``None`` in those slots — confirm
        against a live multi-sentence response.
        """
        translated = []
        original = []
        for seg in segments:
            if not seg:
                continue
            if isinstance(seg[0], str):
                translated.append(seg[0])
            if len(seg) > 1 and isinstance(seg[1], str):
                original.append(seg[1])
        return ''.join(original), ''.join(translated)

    def query(self, q, lang_to=''):
        """Translate *q* and return ``(original, source_code, translated, target_code)``.

        When *lang_to* is empty the target language already stored in
        ``self.data['tl']`` is used instead of sending an empty ``tl``.
        The texts of all returned segments are joined, so multi-sentence input
        is returned in full. Also prints both texts with their language codes.
        """
        self.data['q'] = urllib.parse.quote(q)
        self.data['tk'] = self.js_fun().wo(q, self.TKK)
        if lang_to:
            self.data['tl'] = lang_to
        else:
            lang_to = self.data['tl']
        url = self.construct_url()
        req = urllib.request.Request(url=url, headers=self.header)
        # Close the HTTP response deterministically.
        with urllib.request.urlopen(req) as resp:
            response = json.loads(resp.read().decode("utf-8"))
        originalText, targetText = self._join_segments(response[0])
        originalLanguageCode = response[2]
        print("翻译前:{},翻译前code:{}".format(originalText, originalLanguageCode))
        print("翻译后:{}, 翻译后code:{}".format(targetText, lang_to))
        return originalText, originalLanguageCode, targetText, lang_to


if __name__ == '__main__':
    # Demo: translate a sample phrase into Simplified Chinese and print the
    # four values returned by query() on one line.
    sample = "Hello world"
    result = GoogleTrans().query(sample, lang_to='zh-CN')
    print(*result)
@yl12053 yl12053 changed the title 对Python版本 Jun 21, 2021
@yl12053
Copy link
Author

yl12053 commented Jun 21, 2021

对了 还有就是 建议使用requests模组
也不知道为什么, 改用requests之后基本上就没怎么出现429了
代码如下

#! /usr/bin/env python
# -*- coding:utf-8 -*-

import urllib.parse
import requests
import json
import re
import ssl
import ctypes
# Replace the stdlib's default HTTPS context factory with the unverified one
# (the private-API monkeypatch described in PEP 476), i.e. turn off
# certificate checks for code that relies on that factory.
# NOTE(review): this listing sends its requests through `requests`; it is not
# clear that this hook affects them — confirm whether the line is still needed.
ssl._create_default_https_context = ssl._create_unverified_context


class GoogleTrans(object):
    """Small client for the unofficial Google Translate ``translate_a/single`` endpoint.

    HTTP traffic goes through ``requests``. The ``tk`` token sent with every
    request is computed locally by :class:`js_fun`, a pure-Python port of the
    JavaScript token routine, so no JavaScript runtime is needed.
    """

    def __init__(self):
        """Set up the endpoint URL, token seed, request headers and query defaults."""
        self.url = 'https://translate.google.cn/translate_a/single'
        # Seed for the token algorithm; it may need refreshing at any time.
        # update_TKK() refreshes it but is deliberately not called here.
        self.TKK = "434674.96463358"

        self.header = {
            "accept": "*/*",
            "accept-language": "zh-CN,zh;q=0.9",
            "cookie": "NID=188=M1p_rBfweeI_Z02d1MOSQ5abYsPfZogDrFjKwIUbmAr584bc9GBZkfDwKQ80cQCQC34zwD4ZYHFMUf4F59aDQLSc79_LcmsAihnW0Rsb1MjlzLNElWihv-8KByeDBblR2V1kjTSC8KnVMe32PNSJBQbvBKvgl4CTfzvaIEgkqss",
            "referer": "https://translate.google.cn/",
            "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36",
            "x-client-data": "CJK2yQEIpLbJAQjEtskBCKmdygEIqKPKAQi5pcoBCLGnygEI4qjKAQjxqcoBCJetygEIza3KAQ==",
        }

        self.data = {
            "client": "webapp",  # access the server as the web app
            "sl": "auto",  # source language; "auto" lets Google detect it
            "tl": "vi",  # default target language
            "hl": "zh-CN",  # interface language (the endpoint is the .cn host)
            "dt": ["at", "bd", "ex", "ld", "md", "qca", "rw", "rm", "ss", "t"],  # kinds of data requested from the server
            "otf": "2",
            "ssel": "0",
            "tsel": "0",
            "kc": "1",
            "tk": "",  # token checked by the server; filled in by query()
            "q": ""  # text to translate; filled in by query()
        }

    class js_fun():
        """Pure-Python port of the JavaScript helpers that compute the ``tk`` token."""

        def rshift(self, val, n):
            """Return *val* shifted right by *n* as an unsigned 32-bit value (JS ``>>>``)."""
            return (val % 0x100000000) >> n

        def Number(self, val):
            """Convert *val* to a number, returning 0 when it cannot be parsed.

            Parsing uses ``int``/``float`` instead of ``eval``: the value can
            come from a downloaded page, so it must never be executed, and
            ``eval`` also rejects digit strings with leading zeros.
            """
            try:
                return int(val)
            except (TypeError, ValueError):
                pass
            try:
                return float(val)
            except (TypeError, ValueError):
                return 0

        class Undefined:
            """Placeholder for JavaScript ``undefined``; not used by the token code."""

            def __init__(self):
                pass

        class js_array():
            """List wrapper with JavaScript-like indexing.

            Reading a missing or invalid index yields 0 instead of raising, and
            writing past the end pads the gap with zeros.
            """

            def __init__(self, outer, init=()):
                self.outer = outer
                self.storage = list(init)

            @staticmethod
            def _as_index(key):
                """Return *key* as a non-negative int, or None when it is not a usable index."""
                if not isinstance(key, int):
                    try:
                        # Fractional floats are not indices; nan/inf raise here.
                        if isinstance(key, float) and int(key) != key:
                            return None
                        key = int(key)
                    except (TypeError, ValueError, OverflowError):
                        return None
                return key if key >= 0 else None

            def __getitem__(self, key):
                idx = self._as_index(key)
                if idx is None or idx >= len(self.storage):
                    return 0
                return self.storage[idx]

            def __setitem__(self, key, value):
                idx = self._as_index(key)
                if idx is None:
                    return
                missing = idx + 1 - len(self.storage)
                if missing > 0:
                    self.storage.extend([0] * missing)
                self.storage[idx] = value

            def __len__(self):
                return len(self.storage)

            def __str__(self):
                return str(self.storage)

            def __repr__(self):
                return repr(self.storage)

        def array(self, init=()):
            """Create a :class:`js_array` holding a copy of *init*."""
            return self.js_array(self, init)

        def uo(self, a, b):
            """Apply the mixing program *b* to the integer *a* and return the result.

            *b* is read three characters at a time: ``[combine][direction][amount]``.
            ``amount`` is a digit, or a letter meaning ``ord(letter) - 87``
            ('a' is 10). ``direction`` '+' shifts right (unsigned 32-bit),
            anything else shifts left. ``combine`` '+' adds modulo 2**32,
            anything else XORs.
            """
            for c in range(0, len(b) - 2, 3):
                combine, direction, amount = b[c], b[c + 1], b[c + 2]
                if 'a' <= amount:
                    shift = ord(amount) - 87
                else:
                    shift = self.Number(amount)
                if direction == '+':
                    d = self.rshift(a, shift)
                else:
                    d = a << shift
                if combine == '+':
                    a = (a + d) & 4294967295
                else:
                    a = a ^ d
            return a

        @staticmethod
        def _utf8_bytes(text):
            """Return the UTF-8 bytes of *text*, as the JavaScript routine builds them.

            The JavaScript code walks UTF-16 code units and emits UTF-8. The
            string is round-tripped through UTF-16 so that surrogate halves
            stored separately are re-paired into one 4-byte sequence. Lone
            surrogates keep their 3-byte form via ``surrogatepass``.
            """
            units = text.encode('utf-16-le', 'surrogatepass')
            return units.decode('utf-16-le', 'surrogatepass').encode('utf-8', 'surrogatepass')

        def wo(self, a, tkk):
            """Compute the ``tk`` token for the text *a* from the seed *tkk*.

            *tkk* has the form ``"<int>.<int>"``. The result is the string
            ``"<n>.<n ^ first_part>"`` with ``n`` in ``[0, 10**6)``.
            """
            parts = self.array(init=tkk.split("."))
            b = self.Number(parts[0])
            acc = b
            for byte in self._utf8_bytes(a):
                # Explicit 32-bit wrap; the width of ctypes.c_long differs by platform.
                acc = self.uo((acc + byte) & 0xFFFFFFFF, '+-a^+6')
            acc = self.uo(acc, '+-3^+b+-f')
            # Masking yields the unsigned 32-bit value the JS code ends up with.
            acc = (acc ^ self.Number(parts[1])) & 0xFFFFFFFF
            acc %= 10**6
            return str(acc) + '.' + str(acc ^ b)

    def update_TKK(self):
        """Download the translate home page and store the ``tkk`` value found in it.

        Raises IndexError when the page contains no ``tkk:'…'`` entry.
        """
        url = "https://translate.google.cn/"
        req = requests.get(url, headers=self.header)
        page_source = req.text
        self.TKK = re.findall(r"tkk:'([0-9]+\.[0-9]+)'", page_source)[0]

    def construct_url(self):
        """Build the request URL from ``self.url`` and the entries of ``self.data``.

        A list value is expanded into one ``key=item`` pair per element.
        Values are inserted verbatim, so they must already be URL-safe.
        """
        pairs = []
        for key, value in self.data.items():
            if isinstance(value, list):
                pairs.extend(key + '=' + item for item in value)
            else:
                pairs.append(key + '=' + value)
        if not pairs:
            return self.url
        return self.url + '?' + '&'.join(pairs)

    @staticmethod
    def _join_segments(segments):
        """Concatenate per-segment texts into ``(original, translated)``.

        NOTE(review): assumes each segment is ``[translated, original, ...]``
        and that non-text segments hold ``None`` in those slots — confirm
        against a live multi-sentence response.
        """
        translated = []
        original = []
        for seg in segments:
            if not seg:
                continue
            if isinstance(seg[0], str):
                translated.append(seg[0])
            if len(seg) > 1 and isinstance(seg[1], str):
                original.append(seg[1])
        return ''.join(original), ''.join(translated)

    def query(self, q, lang_to=''):
        """Translate *q* and return ``(original, source_code, translated, target_code)``.

        When *lang_to* is empty the target language already stored in
        ``self.data['tl']`` is used instead of sending an empty ``tl``.
        The texts of all returned segments are joined, so multi-sentence input
        is returned in full. Also prints both texts with their language codes.
        """
        self.data['q'] = urllib.parse.quote(q)
        self.data['tk'] = self.js_fun().wo(q, self.TKK)
        if lang_to:
            self.data['tl'] = lang_to
        else:
            lang_to = self.data['tl']
        url = self.construct_url()
        # NOTE(review): unlike update_TKK(), this request is a POST sent
        # without self.header — confirm that is intentional. A non-200 reply
        # (e.g. 429) surfaces below as a JSON decoding error.
        robj = requests.post(url)
        response = json.loads(robj.text)
        originalText, targetText = self._join_segments(response[0])
        originalLanguageCode = response[2]
        print("翻译前:{},翻译前code:{}".format(originalText, originalLanguageCode))
        print("翻译后:{}, 翻译后code:{}".format(targetText, lang_to))
        return originalText, originalLanguageCode, targetText, lang_to


if __name__ == '__main__':
    # Demo: translate a sample phrase into Simplified Chinese and print the
    # four values returned by query() on one line.
    sample = "Hello world"
    result = GoogleTrans().query(sample, lang_to='zh-CN')
    print(*result)
@VictorZhang2014
Copy link
Owner

VictorZhang2014 commented Sep 10, 2021

@yl12053 这样挺不错的!就不需要调用execjs库了,全部都是纯Python代码了!手动狗头 👍🏻

@mannan291
Copy link

我想知道是否需要在请求模块中使用代理。我试过了,但它要求一个主机名。我想这是因为在标题中创建了 cookie 会话。

I want to know if there is a need to use proxies in the requests module. I tried but it is asking for a hostname. I guess it is because of the cookies session created in the headers.

@wangchuanbing
Copy link

测试了一些单词,发现谷歌翻译接口会返回错误翻译,在页面翻译时不会出现
比如:
alter 翻译为: 年龄 页面翻译为:改变
mindful shopper 翻译为: 迈出的购物者 页面翻译为:细心的购物者
应该是谷歌返回值的问题

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
4 participants