最近工作需要,在进行机翻的尝试,整理了一下Google和Bing的API,现在将相关代码贴出来:
主类,Translate.class.php
文件内容:
<?php
// Root directory of the class library; a value defined earlier by the host application wins.
if (!defined('LIB_PATH')) {
    define('LIB_PATH', '/usr/htdocs/class');
}
/**
 * Translation facade: loads an engine driver and delegates all work to it.
 *
 * A driver for engine "<name>" is expected in
 * LIB_PATH/Driver/Translate/<Name>.class.php and must define a class called
 * translate<Name> that provides translate($text) and getError().
 */
class cmsTranslate {
    // Engine driver instance created by the constructor.
    protected $api = null;
    private $source_lang = 'zh-cn';
    private $target_lang = 'en';

    /**
     * Load the driver for the requested engine and instantiate it.
     *
     * @param string $api   Engine name, e.g. 'google' or 'bing'.
     * @param string $from  Source language code; an empty value keeps 'zh-cn'.
     * @param string $to    Target language code; an empty value keeps 'en'.
     * @param mixed  $other Engine-specific extra value, passed as the driver's third constructor argument.
     * @throws Exception When the driver file or driver class cannot be found.
     */
    public function __construct($api, $from = 'zh-cn', $to = 'en', $other = '') {
        $name = ucwords((string) $api);
        // The engine name becomes part of an include path, so only plain
        // identifier characters are accepted (no "../" or path separators).
        if (!preg_match('/^[A-Za-z0-9_]+$/', $name)) {
            throw new Exception($api . ' is not found!');
        }
        $filename = LIB_PATH . '/Driver/Translate/' . $name . '.class.php';
        if (!file_exists($filename)) {
            throw new Exception($api . ' is not found!');
        }
        include_once $filename;

        if ($from) {
            $this->source_lang = $from;
        }
        if ($to) {
            $this->target_lang = $to;
        }

        $class = 'translate' . $name;
        // The file exists but may not declare the expected class.
        if (!class_exists($class, false)) {
            throw new Exception($api . ' is not found!');
        }
        $this->api = new $class($this->source_lang, $this->target_lang, $other);
    }

    /**
     * Return the last error message reported by the engine driver.
     *
     * @return string
     */
    public function getError() {
        return $this->api->getError();
    }

    /**
     * Translate the given content with the loaded engine.
     *
     * @param string $text Content to translate.
     * @return string|false Whatever the driver's translate() returns.
     * @throws Exception When no engine driver is loaded.
     */
    public function translate($text) {
        if (!$this->api) {
            throw new Exception('翻译引擎不存在!');
        }
        return $this->api->translate($text);
    }
}
谷歌引擎:Driver/Translate/Google.class.php
代码:
<?php
/**
 * Translation driver that talks to the translate.google.com web endpoint.
 *
 * Text is stripped of HTML, split into chunks of whole lines, and each chunk
 * is requested separately together with a "tk" token derived from the text
 * and the TKK seed.
 */
class translateGoogle {
    // TKK seed used for the request token; 'auto' makes TL() fetch it from the home page.
    private $tkk = '435578.903374698';
    private $url = 'https://translate.google.com/translate_a/single';
    private $from = 'zh-CN';
    private $to = 'en';
    private $error = '';

    /**
     * @param string $from Source language code; an empty value keeps 'zh-CN'.
     * @param string $to   Target language code; an empty value keeps 'en'.
     * @param string $tk   TKK seed ("<int>.<int>") or 'auto'; an empty value keeps the built-in seed.
     */
    public function __construct($from = '', $to = '', $tk = '') {
        if ($from) {
            $this->from = $from;
        }
        if ($to) {
            $this->to = $to;
        }
        if ($tk) {
            $this->tkk = $tk;
        }
    }

    /**
     * Translate a block of text or simple HTML.
     *
     * @param string $text Content to translate.
     * @return string|false Translated text ('' for empty input), or false on
     *                      failure; see getError() for the reason.
     */
    public function translate($text) {
        $text = trim((string) $text);
        // Compare against '' so that the literal text "0" is still translated.
        if ($text === '') {
            return '';
        }
        $translated = [];
        foreach ($this->splitIntoChunks($text, 500) as $chunk) {
            // One request per chunk; abort on the first failure.
            $res = $this->_trans($chunk);
            if ($res === false) {
                return false;
            }
            $translated[] = $res;
        }
        return trim(implode("\n", $translated));
    }

    /**
     * Return the last error message recorded by this driver.
     *
     * @return string
     */
    public function getError() {
        return $this->error;
    }

    /**
     * Strip markup and group the non-empty lines into chunks for the API.
     *
     * A chunk is closed once adding the next line would reach $limit
     * characters; that line then starts the next chunk, so no line is lost.
     * A single line longer than $limit forms a chunk of its own.
     *
     * @param string $text  Raw text or HTML.
     * @param int    $limit Chunk size threshold in characters.
     * @return string[] Chunks, each line prefixed by "\n".
     */
    private function splitIntoChunks($text, $limit) {
        // Turn block-level tags into line breaks before dropping all markup.
        $text = str_replace(['<br>', '<p>', '</p>', '<br />', '<br/>'], "\r\n", $text);
        $text = strip_tags($text);
        $chunks = [];
        $tmp = '';
        foreach (explode("\n", $text) as $line) {
            $line = trim($line);
            if ($line === '') {
                continue;
            }
            if ($tmp !== '' && mb_strlen($tmp, 'utf-8') + mb_strlen($line, 'utf-8') >= $limit) {
                $chunks[] = $tmp;
                $tmp = '';
            }
            $tmp .= "\n" . $line;
        }
        if ($tmp !== '') {
            $chunks[] = $tmp;
        }
        return $chunks;
    }

    /**
     * Perform a GET request and return the response body.
     *
     * @param string $url   Request URL; when $query is given it must already end in '?' or '&'.
     * @param array  $query Extra query parameters appended to $url.
     * @return string|false Response body, or false on a cURL error (message stored in $error).
     */
    private function _curl($url, $query = []) {
        if ($query) {
            $query_str = http_build_query($query);
            $url = $url . $query_str;
        }
        $curl = curl_init();
        curl_setopt_array($curl, array(
            CURLOPT_URL => $url,
            CURLOPT_RETURNTRANSFER => true,
            CURLOPT_ENCODING => "",
            CURLOPT_MAXREDIRS => 10,
            CURLOPT_TIMEOUT => 30,
            CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
            // NOTE(review): peer verification is disabled, so the TLS
            // certificate is not validated; enable it where a CA bundle is available.
            CURLOPT_SSL_VERIFYPEER => false,
            CURLOPT_SSL_VERIFYHOST => 2,
            CURLOPT_CUSTOMREQUEST => "GET",
            CURLOPT_HTTPHEADER => array(
                "Cache-Control: no-cache",
                "User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Safari/537.36"
            ),
        ));
        $response = curl_exec($curl);
        if (curl_errno($curl)) {
            $this->error = 'CURL_ERROR:' . curl_error($curl);
            $response = false;
        }
        curl_close($curl);
        return $response;
    }

    /**
     * Translate one chunk through the web endpoint.
     *
     * @param string $text Chunk of plain text.
     * @return string|false Translated text, or false on failure.
     */
    private function _trans($text) {
        $text = trim($text);
        if ($text === '') {
            return '';
        }
        // NOTE(review): the source language is still auto-detected (sl=auto) as
        // before; only the target language is taken from the configuration.
        $url = $this->url . '?client=webapp&sl=auto&tl=' . rawurlencode($this->to)
            . '&hl=auto&dt=at&dt=bd&dt=ex&dt=ld&dt=md&dt=qca&dt=rw&dt=rm&dt=ss&dt=t&otf=1&ssel=3&tsel=3&kc=1&';
        $query = [
            'tk' => $this->TL($text),
            'q' => $text
        ];
        $response = $this->_curl($url, $query);
        if (!$response) {
            if ($response !== false) {
                $this->error = 'EMPTY_RESPONSE';
            }
            return false;
        }
        $decoded = json_decode($response);
        if (!$decoded || !is_array($decoded) || !isset($decoded[0]) || !is_array($decoded[0])) {
            // Not the expected JSON array: keep the raw body as the error text.
            $this->error = strip_tags($response);
            return false;
        }
        $result = '';
        foreach ($decoded[0] as $v) {
            // Only entries that carry index 8 are concatenated; shorter entries are skipped.
            if (is_array($v) && isset($v[8])) {
                $result .= $v[0];
            }
        }
        return $result;
    }

    /**
     * Unsigned 32-bit right shift (the JavaScript ">>>" operator).
     *
     * @param int $x    Value; only its low 32 bits are used.
     * @param int $bits Number of bit positions to shift.
     * @return int|float
     */
    private function shr32($x, $bits) {
        if ($bits <= 0) {
            return $x;
        }
        if ($bits >= 32) {
            return 0;
        }
        $bin = decbin($x);
        $l = strlen($bin);
        if ($l > 32) {
            $bin = substr($bin, $l - 32, 32);
        } elseif ($l < 32) {
            $bin = str_pad($bin, 32, '0', STR_PAD_LEFT);
        }
        return bindec(str_pad(substr($bin, 0, 32 - $bits), 32, '0', STR_PAD_LEFT));
    }

    /**
     * Apply a sequence of mixing steps to $a.
     *
     * $b is read three characters at a time: [combine][direction][amount].
     * The amount is a digit or a letter ('a' = 10, 'b' = 11, ...); direction
     * '+' is an unsigned right shift, anything else a left shift; combine '+'
     * adds the shifted value modulo 2^32, anything else XORs it.
     *
     * @param int    $a Current value.
     * @param string $b Operation string, e.g. '+-a^+6'.
     * @return int
     */
    private function RL($a, $b) {
        for ($c = 0; $c < strlen($b) - 2; $c += 3) {
            $d = $b[$c + 2];
            $d = $d >= 'a' ? ord($d) - 87 : intval($d);
            $d = $b[$c + 1] == '+' ? $this->shr32($a, $d) : $a << $d;
            $a = $b[$c] == '+' ? (($a + $d) & 4294967295) : $a ^ $d;
        }
        return $a;
    }

    /**
     * Compute the "tk" request token for a piece of text (ported from Google's page script).
     *
     * @param string $a UTF-8 text that will be sent as the "q" parameter.
     * @return string Token of the form "<int>.<int>".
     */
    private function TL($a) {
        if ($this->tkk == 'auto') {
            // Try to read the current seed from the home page; fall back to the built-in one.
            $url = 'https://translate.google.com/';
            $result = $this->_curl($url);
            $regex = '@tkk\s*:\s*\'([^\']+)\'@';
            if (is_string($result) && preg_match($regex, $result, $match)) {
                $this->tkk = $match[1];
            } else {
                $this->tkk = '435578.903374698';
            }
        }
        $tkk = explode('.', $this->tkk);
        $b = (int) $tkk[0];
        $seed = isset($tkk[1]) ? (int) $tkk[1] : 0;
        // The token is computed over the UTF-8 bytes of the text. Reading the
        // bytes directly also covers characters outside the BMP (e.g. emoji).
        $d = $a === '' ? [] : array_values(unpack('C*', $a));
        $a = $b;
        for ($e = 0; $e < count($d); $e++) {
            $a += $d[$e];
            $a = $this->RL($a, '+-a^+6');
        }
        $a = $this->RL($a, "+-3^+b+-f");
        $a ^= $seed;
        if (0 > $a) {
            $a = ($a & 2147483647) + 2147483648;
        }
        $a = (int) fmod($a, pow(10, 6));
        return $a . "." . ($a ^ $b);
    }
}
Bing 引擎:Driver/Translate/Bing.class.php
代码:
<?php
/**
 * Translation driver for the Microsoft Translator V2 Ajax endpoint.
 *
 * Text is stripped of HTML, split into chunks of whole lines, and each chunk
 * is sent in a separate GET request.
 */
class translateBing {
    private $url = 'http://api.microsofttranslator.com/V2/Ajax.svc/Translate';
    private $from = 'zh-cn';
    private $to = 'en';
    private $error = '';

    /**
     * @param string $from Source language code; an empty value keeps 'zh-cn'.
     * @param string $to   Target language code; an empty value keeps 'en'.
     */
    public function __construct($from = '', $to = '') {
        if ($from) {
            $this->from = $from;
        }
        if ($to) {
            $this->to = $to;
        }
    }

    /**
     * Translate a block of text or simple HTML.
     *
     * @param string $text Content to translate.
     * @return string|false Per-chunk responses joined by "\n" ('' for empty
     *                      input), or false on failure; see getError().
     */
    public function translate($text) {
        $text = trim((string) $text);
        // Compare against '' so that the literal text "0" is still translated.
        if ($text === '') {
            return '';
        }
        $translated = [];
        foreach ($this->splitIntoChunks($text, 3000) as $chunk) {
            // Exactly one request per chunk; abort on the first failure.
            $res = $this->_trans($chunk);
            if (!$res) {
                return $res;
            }
            $translated[] = $res;
        }
        return trim(implode("\n", $translated));
    }

    /**
     * Return the last error message recorded by this driver.
     *
     * @return string
     */
    public function getError() {
        return $this->error;
    }

    /**
     * Strip markup and group the non-empty lines into chunks for the API.
     *
     * A chunk is closed once adding the next line would reach $limit
     * characters; that line then starts the next chunk, so no line is lost.
     * A single line longer than $limit forms a chunk of its own.
     *
     * @param string $text  Raw text or HTML.
     * @param int    $limit Chunk size threshold in characters.
     * @return string[] Chunks, each line prefixed by "\n".
     */
    private function splitIntoChunks($text, $limit) {
        // Turn block-level tags into line breaks before dropping all markup.
        $text = str_replace(['<br>', '<p>', '</p>', '<br />', '<br/>'], "\r\n", $text);
        $text = strip_tags($text);
        $chunks = [];
        $tmp = '';
        foreach (explode("\n", $text) as $line) {
            $line = trim($line);
            if ($line === '') {
                continue;
            }
            if ($tmp !== '' && mb_strlen($tmp, 'utf-8') + mb_strlen($line, 'utf-8') >= $limit) {
                $chunks[] = $tmp;
                $tmp = '';
            }
            $tmp .= "\n" . $line;
        }
        if ($tmp !== '') {
            $chunks[] = $tmp;
        }
        return $chunks;
    }

    /**
     * Send one chunk to the endpoint.
     *
     * NOTE(review): the body is returned exactly as received, i.e. still
     * starting with the 'json_decode' callback name requested through the
     * 'oncomplete' parameter. Unwrapping it probably belongs here — confirm
     * the response format before changing the return value.
     *
     * @param string $text Chunk of plain text.
     * @return string|false Raw response body, or false on failure (message stored in $error).
     */
    private function _trans($text) {
        // NOTE(review): the application id is hard-coded in source; consider moving it to configuration.
        $query = [
            'oncomplete' => 'json_decode',
            'appId' => 'A4D660A48A6A97CCA791C34935E4C02BBB1BEC1C',
            'from' => $this->from,
            'to' => $this->to,
            'text' => $text
        ];
        $query_str = http_build_query($query);
        $url = $this->url . '?' . $query_str;
        $curl = curl_init();
        curl_setopt_array($curl, array(
            CURLOPT_URL => $url,
            CURLOPT_RETURNTRANSFER => true,
            CURLOPT_ENCODING => "",
            CURLOPT_MAXREDIRS => 10,
            CURLOPT_TIMEOUT => 30,
            CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
            CURLOPT_SSL_VERIFYPEER => false,
            CURLOPT_SSL_VERIFYHOST => 2,
            CURLOPT_CUSTOMREQUEST => "GET",
            CURLOPT_HTTPHEADER => array(
                "Cache-Control: no-cache",
                "User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Safari/537.36"
            ),
        ));
        $response = curl_exec($curl);
        if (curl_errno($curl)) {
            $this->error = 'CURL_ERROR:' . curl_error($curl);
            $response = false;
        } else if (substr($response, 0, 11) != 'json_decode') {
            // Anything that does not start with the callback name is treated as an error message.
            $this->error = $response;
            $response = false;
        }
        curl_close($curl);
        return $response;
    }
}
使用方法:
// Usage example: translate a list of sample sentences with the Google engine.
defined('LIB_PATH') || define('LIB_PATH', '/usr/htdocs/class');
include LIB_PATH . '/Translate.class.php';
$obj = new cmsTranslate('google');
$contents = [
    '我爱北京天安门'
];
// translate() takes one string, so the samples are joined with line breaks
// (the engines split on line breaks again).
$txt = $obj->translate(implode("\n", $contents));
if ($txt === false) {
    // false signals a failure; the engine keeps the reason.
    exit($obj->getError());
} else {
    echo '翻译结果:' . PHP_EOL;
    print_r($txt);
}