|
|
@@ -0,0 +1,226 @@
|
|
|
1
|
+import fs from 'fs';
|
|
|
2
|
+import path from 'path';
|
|
|
3
|
+import axios from 'axios';
|
|
|
4
|
+import * as cheerio from 'cheerio'; // 用于解析HTML
|
|
|
5
|
+import { fileURLToPath } from 'url';
|
|
|
6
|
+import crypto from 'crypto';
|
|
|
7
|
+
|
|
|
8
|
// ES modules have no built-in __filename/__dirname, so rebuild both from
// this module's own URL.
const __filename = fileURLToPath(new URL(import.meta.url));
const __dirname = path.dirname(__filename);
|
|
|
11
|
+
|
|
|
12
|
/**
 * Settings for the Tencent Cloud Machine Translation (TMT) API.
 *
 * Credentials are read from the environment instead of being committed to
 * source control:
 *   - TENCENTCLOUD_SECRET_ID  -> secretId
 *   - TENCENTCLOUD_SECRET_KEY -> secretKey
 * When a variable is unset the value is an empty string; requests signed
 * with it will be rejected by the API.
 *
 * SECURITY: a secretId/secretKey pair used to be hard-coded here. Any key
 * that was ever committed must be treated as leaked and rotated.
 *
 * The object is frozen so the shared configuration cannot be mutated.
 */
const TENCENT_TRANSLATE_API = Object.freeze({
  url: 'https://tmt.tencentcloudapi.com/',
  secretId: process.env.TENCENTCLOUD_SECRET_ID ?? '',
  secretKey: process.env.TENCENTCLOUD_SECRET_KEY ?? '',
  region: 'ap-shanghai',
  projectId: 0,
});
|
|
|
20
|
+
|
|
|
21
|
/**
 * Synchronously read a file and return its contents decoded as UTF-8.
 *
 * @param {string} filePath - Path of the file to read.
 * @returns {string} The decoded file contents.
 * @throws Propagates any error raised by `fs.readFileSync` (e.g. a missing file).
 */
function readHtmlFile(filePath) {
  const contents = fs.readFileSync(filePath, { encoding: 'utf-8' });
  return contents;
}
|
|
|
25
|
+
|
|
|
26
|
/**
 * Split an HTML document into chapters keyed by heading text.
 *
 * Every <h2> starts a chapter. The chapter body is the trimmed, non-empty
 * text of each <p> that follows the heading as a sibling, up to (not
 * including) the next sibling <h2>. Headings with no such paragraphs are
 * omitted. If two headings share the same text, the later one replaces the
 * earlier one.
 *
 * @param {string} htmlContent - Raw HTML markup.
 * @returns {Object<string, string[]>} Map of chapter title to paragraph texts.
 */
function parseHtmlChapters(htmlContent) {
  const $ = cheerio.load(htmlContent);
  const chapters = {};

  for (const heading of $('h2').toArray()) {
    const title = $(heading).text().trim();

    // Sibling elements after this heading, stopping before the next <h2>;
    // only <p> elements contribute text.
    const paragraphTexts = $(heading)
      .nextUntil('h2')
      .filter('p')
      .toArray()
      .map((node) => $(node).text().trim())
      .filter((content) => content !== '');

    if (paragraphTexts.length > 0) {
      chapters[title] = paragraphTexts;
    }
  }

  return chapters;
}
|
|
|
55
|
+
|
|
|
56
|
/**
 * Compute a TC3-HMAC-SHA256 request signature for the Tencent Cloud `tmt`
 * service.
 *
 * The canonical request is fixed to `POST /` with an empty query string and
 * the signed headers `content-type` (application/json) and `host`
 * (tmt.tencentcloudapi.com). The request body hash is taken over
 * `JSON.stringify(payload)`.
 *
 * @param {string} secretKey - API secret key; prefixed with "TC3" to seed the key derivation.
 * @param {number|string} timestamp - Value interpolated into the string to sign.
 * @param {string} date - Date string used in the credential scope and as the first HMAC message.
 * @param {*} payload - Request body; serialized with JSON.stringify before hashing.
 * @returns {string} Hex-encoded HMAC-SHA256 signature.
 */
function generateTencentCloudSignature(secretKey, timestamp, date, payload) {
  const sha256Hex = (message) => crypto.createHash('sha256').update(message).digest('hex');
  const hmacSha256 = (key, message) => crypto.createHmac('sha256', key).update(message);

  // Step 1: canonical request. The header block already ends with a newline,
  // so joining with "\n" leaves the blank line that separates it from the
  // signed-header list.
  const canonicalRequest = [
    'POST',
    '/',
    '',
    'content-type:application/json\nhost:tmt.tencentcloudapi.com\n',
    'content-type;host',
    sha256Hex(JSON.stringify(payload)),
  ].join('\n');

  // Step 2: string to sign.
  const stringToSign = [
    'TC3-HMAC-SHA256',
    `${timestamp}`,
    `${date}/tmt/tc3_request`,
    sha256Hex(canonicalRequest),
  ].join('\n');

  // Step 3: derive the signing key (date -> service -> request terminator),
  // then sign.
  const dateKey = hmacSha256(`TC3${secretKey}`, date).digest();
  const serviceKey = hmacSha256(dateKey, 'tmt').digest();
  const signingKey = hmacSha256(serviceKey, 'tc3_request').digest();

  return hmacSha256(signingKey, stringToSign).digest('hex');
}
|
|
|
82
|
+
|
|
|
83
|
/**
 * Translate English text to Chinese with the Tencent Cloud TextTranslate API.
 *
 * This is best-effort: on any failure the error is logged and the original
 * `text` is returned, so callers always get a usable value back.
 *
 * @param {string} text - English source text.
 * @returns {Promise<string>} The translated text, or `text` unchanged when the
 *   input is empty/not a string or the request fails.
 */
async function translateText(text) {
  // Nothing to translate: skip the network round-trip entirely.
  if (typeof text !== 'string' || text.trim() === '') {
    return text;
  }

  try {
    const timestamp = Math.floor(Date.now() / 1000);
    // The credential date must be the UTC date of the signed timestamp.
    // Derive it from the same instant so the two cannot disagree when the
    // call straddles UTC midnight.
    const date = new Date(timestamp * 1000).toISOString().slice(0, 10);

    const payload = {
      SourceText: text,
      Source: 'en',
      Target: 'zh',
      ProjectId: TENCENT_TRANSLATE_API.projectId,
    };

    const signature = generateTencentCloudSignature(
      TENCENT_TRANSLATE_API.secretKey,
      timestamp,
      date,
      payload
    );

    const authorization = `TC3-HMAC-SHA256 Credential=${TENCENT_TRANSLATE_API.secretId}/${date}/tmt/tc3_request, SignedHeaders=content-type;host, Signature=${signature}`;

    const response = await axios.post(
      TENCENT_TRANSLATE_API.url,
      payload,
      {
        headers: {
          'Content-Type': 'application/json',
          'Host': 'tmt.tencentcloudapi.com',
          'X-TC-Action': 'TextTranslate',
          'X-TC-Version': '2018-03-21',
          'X-TC-Region': TENCENT_TRANSLATE_API.region,
          // Header values are strings; convert explicitly.
          'X-TC-Timestamp': String(timestamp),
          'Authorization': authorization,
        },
      }
    );

    // Tencent Cloud API 3.0 reports failures (bad signature, rate limit,
    // unsupported language, ...) as HTTP 200 with a `Response.Error` object,
    // so a successful HTTP status does not imply a translation is present.
    const result = response.data?.Response;
    if (result?.Error) {
      throw new Error(`${result.Error.Code}: ${result.Error.Message}`);
    }
    if (typeof result?.TargetText !== 'string') {
      throw new Error('response did not contain TargetText');
    }

    return result.TargetText;
  } catch (error) {
    console.error('翻译失败:', error.message);
    if (error.response) {
      console.error('错误详情:', JSON.stringify(error.response.data));
    }
    return text; // Fall back to the source text when translation fails.
  }
}
|
|
|
131
|
+
|
|
|
132
|
/**
 * Build an English -> Chinese paragraph map from two chapter-structured HTML
 * files.
 *
 * Chapters are paired purely by position (first English chapter with first
 * Chinese chapter, and so on), and paragraphs inside a paired chapter are
 * paired by index. When the Chinese side has no paragraph at an index, the
 * English paragraph is sent to `translateText`. English chapters without a
 * Chinese counterpart are skipped with a warning.
 *
 * @param {string} englishHtmlPath - Path to the English HTML file.
 * @param {string} chineseHtmlPath - Path to the Chinese HTML file.
 * @returns {Promise<Object<string, Array<{english: string, chinese: string}>>>}
 *   Map keyed by English chapter title.
 */
async function generateTranslationMap(englishHtmlPath, chineseHtmlPath) {
  // Read both files before parsing either one.
  const [englishHtml, chineseHtml] = [englishHtmlPath, chineseHtmlPath].map((filePath) =>
    readHtmlFile(filePath),
  );
  const [englishChapters, chineseChapters] = [englishHtml, chineseHtml].map((html) =>
    parseHtmlChapters(html),
  );

  const englishChapterTitles = Object.keys(englishChapters);
  const chineseChapterTitles = Object.keys(chineseChapters);

  console.log(`英文章节数: ${englishChapterTitles.length}`);
  console.log(`中文章节数: ${chineseChapterTitles.length}`);

  // Print the first few titles of each side to help debugging.
  console.log('英文章节标题示例:', englishChapterTitles.slice(0, 3));
  console.log('中文章节标题示例:', chineseChapterTitles.slice(0, 3));

  // Positional chapter pairing: English title -> Chinese title.
  const chapterMapping = {};
  englishChapterTitles.slice(0, chineseChapterTitles.length).forEach((title, position) => {
    chapterMapping[title] = chineseChapterTitles[position];
  });

  const translationMap = {};

  for (const englishTitle of englishChapterTitles) {
    const chineseTitle = chapterMapping[englishTitle];
    const hasCounterpart = Boolean(chineseTitle) && Boolean(chineseChapters[chineseTitle]);

    if (!hasCounterpart) {
      console.warn(`警告: 中文文件中缺少对应章节 ${englishTitle}`);
      continue;
    }

    const chineseParagraphs = chineseChapters[chineseTitle];
    const pairs = [];
    translationMap[englishTitle] = pairs;

    // Sequential on purpose: each missing paragraph is translated before
    // moving on to the next one.
    for (const [position, englishSentence] of englishChapters[englishTitle].entries()) {
      let chineseSentence = position < chineseParagraphs.length ? chineseParagraphs[position] : null;

      // No Chinese paragraph at this position (or it is blank): translate.
      if (!chineseSentence || chineseSentence.trim() === '') {
        chineseSentence = await translateText(englishSentence);
        console.log(`翻译: ${englishSentence} -> ${chineseSentence}`);
      }

      pairs.push({ english: englishSentence, chinese: chineseSentence });
    }
  }

  return translationMap;
}
|
|
|
191
|
+
|
|
|
192
|
// Example usage: both source documents are expected next to this script.
const englishHtmlPath = path.join(__dirname, 'Strange-Life-of-a-Cat', 'Strange-Life-of-a-Cat.html');
const chineseHtmlPath = path.join(__dirname, '回到过去变成猫', '回到过去变成猫.html');

// Make sure both input files exist (English first, then Chinese); exit with
// status 1 on the first one that is missing.
const requiredInputs = [
  [englishHtmlPath, `英文HTML文件不存在: ${englishHtmlPath}`],
  [chineseHtmlPath, `中文HTML文件不存在: ${chineseHtmlPath}`],
];
for (const [inputPath, missingMessage] of requiredInputs) {
  if (!fs.existsSync(inputPath)) {
    console.error(missingMessage);
    process.exit(1);
  }
}

//console.log(`英文HTML文件路径: ${englishHtmlPath}`);
//console.log(`中文HTML文件路径: ${chineseHtmlPath}`);

// The full map-generation run is currently disabled; only the single-sentence
// translation check below is executed.
// generateTranslationMap(englishHtmlPath, chineseHtmlPath)
//   .then((translationMap) => {
//     const outputPath = path.join(__dirname, 'translation_map.json');
//     fs.writeFileSync(outputPath, JSON.stringify(translationMap, null, 2), 'utf-8');
//     console.log(`翻译映射表已生成: ${outputPath}`);
//     console.log(`翻译了 ${Object.keys(translationMap).length} 个章节`);
//   })
//   .catch((error) => {
//     console.error('生成映射表失败:', error);
//     process.exit(1);
//   });

// Translate one sample sentence, then print a fixed Chinese rendering of the
// same sentence so the two can be compared by eye.
const englishText = "Thick smoke billowed. Every time he took a breath, it sounded like a broken bellows, as if it was burning his throat and lungs. Soon, Xiafeng's consciousness became blurry.";
console.log(englishText);
const text = await translateText(englishText);
console.log(text);
console.log("浓烟滚滚,每吸一口都发出破烂风箱般粗重的声音,像是在灼烧着咽喉和肺部,夏风的意识很快就模糊起来。");
|