|
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+import moment from 'moment';
|
|
|
2
|
+import fs from 'fs';
|
|
|
3
|
+import { promises as fsPromises } from 'fs';
|
|
|
4
|
+import commonModel from '../../model/commonModel.js';
|
|
|
5
|
+import config from '../../config/index.js';
|
|
|
6
|
+import _ from 'lodash';
|
|
|
7
|
+import axios from 'axios';
|
|
|
8
|
+import { stringUtils } from '../../util/stringClass.js';
|
|
|
9
|
+import { globalCache } from '../../util/GlobalCache.js';
|
|
|
10
|
+import machineTranslationAPI from './machineTranslation.js';
|
|
|
11
|
+import path from 'path';
|
|
|
12
|
+import { fileURLToPath } from 'url';
|
|
|
13
|
+
|
|
|
14
|
+
|
|
|
15
|
+const __filename = fileURLToPath(import.meta.url);
|
|
|
16
|
+const __dirname = path.dirname(__filename);
|
|
|
17
|
+
|
|
|
18
|
/**
 * Responds with the fixed catalogue of books offered by the reader.
 *
 * The list is hard-coded; nothing is read from `ctx`.
 *
 * @param {object} ctx - Request context; only `ctx.body` is written.
 * @returns {Promise<void>} Resolves once `ctx.body` has been set to
 *   `{ errcode: 10000, result: [{ Title }, ...] }`.
 */
export async function GetReaderBooks(ctx) {
  const availableTitles = [
    "Strange-Life-of-a-Cat",
    "Throne-of-Magical-Arcana",
    "Zhui-Xu",
    "Hidden-Assassin",
    "Release-that-Witch",
    "The-Legendary-Mechanic",
  ];

  // Each entry is wrapped in an object carrying a single `Title` key.
  const result = availableTitles.map((Title) => ({ Title }));

  ctx.body = { errcode: 10000, result };
}
|
|
|
37
|
+
|
|
|
38
|
/**
 * Responds with the ordered chapter titles of one crawled book.
 *
 * Lists `<__dirname>/../../web_crawler/<Title>/<Title>_contents`, keeps the
 * `.html` entries, orders them by the integer that precedes the first
 * underscore in the file name, and reports for each file the text captured
 * between an underscore and the trailing `.html` (or the whole file name when
 * that pattern does not match, e.g. when the name contains an extra dot).
 *
 * @param {object} ctx - Request context. Reads `ctx.query.Title`, writes
 *   `ctx.body` as `{ errcode: 10000, result: string[] }`.
 * @returns {Promise<void>}
 * @throws {Error} With `status = 400` when `Title` is missing, not a string,
 *   or not a single path segment; with `status = 404` when the book's
 *   contents directory does not exist. Other filesystem errors propagate
 *   unchanged.
 */
export async function GetReaderBooksChapter(ctx) {
  const title = ctx.query.Title;

  // Title is used as a directory name, so it must be exactly one path
  // segment. Rejecting separators and dot-segments stops requests such as
  // `?Title=../..` from listing directories outside web_crawler.
  const isSingleSegment =
    typeof title === 'string' &&
    title !== '' &&
    title !== '.' &&
    title !== '..' &&
    !/[\\/\u0000]/.test(title);
  if (!isSingleSegment) {
    // `status`/`expose` follow the shape Koa's `ctx.throw` produces.
    // NOTE(review): assumes ctx is a Koa context — confirm.
    const invalid = new Error('Invalid Title');
    invalid.status = 400;
    invalid.expose = true;
    throw invalid;
  }

  const sourceDir = path.join(__dirname, '../../web_crawler/', title, `${title}_contents`);

  let entries;
  try {
    entries = await fs.promises.readdir(sourceDir);
  } catch (error) {
    if (error.code === 'ENOENT' || error.code === 'ENOTDIR') {
      const notFound = new Error('Book not found');
      notFound.status = 404;
      notFound.expose = true;
      notFound.cause = error;
      throw notFound;
    }
    throw error;
  }

  // Files without a numeric prefix would yield NaN and make the comparator
  // inconsistent, so they are given a key that sorts them last.
  const chapterOrder = (fileName) => {
    const parsed = Number.parseInt(fileName.split('_')[0], 10);
    return Number.isNaN(parsed) ? Number.MAX_SAFE_INTEGER : parsed;
  };

  const result = entries
    .filter((fileName) => fileName.endsWith('.html'))
    .sort((a, b) => chapterOrder(a) - chapterOrder(b))
    .map((fileName) => {
      // Extract the chapter title from the file name.
      const titleMatch = fileName.match(/_([^.]+)\.html$/);
      return titleMatch ? titleMatch[1] : fileName;
    });

  ctx.body = { errcode: 10000, result };
}
|
|
|
60
|
+
|
|
|
61
|
/**
 * Converts one line of the book's HTML into a list of sentence fragments.
 *
 * Tags are stripped (`<br>`, `<p>`, `<h2>` become line breaks first), a space
 * is inserted after every `.`, `!` or `?` that is directly followed by a
 * non-space character, and the text is split after sentence punctuation.
 * Every double quote becomes its own `'"'` element. Text that precedes a quote
 * is pushed trimmed; the text that ends a sentence part is pushed trimmed with
 * one trailing space appended.
 *
 * @param {string} htmlLine - A single line of the source HTML.
 * @returns {string[]} Fragments of the line; empty when the line has no text.
 */
function splitReaderLineIntoSentences(htmlLine) {
  const text = htmlLine
    .replace(/<br\s*\/?>/gi, '\n')
    // Non-breaking spaces (entity or literal U+00A0) become plain spaces.
    .replace(/&nbsp;|\u00a0/g, ' ')
    .replace(/<h2>/g, '\n\n')
    .replace(/<\/h2>/g, '\n\n')
    .replace(/<p>/g, '\n')
    .replace(/<\/p>/g, '\n')
    .replace(/<[^>]+>/g, '')
    .trim()
    // Guarantee a space after sentence punctuation so the split below sees it.
    .replace(/([.!?])(?=\S)/g, '$1 ');

  const fragments = [];
  if (!text) {
    return fragments;
  }

  for (const part of text.split(/(?<=[.!?])\s+/)) {
    if (!part.trim()) continue;

    const segments = part.split('"');
    const tail = segments.pop();

    // Every segment before the last one was terminated by a quote character.
    for (const segment of segments) {
      const beforeQuote = segment.trim();
      if (beforeQuote) {
        fragments.push(beforeQuote);
      }
      fragments.push('"');
    }

    // The closing text of a part always carries exactly one trailing space.
    const closing = tail.trim();
    if (closing) {
      fragments.push(`${closing} `);
    }
  }

  return fragments;
}

/**
 * Responds with the text of one chapter, as paragraphs of sentence fragments.
 *
 * Reads `<__dirname>/../../web_crawler/<Title>/<Title>.html`, finds the line
 * `<h2>Chapter <Chapter></h2>`, and converts every following line up to the
 * next line containing `<h2>Chapter` into an array of fragments. Lines with no
 * text are omitted, and the heading line itself is not part of the result.
 * An unknown chapter yields an empty result.
 *
 * @param {object} ctx - Request context. Reads `ctx.query.Title` and
 *   `ctx.query.Chapter` (falls back to "0" when falsy); writes `ctx.body` as
 *   `{ errcode: 10000, result: string[][] }`.
 * @returns {Promise<void>}
 * @throws {Error} With `status = 400` when `Title` is missing, not a string,
 *   or not a single path segment; with `status = 404` when the book file does
 *   not exist. Other filesystem errors propagate unchanged.
 */
export async function GetReaderBooksChapterContent(ctx) {
  const title = ctx.query.Title;
  const chapterId = ctx.query.Chapter || "0";

  // Title is used as a directory and file name, so it must be exactly one
  // path segment; otherwise `?Title=../..` could read files outside
  // web_crawler.
  const isSingleSegment =
    typeof title === 'string' &&
    title !== '' &&
    title !== '.' &&
    title !== '..' &&
    !/[\\/\u0000]/.test(title);
  if (!isSingleSegment) {
    // `status`/`expose` follow the shape Koa's `ctx.throw` produces.
    // NOTE(review): assumes ctx is a Koa context — confirm.
    const invalid = new Error('Invalid Title');
    invalid.status = 400;
    invalid.expose = true;
    throw invalid;
  }

  // Read the book's text file.
  const filePath = path.join(__dirname, '../../web_crawler/', title, `${title}.html`);

  let content;
  try {
    content = await fs.promises.readFile(filePath, 'utf-8');
  } catch (error) {
    if (error.code === 'ENOENT' || error.code === 'ENOTDIR') {
      const notFound = new Error('Book not found');
      notFound.status = 404;
      notFound.expose = true;
      notFound.cause = error;
      throw notFound;
    }
    throw error;
  }
  console.log(`已读取文件: ${filePath}`);

  const heading = "<h2>Chapter " + chapterId + "</h2>";
  const result = [];
  let insideChapter = false;

  // Splitting on \r?\n keeps a trailing carriage return from defeating the
  // heading comparison when the file uses CRLF line endings.
  for (const line of content.split(/\r?\n/)) {
    if (!insideChapter) {
      // The heading only marks where the chapter starts; it is not content.
      if (line.trim() === heading) {
        insideChapter = true;
      }
      continue;
    }

    if (line.includes("<h2>Chapter")) {
      break; // Reached the next chapter heading; stop processing.
    }

    const paragraphSentences = splitReaderLineIntoSentences(line);
    // Only paragraphs that contain at least one fragment are reported.
    if (paragraphSentences.length > 0) {
      result.push(paragraphSentences);
    }
  }

  ctx.body = { errcode: 10000, result };
}
|