readerController.js 5.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145
  1. import moment from 'moment';
  2. import fs from 'fs';
  3. import { promises as fsPromises } from 'fs';
  4. import commonModel from '../../model/commonModel.js';
  5. import config from '../../config/index.js';
  6. import _ from 'lodash';
  7. import axios from 'axios';
  8. import { stringUtils } from '../../util/stringClass.js';
  9. import { globalCache } from '../../util/GlobalCache.js';
  10. import machineTranslationAPI from './machineTranslation.js';
  11. import path from 'path';
  12. import { fileURLToPath } from 'url';
  // Recreate CommonJS-style __filename/__dirname from this module's URL; the
  // handlers in this file join their data paths relative to __dirname.
  13. const __filename = fileURLToPath(import.meta.url);
  14. const __dirname = path.dirname(__filename);
  /**
   * Responds with the fixed catalogue of books offered by the reader.
   *
   * @param {object} ctx - Request context; the response is written to `ctx.body`
   *   as `{ errcode: 10000, result }`, where `result` is an array of
   *   `{ Title }` records.
   * @returns {Promise<void>} Resolves once `ctx.body` has been assigned.
   */
  export async function GetReaderBooks(ctx) {
    const titles = [
      "Strange-Life-of-a-Cat",
      "Throne-of-Magical-Arcana",
      "Zhui-Xu",
      "Hidden-Assassin",
      "Release-that-Witch",
      "The-Legendary-Mechanic",
    ];
    const result = titles.map((Title) => ({ Title }));
    ctx.body = { errcode: 10000, result };
  }
  /**
   * Lists the chapter titles available for one crawled book.
   *
   * Reads the directory `web_crawler/<Title>/<Title>_contents` (resolved two
   * levels above this module), keeps the `.html` files, orders them by the
   * integer that precedes the first `_` in each file name, and reports the part
   * of each name between that `_` and the `.html` extension. A file name that
   * does not fit that pattern is reported unchanged.
   *
   * @param {object} ctx - Request context. `ctx.query.Title` selects the book;
   *   the response is written to `ctx.body` as `{ errcode: 10000, result }`,
   *   where `result` is an array of title strings.
   * @returns {Promise<void>} Resolves once `ctx.body` has been assigned.
   * @throws {Error} With `status = 400` when `Title` is `.`/`..` or contains a
   *   path separator or NUL. NOTE(review): `status` is set on the assumption
   *   that a Koa-style error handler maps it to the HTTP status — confirm.
   * @throws {Error} Any error from `readdir` (for example a missing book
   *   directory) propagates to the caller.
   */
  export async function GetReaderBooksChapter(ctx) {
    const title = String(ctx.query.Title || "");

    // Title comes straight from the query string and becomes part of a
    // file-system path, so it must be a single path segment: anything that could
    // step out of the web_crawler directory is rejected up front.
    if (title === '.' || title === '..' || /[\\/\0]/.test(title)) {
      const invalidTitle = new Error('Invalid Title');
      invalidTitle.status = 400;
      throw invalidTitle;
    }

    const sourceDir = path.join(__dirname, '..', '..', 'web_crawler', title, `${title}_contents`);

    // File names look like "<number>_<chapter title>.html"; sort numerically so
    // that "10_" comes after "2_".
    const chapterNumber = (file) => Number.parseInt(file.split('_')[0], 10);
    const files = (await fs.promises.readdir(sourceDir))
      .filter((file) => file.endsWith('.html'))
      .sort((a, b) => chapterNumber(a) - chapterNumber(b));

    // Extract the chapter title from each file name.
    const result = files.map((file) => {
      const titleMatch = file.match(/_([^.]+)\.html$/);
      return titleMatch ? titleMatch[1] : file;
    });

    ctx.body = { errcode: 10000, result };
  }
  51. export async function GetReaderBooksChapterContent(ctx) {
  52. const param = {
  53. Title: ctx.query.Title || "",
  54. Chapter: ctx.query.Chapter || "0",
  55. };
  56. let result = [];
  57. const {readFile} = fs.promises;
  58. // 读取文本文件
  59. let filePath = path.join(__dirname, '../../web_crawler/'+ param.Title + '/' + param.Title + '.html');
  60. const content = await fs.promises.readFile(filePath, 'utf-8');
  61. console.log(`已读取文件: ${filePath}`);
  62. const chapters = content.split('\n');
  63. let b=false;
  64. // 处理每个章节
  65. for (const chapter of chapters) {
  66. if (b && chapter && chapter.indexOf("<h2>Chapter")>=0) {
  67. break; // 找到下一个章节标题,停止处理
  68. }
  69. if (chapter === "<h2>Chapter "+ param.Chapter+"</h2>") {
  70. b=true;
  71. }
  72. if (b) {
  73. let chapter2=chapter.replace(/<br\s*\/?>/gi, '\n').replace(/&nbsp;/g, ' ').replace(/<h2>/g, '\n\n').replace(/<\/h2>/g, '\n\n').replace(/<p>/g, '\n').replace(/<\/p>/g, '\n').replace(/<[^>]+>/g, '').trim();
  74. // 确保每个句子标点符号(句号、问号、感叹号)后面有一个空格
  75. chapter2 = chapter2.replace(/([.!?])(?=\S)/g, '$1 ');
  76. // 按句子分割文本,形成段落内的句子数组
  77. if (chapter2) {
  78. // 创建当前段落的句子数组
  79. let paragraphSentences = [];
  80. // 先将文本按句子分割
  81. // 使用正则表达式匹配句子结束标志:句号、问号、感叹号后跟空格或结束
  82. const sentenceParts = chapter2.split(/(?<=[.!?])\s+/);
  83. //console.log('初始分割的句子:', sentenceParts);
  84. // 对每个句子部分处理引号
  85. for (const part of sentenceParts) {
  86. if (!part.trim()) continue;
  87. let currentText = '';
  88. for (let i = 0; i < part.length; i++) {
  89. if (part[i] === '"') {
  90. // 如果是引号,先添加之前的文本(如果有)
  91. if (currentText.trim()) {
  92. paragraphSentences.push(currentText.trim());
  93. }
  94. // 添加引号作为单独元素
  95. paragraphSentences.push('"');
  96. currentText = '';
  97. } else {
  98. currentText += part[i];
  99. }
  100. }
  101. // 添加剩余文本(如果有)
  102. if (currentText.trim()) {
  103. // 确保非引号文本末尾有空格
  104. let trimmedText = currentText.trim();
  105. if (!trimmedText.endsWith('"') && !trimmedText.endsWith(' ')) {
  106. trimmedText += ' ';
  107. }
  108. paragraphSentences.push(trimmedText);
  109. }
  110. }
  111. // 只有当段落中有句子时,才将该段落添加到结果中
  112. if (paragraphSentences.length > 0) {
  113. result.push(paragraphSentences);
  114. }
  115. }
  116. }
  117. }
  118. result=result.slice(1);
  119. ctx.body = { "errcode": 10000, result: result };
  120. }